/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */
#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>
#include <linux/nospec.h>

#include "i915_drv.h"
#include "gvt.h"
static const struct intel_gvt_ops *intel_gvt_ops;
/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
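
/*
 * Illustrative example (annotation, not in the original file): with
 * VFIO_PCI_OFFSET_SHIFT == 40, an access at file offset
 * (2ULL << 40) + 0x10 targets region index 2 (BAR2) at offset 0x10:
 *
 *	VFIO_PCI_OFFSET_TO_INDEX((2ULL << 40) + 0x10)	-> 2
 *	((2ULL << 40) + 0x10) & VFIO_PCI_OFFSET_MASK	-> 0x10
 */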
struct vfio_region {
	u32				type;
	u32				subtype;
	size_t				size;
	u32				flags;
};

struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
};

struct gvt_dma {
	struct rb_node node;
	gfn_t gfn;
	unsigned long iova;
};
static inline bool handle_valid(unsigned long handle)
{
	return !!(handle & ~0xff);
}
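
/*
 * Annotation (not in the original file): once a guest is initialized,
 * vgpu->handle holds a pointer to its kvmgt_guest_info, so any value
 * with bits set above the low byte is a live handle; values 0..0xff
 * are treated as "no guest attached".
 */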
static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
		unsigned long *iova)
{
	struct page *page;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	if (unlikely(!pfn_valid(pfn)))
		return -EFAULT;

	page = pfn_to_page(pfn);
	daddr = dma_map_page(dev, page, 0, PAGE_SIZE,
			PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr))
		return -ENOMEM;

	*iova = (unsigned long)(daddr >> PAGE_SHIFT);
	return 0;
}
static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova)
{
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	daddr = (dma_addr_t)(iova << PAGE_SHIFT);
	dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
}
static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->vdev.cache.rb_node;
	struct gvt_dma *ret = NULL;

	while (node) {
		struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else {
			ret = itr;
			goto out;
		}
	}

out:
	return ret;
}
static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *entry;
	unsigned long iova;

	mutex_lock(&vgpu->vdev.cache_lock);

	entry = __gvt_cache_find(vgpu, gfn);
	iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova;

	mutex_unlock(&vgpu->vdev.cache_lock);
	return iova;
}
static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
		unsigned long iova)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return;

	new->gfn = gfn;
	new->iova = iova;

	mutex_lock(&vgpu->vdev.cache_lock);
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, node);

		if (gfn == itr->gfn)
			goto out;
		else if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);
	return;

out:
	mutex_unlock(&vgpu->vdev.cache_lock);
	kfree(new);
}
static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
				struct gvt_dma *entry)
{
	rb_erase(&entry->node, &vgpu->vdev.cache);
	kfree(entry);
}
static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	struct gvt_dma *this;
	unsigned long g1;
	int rc;

	mutex_lock(&vgpu->vdev.cache_lock);
	this = __gvt_cache_find(vgpu, gfn);
	if (!this) {
		mutex_unlock(&vgpu->vdev.cache_lock);
		return;
	}

	g1 = gfn;
	gvt_dma_unmap_iova(vgpu, this->iova);
	rc = vfio_unpin_pages(dev, &g1, 1);
	WARN_ON(rc != 1);
	__gvt_cache_remove_entry(vgpu, this);
	mutex_unlock(&vgpu->vdev.cache_lock);
}
static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->vdev.cache = RB_ROOT;
	mutex_init(&vgpu->vdev.cache_lock);
}
static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	unsigned long gfn;

	for (;;) {
		mutex_lock(&vgpu->vdev.cache_lock);
		node = rb_first(&vgpu->vdev.cache);
		if (!node) {
			mutex_unlock(&vgpu->vdev.cache_lock);
			break;
		}
		dma = rb_entry(node, struct gvt_dma, node);
		gvt_dma_unmap_iova(vgpu, dma->iova);
		gfn = dma->gfn;
		__gvt_cache_remove_entry(vgpu, dma);
		mutex_unlock(&vgpu->vdev.cache_lock);
		vfio_unpin_pages(dev, &gfn, 1);
	}
}
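
/*
 * Annotation (not in the original file): this rb-tree cache backs
 * kvmgt_gfn_to_pfn() below. On a miss, the guest page is pinned with
 * vfio_pin_pages(), mapped for device DMA via gvt_dma_map_iova() and
 * inserted into the per-vGPU tree; VFIO DMA-unmap notifications and
 * vGPU teardown undo this per entry through gvt_cache_remove() and
 * gvt_cache_destroy(), unmapping and unpinning each page.
 */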
static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
		const char *name)
{
	int i;
	struct intel_vgpu_type *t;
	const char *driver_name = dev_driver_string(
			&gvt->dev_priv->drm.pdev->dev);

	for (i = 0; i < gvt->num_types; i++) {
		t = &gvt->types[i];
		if (!strncmp(t->name, name + strlen(driver_name) + 1,
			sizeof(t->name)))
			return t;
	}

	return NULL;
}
static ssize_t available_instances_show(struct kobject *kobj,
					struct device *dev, char *buf)
{
	struct intel_vgpu_type *type;
	unsigned int num = 0;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		num = 0;
	else
		num = type->avail_instance;

	return sprintf(buf, "%u\n", num);
}
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}
static ssize_t description_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	struct intel_vgpu_type *type;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		return 0;

	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
		       "fence: %d\nresolution: %s\n"
		       "weight: %d\n",
		       BYTES_TO_MB(type->low_gm_size),
		       BYTES_TO_MB(type->high_gm_size),
		       type->fence, vgpu_edid_str(type->resolution),
		       type->weight);
}
static MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);
static struct attribute *type_attrs[] = {
	&mdev_type_attr_available_instances.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_description.attr,
	NULL,
};
static struct attribute_group *intel_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};
static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i, j;
	struct intel_vgpu_type *type;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		type = &gvt->types[i];

		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
		if (WARN_ON(!group))
			goto unwind;

		group->name = type->name;
		group->attrs = type_attrs;
		intel_vgpu_type_groups[i] = group;
	}

	return true;

unwind:
	for (j = 0; j < i; j++) {
		group = intel_vgpu_type_groups[j];
		kfree(group);
	}

	return false;
}
static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		group = intel_vgpu_type_groups[i];
		kfree(group);
	}
}
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}
static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}
static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}

	return res;
}
static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}
static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}
static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}
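
/*
 * Annotation (not in the original file): a typical write-protection
 * cycle, driven by kvmgt_write_protect_add()/_remove() further below:
 *
 *	kvmgt_protect_table_add(info, gfn);	 // start tracking gfn
 *	kvmgt_gfn_is_write_protected(info, gfn); // true while tracked
 *	kvmgt_protect_table_del(info, gfn);	 // stop tracking gfn
 *
 * The hash table only mirrors what has been registered with KVM page
 * tracking; the actual write interception is performed by KVM.
 */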
static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = NULL;
	struct intel_vgpu_type *type;
	struct device *pdev;
	void *gvt;
	int ret;

	pdev = mdev_parent_dev(mdev);
	gvt = kdev_to_i915(pdev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type) {
		gvt_vgpu_err("failed to find type %s to create\n",
						kobject_name(kobj));
		ret = -EINVAL;
		goto out;
	}

	vgpu = intel_gvt_ops->vgpu_create(gvt, type);
	if (IS_ERR_OR_NULL(vgpu)) {
		ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
		gvt_vgpu_err("failed to create intel vgpu: %d\n", ret);
		goto out;
	}

	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);

	vgpu->vdev.mdev = mdev;
	mdev_set_drvdata(mdev, vgpu);

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	ret = 0;

out:
	return ret;
}
static int intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	if (handle_valid(vgpu->handle))
		return -EBUSY;

	intel_gvt_ops->vgpu_destroy(vgpu);
	return 0;
}
static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.iommu_notifier);

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		unsigned long gfn, end_gfn;

		gfn = unmap->iova >> PAGE_SHIFT;
		end_gfn = gfn + unmap->size / PAGE_SIZE;

		while (gfn < end_gfn)
			gvt_cache_remove(vgpu, gfn++);
	}

	return NOTIFY_OK;
}
static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vgpu->vdev.kvm = data;

		if (!data)
			schedule_work(&vgpu->vdev.release_work);
	}

	return NOTIFY_OK;
}
static int intel_vgpu_open(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long events;
	int ret;

	vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
				&vgpu->vdev.iommu_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
			ret);
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
				&vgpu->vdev.group_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
			ret);
		goto undo_iommu;
	}

	ret = kvmgt_guest_init(mdev);
	if (ret)
		goto undo_group;

	intel_gvt_ops->vgpu_activate(vgpu);

	atomic_set(&vgpu->vdev.released, 0);
	return ret;

undo_group:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);

undo_iommu:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
out:
	return ret;
}
static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
	struct kvmgt_guest_info *info;
	int ret;

	if (!handle_valid(vgpu->handle))
		return;

	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
		return;

	intel_gvt_ops->vgpu_deactivate(vgpu);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);
	WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);

	info = (struct kvmgt_guest_info *)vgpu->handle;
	kvmgt_guest_exit(info);

	vgpu->vdev.kvm = NULL;
	vgpu->handle = 0;
}
static void intel_vgpu_release(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	__intel_vgpu_release(vgpu);
}
static void intel_vgpu_release_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					vdev.release_work);

	__intel_vgpu_release(vgpu);
}
static uint64_t intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
{
	u32 start_lo, start_hi;
	u32 mem_type;

	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_MASK;
	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
						+ bar + 4));
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
	default:
		/* mem unknown type treated as 32-bit BAR */
		start_hi = 0;
		break;
	}

	return ((u64)start_hi << 32) | start_lo;
}
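
/*
 * Illustrative example (annotation, not in the original file): for a
 * 64-bit memory BAR whose config-space dwords read 0xa000000c (low;
 * type bits 0x4 == PCI_BASE_ADDRESS_MEM_TYPE_64) and 0x00000001
 * (high), this returns ((u64)0x1 << 32) | 0xa0000000 = 0x1a0000000.
 */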
static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, uint64_t off,
			     void *buf, unsigned int count, bool is_write)
{
	uint64_t bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
	int ret;

	if (is_write)
		ret = intel_gvt_ops->emulate_mmio_write(vgpu,
					bar_start + off, buf, count);
	else
		ret = intel_gvt_ops->emulate_mmio_read(vgpu,
					bar_start + off, buf, count);
	return ret;
}
static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
			size_t count, loff_t *ppos, bool is_write)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int ret = -EINVAL;

	if (index >= VFIO_PCI_NUM_REGIONS) {
		gvt_vgpu_err("invalid index: %u\n", index);
		return -EINVAL;
	}

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		if (is_write)
			ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
						buf, count);
		else
			ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
						buf, count);
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
					buf, count, is_write);
		break;
	case VFIO_PCI_BAR2_REGION_INDEX:
		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_2, pos,
					buf, count, is_write);
		break;
	case VFIO_PCI_BAR1_REGION_INDEX:
	case VFIO_PCI_BAR3_REGION_INDEX:
	case VFIO_PCI_BAR4_REGION_INDEX:
	case VFIO_PCI_BAR5_REGION_INDEX:
	case VFIO_PCI_VGA_REGION_INDEX:
	case VFIO_PCI_ROM_REGION_INDEX:
	default:
		gvt_vgpu_err("unsupported region: %u\n", index);
	}

	return ret == 0 ? count : ret;
}
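
/*
 * Illustrative example (annotation, not in the original file): a 4-byte
 * read of the PCI vendor ID arrives with
 * *ppos == VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_CONFIG_REGION_INDEX),
 * decodes to index == VFIO_PCI_CONFIG_REGION_INDEX and pos == 0, and is
 * forwarded to intel_gvt_ops->emulate_cfg_read(vgpu, 0, buf, 4).
 */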
static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
			size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}
static ssize_t intel_vgpu_write(struct mdev_device *mdev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}
static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
	unsigned int index;
	u64 virtaddr;
	unsigned long req_size, pgoff, req_start;
	pgprot_t pg_prot;
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index != VFIO_PCI_BAR2_REGION_INDEX)
		return -EINVAL;

	pg_prot = vma->vm_page_prot;
	virtaddr = vma->vm_start;
	req_size = vma->vm_end - vma->vm_start;
	pgoff = vma->vm_pgoff &
		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
	req_start = pgoff << PAGE_SHIFT;

	if (!intel_vgpu_in_aperture(vgpu, req_start))
		return -EINVAL;
	if (req_start + req_size >
	    vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
		return -EINVAL;

	pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;

	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}
*vgpu
, int type
)
857 if (type
== VFIO_PCI_INTX_IRQ_INDEX
|| type
== VFIO_PCI_MSI_IRQ_INDEX
)
static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags,
			void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	return 0;
}
static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	struct eventfd_ctx *trigger;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int fd = *(int *)data;

		trigger = eventfd_ctx_fdget(fd);
		if (IS_ERR(trigger)) {
			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
			return PTR_ERR(trigger);
		}
		vgpu->vdev.msi_trigger = trigger;
	}

	return 0;
}
static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
		unsigned int index, unsigned int start, unsigned int count,
		void *data)
{
	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
			unsigned int start, unsigned int count, uint32_t flags,
			void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = intel_vgpu_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = intel_vgpu_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_msi_trigger;
			break;
		}
		break;
	}

	if (!func)
		return -ENOTTY;

	return func(vgpu, index, start, count, flags, data);
}
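
/*
 * Illustrative example (annotation, not in the original file): userspace
 * arms MSI delivery with VFIO_DEVICE_SET_IRQS, index ==
 * VFIO_PCI_MSI_IRQ_INDEX and flags == VFIO_IRQ_SET_DATA_EVENTFD |
 * VFIO_IRQ_SET_ACTION_TRIGGER, passing an eventfd; this lands in
 * intel_vgpu_set_msi_trigger(), which stores the eventfd context that
 * kvmgt_inject_msi() signals later.
 */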
static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		info.num_regions = VFIO_PCI_NUM_REGIONS;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		unsigned int i;
		int ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		size_t size;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->gvt->device_info.cfg_space_size;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			size = sizeof(*sparse) +
					(nr_areas * sizeof(*sparse->areas));
			sparse = kzalloc(size, GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				struct vfio_region_info_cap_type cap_type;

				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vgpu->vdev.num_regions)
					return -EINVAL;
				info.index =
					array_index_nospec(info.index,
							VFIO_PCI_NUM_REGIONS +
							vgpu->vdev.num_regions);

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vgpu->vdev.region[i].size;
				info.flags = vgpu->vdev.region[i].flags;

				cap_type.type = vgpu->vdev.region[i].type;
				cap_type.subtype = vgpu->vdev.region[i].subtype;

				ret = vfio_info_add_capability(&caps,
						VFIO_REGION_INFO_CAP_TYPE,
						&cap_type);
				if (ret)
					return ret;
			}
		}

		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					VFIO_REGION_INFO_CAP_SPARSE_MMAP,
					sparse);
				kfree(sparse);
				if (ret)
					return ret;
				break;
			default:
				return -EINVAL;
			}
		}

		if (caps.size) {
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_ops->vgpu_reset(vgpu);
		return 0;
	}

	return -ENOTTY;
}
static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (mdev) {
		struct intel_vgpu *vgpu = (struct intel_vgpu *)
			mdev_get_drvdata(mdev);
		return sprintf(buf, "%d\n", vgpu->id);
	}
	return sprintf(buf, "\n");
}
static ssize_t
hw_id_show(struct device *dev, struct device_attribute *attr,
	   char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (mdev) {
		struct intel_vgpu *vgpu = (struct intel_vgpu *)
			mdev_get_drvdata(mdev);
		return sprintf(buf, "%u\n",
			       vgpu->shadow_ctx->hw_id);
	}
	return sprintf(buf, "\n");
}
static DEVICE_ATTR_RO(vgpu_id);
static DEVICE_ATTR_RO(hw_id);
static struct attribute *intel_vgpu_attrs[] = {
	&dev_attr_vgpu_id.attr,
	&dev_attr_hw_id.attr,
	NULL
};
static const struct attribute_group intel_vgpu_group = {
	.name  = "intel_vgpu",
	.attrs = intel_vgpu_attrs,
};
static const struct attribute_group *intel_vgpu_groups[] = {
	&intel_vgpu_group,
	NULL,
};
static const struct mdev_parent_ops intel_vgpu_ops = {
	.supported_type_groups	= intel_vgpu_type_groups,
	.mdev_attr_groups	= intel_vgpu_groups,
	.create			= intel_vgpu_create,
	.remove			= intel_vgpu_remove,

	.open			= intel_vgpu_open,
	.release		= intel_vgpu_release,

	.read			= intel_vgpu_read,
	.write			= intel_vgpu_write,
	.mmap			= intel_vgpu_mmap,
	.ioctl			= intel_vgpu_ioctl,
};
static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
	if (!intel_gvt_init_vgpu_type_groups(gvt))
		return -EFAULT;

	intel_gvt_ops = ops;

	return mdev_register_device(dev, &intel_vgpu_ops);
}
static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	intel_gvt_cleanup_vgpu_type_groups(gvt);
	mdev_unregister_device(dev);
}
static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}
static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return 0;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}
static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
						  (void *)val, len);
}
static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
{
	struct intel_vgpu *itr;
	struct kvmgt_guest_info *info;
	int id;
	bool ret = false;

	mutex_lock(&vgpu->gvt->lock);
	for_each_active_vgpu(vgpu->gvt, itr, id) {
		if (!handle_valid(itr->handle))
			continue;

		info = (struct kvmgt_guest_info *)itr->handle;
		if (kvm && kvm == info->kvm) {
			ret = true;
			goto out;
		}
	}
out:
	mutex_unlock(&vgpu->gvt->lock);
	return ret;
}
static int kvmgt_guest_init(struct mdev_device *mdev)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct kvm *kvm;

	vgpu = mdev_get_drvdata(mdev);
	if (handle_valid(vgpu->handle))
		return -EEXIST;

	kvm = vgpu->vdev.kvm;
	if (!kvm || kvm->mm != current->mm) {
		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
		return -ESRCH;
	}

	if (__kvmgt_vgpu_exist(vgpu, kvm))
		return -EEXIST;

	info = vzalloc(sizeof(struct kvmgt_guest_info));
	if (!info)
		return -ENOMEM;

	vgpu->handle = (unsigned long)info;
	info->vgpu = vgpu;
	info->kvm = kvm;
	kvm_get_kvm(info->kvm);

	kvmgt_protect_table_init(info);
	gvt_cache_init(vgpu);

	info->track_node.track_write = kvmgt_page_track_write;
	info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(kvm, &info->track_node);

	return 0;
}
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
	kvm_put_kvm(info->kvm);
	kvmgt_protect_table_destroy(info);
	gvt_cache_destroy(info->vgpu);
	vfree(info);

	return true;
}
static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
	/* nothing to do here */
}
static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;

	if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
		return 0;

	return -EFAULT;
}
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	unsigned long iova, pfn;
	struct kvmgt_guest_info *info;
	struct device *dev;
	struct intel_vgpu *vgpu;
	int rc;

	if (!handle_valid(handle))
		return INTEL_GVT_INVALID_ADDR;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;
	iova = gvt_cache_find(info->vgpu, gfn);
	if (iova != INTEL_GVT_INVALID_ADDR)
		return iova;

	pfn = INTEL_GVT_INVALID_ADDR;
	dev = mdev_dev(info->vgpu->vdev.mdev);
	rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
	if (rc != 1) {
		gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
			     gfn, rc);
		return INTEL_GVT_INVALID_ADDR;
	}
	/* transfer to host iova for GFX to use DMA */
	rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
	if (rc) {
		gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
		vfio_unpin_pages(dev, &gfn, 1);
		return INTEL_GVT_INVALID_ADDR;
	}

	gvt_cache_add(info->vgpu, gfn, iova);
	return iova;
}
static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len, bool write)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	int idx, ret;
	bool kthread = current->mm == NULL;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	if (kthread)
		use_mm(kvm->mm);

	idx = srcu_read_lock(&kvm->srcu);
	ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
		      kvm_read_guest(kvm, gpa, buf, len);
	srcu_read_unlock(&kvm->srcu, idx);

	if (kthread)
		unuse_mm(kvm->mm);

	return ret;
}
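
/*
 * Annotation (not in the original file): when called from a kernel
 * thread (current->mm == NULL), the guest's mm is temporarily adopted
 * with use_mm() so kvm_read_guest()/kvm_write_guest() can resolve the
 * guest memory mappings, then dropped again with unuse_mm().
 */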
static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}
static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}
static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}
struct intel_gvt_mpt kvmgt_mpt = {
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.set_wp_page = kvmgt_write_protect_add,
	.unset_wp_page = kvmgt_write_protect_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);
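
/*
 * Note (annotation, not in the original file): kvmgt_mpt is the KVM
 * backend of GVT-g's mediated pass-through (MPT) interface; the GVT-g
 * core is expected to reach these hooks through its hypervisor
 * abstraction layer rather than calling KVM or VFIO directly.
 */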
static int __init kvmgt_init(void)
{
	return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);
MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");