/*
 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
 *
 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Kevin Tian <kevin.tian@intel.com>
 *    Jike Song <jike.song@intel.com>
 *    Xiaoguang Chen <xiaoguang.chen@intel.com>
 */

#include <linux/init.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <linux/types.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
#include <linux/uuid.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>

#include "i915_drv.h"
#include "gvt.h"

static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)	(off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)

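/*
 * Per-guest bookkeeping used below: kvmgt_pgfn entries record which guest
 * frames are write-protected, kvmgt_guest_info ties a vGPU to its KVM
 * instance and page-track notifier, and gvt_dma nodes cache gfn -> host iova
 * translations for pinned guest pages.
 */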
struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

struct gvt_dma {
	struct rb_node node;
	gfn_t gfn;
	unsigned long iova;
	struct list_head list;
};

static inline bool handle_valid(unsigned long handle)
{
	return !!(handle & ~0xff);
}

static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);

static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
		unsigned long *iova)
{
	struct page *page;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	if (unlikely(!pfn_valid(pfn)))
		return -EFAULT;

	page = pfn_to_page(pfn);
	daddr = dma_map_page(dev, page, 0, PAGE_SIZE,
			PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr))
		return -ENOMEM;

	*iova = (unsigned long)(daddr >> PAGE_SHIFT);
	return 0;
}

static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova)
{
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	daddr = (dma_addr_t)(iova << PAGE_SHIFT);
	dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
}

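/*
 * The gfn -> iova cache below is a per-vGPU rb-tree keyed by guest frame
 * number and protected by vdev.cache_lock. Lookups that miss fall back to
 * vfio_pin_pages() plus gvt_dma_map_iova() in kvmgt_gfn_to_pfn().
 */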
static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct rb_node *node = vgpu->vdev.cache.rb_node;
	struct gvt_dma *ret = NULL;

	while (node) {
		struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);

		if (gfn < itr->gfn)
			node = node->rb_left;
		else if (gfn > itr->gfn)
			node = node->rb_right;
		else {
			ret = itr;
			break;
		}
	}

	return ret;
}

static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *entry;
	unsigned long iova;

	mutex_lock(&vgpu->vdev.cache_lock);

	entry = __gvt_cache_find(vgpu, gfn);
	iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova;

	mutex_unlock(&vgpu->vdev.cache_lock);
	return iova;
}

static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
		unsigned long iova)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return;

	new->gfn = gfn;
	new->iova = iova;
	INIT_LIST_HEAD(&new->list);

	mutex_lock(&vgpu->vdev.cache_lock);
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, node);

		if (gfn == itr->gfn)
			goto out;
		else if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);
	return;

out:
	mutex_unlock(&vgpu->vdev.cache_lock);
	kfree(new);
}

static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
				struct gvt_dma *entry)
{
	rb_erase(&entry->node, &vgpu->vdev.cache);
	kfree(entry);
}

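/*
 * Entries that the IOMMU unmap notifier asks us to drop are moved onto
 * unpin_list and released from this worker, so vfio_unpin_pages() is not
 * called directly from the notifier path.
 */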
static void intel_vgpu_unpin_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					       vdev.unpin_work);
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	struct gvt_dma *this;
	unsigned long gfn;

	for (;;) {
		spin_lock(&vgpu->vdev.unpin_lock);
		if (list_empty(&vgpu->vdev.unpin_list)) {
			spin_unlock(&vgpu->vdev.unpin_lock);
			break;
		}
		this = list_first_entry(&vgpu->vdev.unpin_list,
					struct gvt_dma, list);
		list_del(&this->list);
		spin_unlock(&vgpu->vdev.unpin_lock);

		gfn = this->gfn;
		vfio_unpin_pages(dev, &gfn, 1);
		kfree(this);
	}
}

static bool gvt_cache_mark_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *this;

	mutex_lock(&vgpu->vdev.cache_lock);
	this = __gvt_cache_find(vgpu, gfn);
	if (!this) {
		mutex_unlock(&vgpu->vdev.cache_lock);
		return false;
	}
	gvt_dma_unmap_iova(vgpu, this->iova);
	/* remove this from rb tree */
	rb_erase(&this->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);

	/* put this to the unpin_list */
	spin_lock(&vgpu->vdev.unpin_lock);
	list_move_tail(&this->list, &vgpu->vdev.unpin_list);
	spin_unlock(&vgpu->vdev.unpin_lock);

	return true;
}

static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->vdev.cache = RB_ROOT;
	mutex_init(&vgpu->vdev.cache_lock);
}

static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	unsigned long gfn;

	for (;;) {
		mutex_lock(&vgpu->vdev.cache_lock);
		node = rb_first(&vgpu->vdev.cache);
		if (!node) {
			mutex_unlock(&vgpu->vdev.cache_lock);
			break;
		}
		dma = rb_entry(node, struct gvt_dma, node);
		gvt_dma_unmap_iova(vgpu, dma->iova);
		gfn = dma->gfn;
		__gvt_cache_remove_entry(vgpu, dma);
		mutex_unlock(&vgpu->vdev.cache_lock);
		vfio_unpin_pages(dev, &gfn, 1);
	}
}

static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
		const char *name)
{
	int i;
	struct intel_vgpu_type *t;
	const char *driver_name = dev_driver_string(
			&gvt->dev_priv->drm.pdev->dev);

	for (i = 0; i < gvt->num_types; i++) {
		t = &gvt->types[i];
		if (!strncmp(t->name, name + strlen(driver_name) + 1,
			sizeof(t->name)))
			return t;
	}

	return NULL;
}

static ssize_t available_instances_show(struct kobject *kobj,
					struct device *dev, char *buf)
{
	struct intel_vgpu_type *type;
	unsigned int num = 0;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		num = 0;
	else
		num = type->avail_instance;

	return sprintf(buf, "%u\n", num);
}

static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
			       char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}

static ssize_t description_show(struct kobject *kobj, struct device *dev,
				char *buf)
{
	struct intel_vgpu_type *type;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		return 0;

	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
		       "fence: %d\nresolution: %s\n"
		       "weight: %d\n",
		       BYTES_TO_MB(type->low_gm_size),
		       BYTES_TO_MB(type->high_gm_size),
		       type->fence, vgpu_edid_str(type->resolution),
		       type->weight);
}

static MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);

static struct attribute *type_attrs[] = {
	&mdev_type_attr_available_instances.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_description.attr,
	NULL,
};

static struct attribute_group *intel_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};

static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i, j;
	struct intel_vgpu_type *type;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		type = &gvt->types[i];

		group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
		if (WARN_ON(!group))
			goto unwind;

		group->name = type->name;
		group->attrs = type_attrs;
		intel_vgpu_type_groups[i] = group;
	}

	return true;

unwind:
	for (j = 0; j < i; j++) {
		group = intel_vgpu_type_groups[j];
		kfree(group);
	}

	return false;
}

static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
{
	int i;
	struct attribute_group *group;

	for (i = 0; i < gvt->num_types; i++) {
		group = intel_vgpu_type_groups[i];
		kfree(group);
	}
}

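/*
 * Write-protection bookkeeping: the hash table of kvmgt_pgfn entries mirrors
 * which guest frames are currently registered with KVM's page tracking
 * (KVM_PAGE_TRACK_WRITE), keeping the add/remove requests idempotent.
 */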
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}

static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
{
	struct kvmgt_pgfn *p;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static struct kvmgt_pgfn *
__kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p, *res = NULL;

	hash_for_each_possible(info->ptable, p, hnode, gfn) {
		if (gfn == p->gfn) {
			res = p;
			break;
		}
	}

	return res;
}

static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	return !!p;
}

static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}

static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
				gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	p = __kvmgt_protect_table_find(info, gfn);
	if (p) {
		hash_del(&p->hnode);
		kfree(p);
	}
}

static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = NULL;
	struct intel_vgpu_type *type;
	struct device *pdev;
	void *gvt;
	int ret;

	pdev = mdev_parent_dev(mdev);
	gvt = kdev_to_i915(pdev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type) {
		gvt_vgpu_err("failed to find type %s to create\n",
						kobject_name(kobj));
		ret = -EINVAL;
		goto out;
	}

	vgpu = intel_gvt_ops->vgpu_create(gvt, type);
	if (IS_ERR_OR_NULL(vgpu)) {
		ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
		gvt_vgpu_err("failed to create intel vgpu: %d\n", ret);
		goto out;
	}

	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
	INIT_WORK(&vgpu->vdev.unpin_work, intel_vgpu_unpin_work);
	spin_lock_init(&vgpu->vdev.unpin_lock);
	INIT_LIST_HEAD(&vgpu->vdev.unpin_list);

	vgpu->vdev.mdev = mdev;
	mdev_set_drvdata(mdev, vgpu);

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	ret = 0;

out:
	return ret;
}

static int intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	if (handle_valid(vgpu->handle))
		return -EBUSY;

	intel_gvt_ops->vgpu_destroy(vgpu);
	return 0;
}

static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.iommu_notifier);
	bool sched_unmap = false;

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		unsigned long gfn, end_gfn;

		gfn = unmap->iova >> PAGE_SHIFT;
		end_gfn = gfn + unmap->size / PAGE_SIZE;

		while (gfn < end_gfn)
			sched_unmap |= gvt_cache_mark_remove(vgpu, gfn++);

		if (sched_unmap)
			schedule_work(&vgpu->vdev.unpin_work);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vgpu->vdev.kvm = data;

		if (!data)
			schedule_work(&vgpu->vdev.release_work);
	}

	return NOTIFY_OK;
}

static int intel_vgpu_open(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long events;
	int ret;

	vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
				&vgpu->vdev.iommu_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
			ret);
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
				&vgpu->vdev.group_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
			ret);
		goto undo_iommu;
	}

	ret = kvmgt_guest_init(mdev);
	if (ret)
		goto undo_group;

	intel_gvt_ops->vgpu_activate(vgpu);

	atomic_set(&vgpu->vdev.released, 0);
	return ret;

undo_group:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);

undo_iommu:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
out:
	return ret;
}

static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
	struct kvmgt_guest_info *info;
	int ret;

	if (!handle_valid(vgpu->handle))
		return;

	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
		return;

	intel_gvt_ops->vgpu_deactivate(vgpu);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);
	WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);

	info = (struct kvmgt_guest_info *)vgpu->handle;
	kvmgt_guest_exit(info);

	vgpu->vdev.kvm = NULL;
	vgpu->handle = 0;
}

static void intel_vgpu_release(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	__intel_vgpu_release(vgpu);
}

static void intel_vgpu_release_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					vdev.release_work);

	__intel_vgpu_release(vgpu);
}

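/*
 * intel_vgpu_get_bar0_addr() reads the guest-visible BAR0 base out of the
 * virtual config space; intel_vgpu_rw() then uses it to turn a VFIO region
 * offset into a guest physical address for the MMIO emulation handlers.
 */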
static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
{
	u32 start_lo, start_hi;
	u32 mem_type;
	int pos = PCI_BASE_ADDRESS_0;

	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
			PCI_BASE_ADDRESS_MEM_MASK;
	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
						+ pos + 4));
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
	default:
		/* mem unknown type treated as 32-bit BAR */
		start_hi = 0;
		break;
	}

	return ((u64)start_hi << 32) | start_lo;
}

static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
			size_t count, loff_t *ppos, bool is_write)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int ret = -EINVAL;

	if (index >= VFIO_PCI_NUM_REGIONS) {
		gvt_vgpu_err("invalid index: %u\n", index);
		return -EINVAL;
	}

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		if (is_write)
			ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
						buf, count);
		else
			ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
						buf, count);
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
	case VFIO_PCI_BAR1_REGION_INDEX:
		if (is_write) {
			uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

			ret = intel_gvt_ops->emulate_mmio_write(vgpu,
						bar0_start + pos, buf, count);
		} else {
			uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

			ret = intel_gvt_ops->emulate_mmio_read(vgpu,
						bar0_start + pos, buf, count);
		}
		break;
	case VFIO_PCI_BAR2_REGION_INDEX:
	case VFIO_PCI_BAR3_REGION_INDEX:
	case VFIO_PCI_BAR4_REGION_INDEX:
	case VFIO_PCI_BAR5_REGION_INDEX:
	case VFIO_PCI_VGA_REGION_INDEX:
	case VFIO_PCI_ROM_REGION_INDEX:
	default:
		gvt_vgpu_err("unsupported region: %u\n", index);
	}

	return ret == 0 ? count : ret;
}

static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
			size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}

static ssize_t intel_vgpu_write(struct mdev_device *mdev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

write_err:
	return -EFAULT;
}

*mdev
, struct vm_area_struct
*vma
)
847 unsigned long req_size
, pgoff
= 0;
849 struct intel_vgpu
*vgpu
= mdev_get_drvdata(mdev
);
851 index
= vma
->vm_pgoff
>> (VFIO_PCI_OFFSET_SHIFT
- PAGE_SHIFT
);
852 if (index
>= VFIO_PCI_ROM_REGION_INDEX
)
855 if (vma
->vm_end
< vma
->vm_start
)
857 if ((vma
->vm_flags
& VM_SHARED
) == 0)
859 if (index
!= VFIO_PCI_BAR2_REGION_INDEX
)
862 pg_prot
= vma
->vm_page_prot
;
863 virtaddr
= vma
->vm_start
;
864 req_size
= vma
->vm_end
- vma
->vm_start
;
865 pgoff
= vgpu_aperture_pa_base(vgpu
) >> PAGE_SHIFT
;
867 return remap_pfn_range(vma
, virtaddr
, pgoff
, req_size
, pg_prot
);
static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
{
	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
		return 1;

	return 0;
}

static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags,
			void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
			unsigned int index, unsigned int start,
			unsigned int count, uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	return 0;
}

static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	struct eventfd_ctx *trigger;

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		int fd = *(int *)data;

		trigger = eventfd_ctx_fdget(fd);
		if (IS_ERR(trigger)) {
			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
			return PTR_ERR(trigger);
		}
		vgpu->vdev.msi_trigger = trigger;
	}

	return 0;
}

static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
		unsigned int index, unsigned int start, unsigned int count,
		void *data)
{
	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
			unsigned int start, unsigned int count, uint32_t flags,
			void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = intel_vgpu_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = intel_vgpu_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_msi_trigger;
			break;
		}
		break;
	}

	if (!func)
		return -ENOTTY;

	return func(vgpu, index, start, count, flags, data);
}

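/*
 * intel_vgpu_ioctl() implements the usual VFIO device ioctls for the mdev:
 * DEVICE_GET_INFO, GET_REGION_INFO (with a sparse-mmap capability describing
 * the BAR2 aperture), GET_IRQ_INFO, SET_IRQS and DEVICE_RESET.
 */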
static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		info.num_regions = VFIO_PCI_NUM_REGIONS;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		int i, ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		size_t size;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = INTEL_GVT_MAX_CFG_SPACE_SZ;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			size = sizeof(*sparse) +
					(nr_areas * sizeof(*sparse->areas));
			sparse = kzalloc(size, GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;

			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				struct vfio_region_info_cap_type cap_type;

				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vgpu->vdev.num_regions)
					return -EINVAL;

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vgpu->vdev.region[i].size;
				info.flags = vgpu->vdev.region[i].flags;

				cap_type.type = vgpu->vdev.region[i].type;
				cap_type.subtype = vgpu->vdev.region[i].subtype;

				ret = vfio_info_add_capability(&caps,
						VFIO_REGION_INFO_CAP_TYPE,
						&cap_type);
				if (ret)
					return ret;
			}
		}

		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					VFIO_REGION_INFO_CAP_SPARSE_MMAP,
					sparse);
				kfree(sparse);
				if (ret)
					return ret;
				break;
			default:
				return -EINVAL;
			}
		}

		if (caps.size) {
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_ops->vgpu_reset(vgpu);
		return 0;
	}

	return 0;
}

static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
	     char *buf)
{
	struct mdev_device *mdev = mdev_from_dev(dev);

	if (mdev) {
		struct intel_vgpu *vgpu = (struct intel_vgpu *)
			mdev_get_drvdata(mdev);
		return sprintf(buf, "%d\n", vgpu->id);
	}
	return sprintf(buf, "\n");
}

static DEVICE_ATTR_RO(vgpu_id);

static struct attribute *intel_vgpu_attrs[] = {
	&dev_attr_vgpu_id.attr,
	NULL
};

static const struct attribute_group intel_vgpu_group = {
	.name  = "intel_vgpu",
	.attrs = intel_vgpu_attrs,
};

static const struct attribute_group *intel_vgpu_groups[] = {
	&intel_vgpu_group,
	NULL,
};

= {
1226 .supported_type_groups
= intel_vgpu_type_groups
,
1227 .mdev_attr_groups
= intel_vgpu_groups
,
1228 .create
= intel_vgpu_create
,
1229 .remove
= intel_vgpu_remove
,
1231 .open
= intel_vgpu_open
,
1232 .release
= intel_vgpu_release
,
1234 .read
= intel_vgpu_read
,
1235 .write
= intel_vgpu_write
,
1236 .mmap
= intel_vgpu_mmap
,
1237 .ioctl
= intel_vgpu_ioctl
,
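/*
 * kvmgt_host_init()/kvmgt_host_exit() register and unregister the parent-ops
 * table above with the mdev core when GVT-g sets up the KVMGT backend.
 */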
static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
{
	if (!intel_gvt_init_vgpu_type_groups(gvt))
		return -EFAULT;

	intel_gvt_ops = ops;

	return mdev_register_device(dev, &intel_vgpu_ops);
}

static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	intel_gvt_cleanup_vgpu_type_groups(gvt);
	mdev_unregister_device(dev);
}

static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_add(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	struct kvm_memory_slot *slot;
	int idx;

	if (!handle_valid(handle))
		return 0;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	idx = srcu_read_lock(&kvm->srcu);
	slot = gfn_to_memslot(kvm, gfn);
	if (!slot) {
		srcu_read_unlock(&kvm->srcu, idx);
		return -EINVAL;
	}

	spin_lock(&kvm->mmu_lock);

	if (!kvmgt_gfn_is_write_protected(info, gfn))
		goto out;

	kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
	kvmgt_protect_table_del(info, gfn);

out:
	spin_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, idx);
	return 0;
}

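/*
 * KVM page-track notifier callbacks: writes hitting write-protected guest
 * frames are forwarded to the GVT MMIO emulation path, and memslot flushes
 * drop the corresponding protection entries.
 */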
static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
		const u8 *val, int len,
		struct kvm_page_track_notifier_node *node)
{
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
		intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
						  (void *)val, len);
}

static void kvmgt_page_track_flush_slot(struct kvm *kvm,
		struct kvm_memory_slot *slot,
		struct kvm_page_track_notifier_node *node)
{
	int i;
	gfn_t gfn;
	struct kvmgt_guest_info *info = container_of(node,
					struct kvmgt_guest_info, track_node);

	spin_lock(&kvm->mmu_lock);
	for (i = 0; i < slot->npages; i++) {
		gfn = slot->base_gfn + i;
		if (kvmgt_gfn_is_write_protected(info, gfn)) {
			kvm_slot_page_track_remove_page(kvm, slot, gfn,
						KVM_PAGE_TRACK_WRITE);
			kvmgt_protect_table_del(info, gfn);
		}
	}
	spin_unlock(&kvm->mmu_lock);
}

static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
{
	struct intel_vgpu *itr;
	struct kvmgt_guest_info *info;
	int id;
	bool ret = false;

	mutex_lock(&vgpu->gvt->lock);
	for_each_active_vgpu(vgpu->gvt, itr, id) {
		if (!handle_valid(itr->handle))
			continue;

		info = (struct kvmgt_guest_info *)itr->handle;
		if (kvm && kvm == info->kvm) {
			ret = true;
			goto out;
		}
	}
out:
	mutex_unlock(&vgpu->gvt->lock);
	return ret;
}

static int kvmgt_guest_init(struct mdev_device *mdev)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct kvm *kvm;

	vgpu = mdev_get_drvdata(mdev);
	if (handle_valid(vgpu->handle))
		return -EEXIST;

	kvm = vgpu->vdev.kvm;
	if (!kvm || kvm->mm != current->mm) {
		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
		return -ESRCH;
	}

	if (__kvmgt_vgpu_exist(vgpu, kvm))
		return -EEXIST;

	info = vzalloc(sizeof(struct kvmgt_guest_info));
	if (!info)
		return -ENOMEM;

	vgpu->handle = (unsigned long)info;
	info->vgpu = vgpu;
	info->kvm = kvm;
	kvm_get_kvm(info->kvm);

	kvmgt_protect_table_init(info);
	gvt_cache_init(vgpu);

	info->track_node.track_write = kvmgt_page_track_write;
	info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(kvm, &info->track_node);

	return 0;
}

static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
	kvm_put_kvm(info->kvm);
	kvmgt_protect_table_destroy(info);
	gvt_cache_destroy(info->vgpu);
	vfree(info);

	return true;
}

static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}

static void kvmgt_detach_vgpu(unsigned long handle)
{
	/* nothing to do here */
}

static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;

	if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
		return 0;

	return -EFAULT;
}

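/*
 * kvmgt_gfn_to_pfn() first consults the gfn -> iova cache; on a miss it pins
 * the guest page through VFIO, maps it for device DMA and caches the result.
 */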
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	unsigned long iova, pfn;
	struct kvmgt_guest_info *info;
	struct device *dev;
	int rc;

	if (!handle_valid(handle))
		return INTEL_GVT_INVALID_ADDR;

	info = (struct kvmgt_guest_info *)handle;

	iova = gvt_cache_find(info->vgpu, gfn);
	if (iova != INTEL_GVT_INVALID_ADDR)
		return iova;

	pfn = INTEL_GVT_INVALID_ADDR;
	dev = mdev_dev(info->vgpu->vdev.mdev);
	rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
	if (rc != 1) {
		gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
			gfn, rc);
		return INTEL_GVT_INVALID_ADDR;
	}
	/* transfer to host iova for GFX to use DMA */
	rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
	if (rc) {
		gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
		vfio_unpin_pages(dev, &gfn, 1);
		return INTEL_GVT_INVALID_ADDR;
	}

	gvt_cache_add(info->vgpu, gfn, iova);
	return iova;
}

static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len, bool write)
{
	struct kvmgt_guest_info *info;
	struct kvm *kvm;
	int idx, ret;
	bool kthread = current->mm == NULL;

	if (!handle_valid(handle))
		return -ESRCH;

	info = (struct kvmgt_guest_info *)handle;
	kvm = info->kvm;

	if (kthread)
		use_mm(kvm->mm);

	idx = srcu_read_lock(&kvm->srcu);
	ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
		      kvm_read_guest(kvm, gpa, buf, len);
	srcu_read_unlock(&kvm->srcu, idx);

	if (kthread)
		unuse_mm(kvm->mm);

	return ret;
}

static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}

static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}

static unsigned long kvmgt_virt_to_pfn(void *addr)
{
	return PFN_DOWN(__pa(addr));
}

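/*
 * MPT (mediated pass-through) hooks exported to the GVT-g core: these are
 * the KVM-specific implementations of the intel_gvt_mpt interface.
 */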
struct intel_gvt_mpt kvmgt_mpt = {
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.set_wp_page = kvmgt_write_protect_add,
	.unset_wp_page = kvmgt_write_protect_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);

static int __init kvmgt_init(void)
{
	return 0;
}

static void __exit kvmgt_exit(void)
{
}

module_init(kvmgt_init);
module_exit(kvmgt_exit);

MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");