]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - drivers/gpu/drm/i915/gvt/kvmgt.c
drm/i915/gvt: Fix inconsistent locks holding sequence
[mirror_ubuntu-bionic-kernel.git] / drivers / gpu / drm / i915 / gvt / kvmgt.c
1 /*
2 * KVMGT - the implementation of Intel mediated pass-through framework for KVM
3 *
4 * Copyright(c) 2014-2016 Intel Corporation. All rights reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Kevin Tian <kevin.tian@intel.com>
27 * Jike Song <jike.song@intel.com>
28 * Xiaoguang Chen <xiaoguang.chen@intel.com>
29 */
30
31 #include <linux/init.h>
32 #include <linux/device.h>
33 #include <linux/mm.h>
34 #include <linux/mmu_context.h>
35 #include <linux/types.h>
36 #include <linux/list.h>
37 #include <linux/rbtree.h>
38 #include <linux/spinlock.h>
39 #include <linux/eventfd.h>
40 #include <linux/uuid.h>
41 #include <linux/kvm_host.h>
42 #include <linux/vfio.h>
43 #include <linux/mdev.h>
44
45 #include "i915_drv.h"
46 #include "gvt.h"
47
/* Callbacks into the GVT-g core, installed at host-init time. */
static const struct intel_gvt_ops *intel_gvt_ops;

/* helper macros copied from vfio-pci */
/* A 64-bit file offset encodes the VFIO region index in its top bits. */
#define VFIO_PCI_OFFSET_SHIFT   40
#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
55
/* Descriptor for one extra (device-specific) VFIO region. */
struct vfio_region {
	u32 type;	/* VFIO region type */
	u32 subtype;	/* VFIO region subtype */
	size_t size;	/* region size in bytes */
	u32 flags;	/* VFIO_REGION_INFO_FLAG_* */
};

/* One write-protected guest frame number, hashed by gfn. */
struct kvmgt_pgfn {
	gfn_t gfn;
	struct hlist_node hnode;
};

/*
 * Per-guest state: the associated KVM instance, the owning vGPU, the
 * KVM page-track hook and a hash of write-protected gfns.
 */
struct kvmgt_guest_info {
	struct kvm *kvm;
	struct intel_vgpu *vgpu;
	struct kvm_page_track_notifier_node track_node;
#define NR_BKT (1 << 18)
	struct hlist_head ptable[NR_BKT];
#undef NR_BKT
};

/*
 * A pinned guest page: rbtree node keyed by gfn, mapped to a host
 * iova; "list" links the entry onto unpin_list once it is marked for
 * deferred unpinning.
 */
struct gvt_dma {
	struct rb_node node;
	gfn_t gfn;
	unsigned long iova;
	struct list_head list;
};
83
/*
 * A vGPU handle stores a kvmgt_guest_info pointer; any value that fits
 * entirely within the low byte cannot be such a pointer, so it is
 * treated as invalid.
 */
static inline bool handle_valid(unsigned long handle)
{
	return (handle & ~0xff) != 0;
}
88
/* Forward declarations for routines referenced before their definitions. */
static int kvmgt_guest_init(struct mdev_device *mdev);
static void intel_vgpu_release_work(struct work_struct *work);
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info);
92
/*
 * Map the page at @pfn for device DMA and return the bus address as a
 * page-frame-style value through @iova (bus address >> PAGE_SHIFT).
 *
 * Returns 0 on success, -EFAULT for an invalid pfn, or -ENOMEM if the
 * DMA mapping fails.
 */
static int gvt_dma_map_iova(struct intel_vgpu *vgpu, kvm_pfn_t pfn,
		unsigned long *iova)
{
	struct page *page;
	struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
	dma_addr_t daddr;

	if (unlikely(!pfn_valid(pfn)))
		return -EFAULT;

	page = pfn_to_page(pfn);
	daddr = dma_map_page(dev, page, 0, PAGE_SIZE,
			PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(dev, daddr))
		return -ENOMEM;

	*iova = (unsigned long)(daddr >> PAGE_SHIFT);
	return 0;
}
112
113 static void gvt_dma_unmap_iova(struct intel_vgpu *vgpu, unsigned long iova)
114 {
115 struct device *dev = &vgpu->gvt->dev_priv->drm.pdev->dev;
116 dma_addr_t daddr;
117
118 daddr = (dma_addr_t)(iova << PAGE_SHIFT);
119 dma_unmap_page(dev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
120 }
121
122 static struct gvt_dma *__gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
123 {
124 struct rb_node *node = vgpu->vdev.cache.rb_node;
125 struct gvt_dma *ret = NULL;
126
127 while (node) {
128 struct gvt_dma *itr = rb_entry(node, struct gvt_dma, node);
129
130 if (gfn < itr->gfn)
131 node = node->rb_left;
132 else if (gfn > itr->gfn)
133 node = node->rb_right;
134 else {
135 ret = itr;
136 goto out;
137 }
138 }
139
140 out:
141 return ret;
142 }
143
144 static unsigned long gvt_cache_find(struct intel_vgpu *vgpu, gfn_t gfn)
145 {
146 struct gvt_dma *entry;
147 unsigned long iova;
148
149 mutex_lock(&vgpu->vdev.cache_lock);
150
151 entry = __gvt_cache_find(vgpu, gfn);
152 iova = (entry == NULL) ? INTEL_GVT_INVALID_ADDR : entry->iova;
153
154 mutex_unlock(&vgpu->vdev.cache_lock);
155 return iova;
156 }
157
/*
 * Insert a gfn -> iova translation into the per-vGPU pin cache.  The
 * node is allocated outside cache_lock; if the gfn turns out to be
 * cached already, the new node is discarded.  Allocation failure is
 * silently ignored — the translation simply stays uncached.
 */
static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
		unsigned long iova)
{
	struct gvt_dma *new, *itr;
	struct rb_node **link = &vgpu->vdev.cache.rb_node, *parent = NULL;

	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
	if (!new)
		return;

	new->gfn = gfn;
	new->iova = iova;
	INIT_LIST_HEAD(&new->list);

	mutex_lock(&vgpu->vdev.cache_lock);
	while (*link) {
		parent = *link;
		itr = rb_entry(parent, struct gvt_dma, node);

		if (gfn == itr->gfn)
			goto out;	/* duplicate: drop the new node */
		else if (gfn < itr->gfn)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&new->node, parent, link);
	rb_insert_color(&new->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);
	return;

out:
	mutex_unlock(&vgpu->vdev.cache_lock);
	kfree(new);
}
194
/* Unlink @entry from the cache rbtree and free it; caller holds cache_lock. */
static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
				struct gvt_dma *entry)
{
	rb_erase(&entry->node, &vgpu->vdev.cache);
	kfree(entry);
}
201
/*
 * Deferred unpin worker: drains vdev.unpin_list and releases the VFIO
 * page pins.  Running this from a work item keeps vfio_unpin_pages()
 * out of the paths that hold cache_lock (see gvt_cache_mark_remove()).
 */
static void intel_vgpu_unpin_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					       vdev.unpin_work);
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	struct gvt_dma *this;
	unsigned long gfn;

	for (;;) {
		/* Pop one entry at a time so the spinlock is held briefly. */
		spin_lock(&vgpu->vdev.unpin_lock);
		if (list_empty(&vgpu->vdev.unpin_list)) {
			spin_unlock(&vgpu->vdev.unpin_lock);
			break;
		}
		this = list_first_entry(&vgpu->vdev.unpin_list,
				struct gvt_dma, list);
		list_del(&this->list);
		spin_unlock(&vgpu->vdev.unpin_lock);

		gfn = this->gfn;
		vfio_unpin_pages(dev, &gfn, 1);
		kfree(this);
	}
}
226
/*
 * Remove the cache entry for @gfn: unmap its DMA mapping, pull the node
 * out of the rbtree, and queue it on unpin_list.  The actual
 * vfio_unpin_pages() call is deferred to intel_vgpu_unpin_work() so it
 * never runs with cache_lock held (avoids an inconsistent lock-holding
 * sequence against the VFIO/IOMMU notifier path).
 *
 * Returns true if an entry was queued, i.e. the caller should schedule
 * the unpin work.
 */
static bool gvt_cache_mark_remove(struct intel_vgpu *vgpu, gfn_t gfn)
{
	struct gvt_dma *this;

	mutex_lock(&vgpu->vdev.cache_lock);
	this = __gvt_cache_find(vgpu, gfn);
	if (!this) {
		mutex_unlock(&vgpu->vdev.cache_lock);
		return false;
	}
	gvt_dma_unmap_iova(vgpu, this->iova);
	/* remove this from rb tree */
	rb_erase(&this->node, &vgpu->vdev.cache);
	mutex_unlock(&vgpu->vdev.cache_lock);

	/* put this to the unpin_list */
	spin_lock(&vgpu->vdev.unpin_lock);
	list_move_tail(&this->list, &vgpu->vdev.unpin_list);
	spin_unlock(&vgpu->vdev.unpin_lock);

	return true;
}
249
/* Initialize an empty gfn->iova pin cache for a newly created vGPU. */
static void gvt_cache_init(struct intel_vgpu *vgpu)
{
	vgpu->vdev.cache = RB_ROOT;
	mutex_init(&vgpu->vdev.cache_lock);
}
255
/*
 * Tear down the whole pin cache: for every cached gfn, unmap its DMA
 * mapping, free the node, and unpin the page.  vfio_unpin_pages() is
 * deliberately called only after cache_lock has been dropped on each
 * iteration.
 */
static void gvt_cache_destroy(struct intel_vgpu *vgpu)
{
	struct gvt_dma *dma;
	struct rb_node *node = NULL;
	struct device *dev = mdev_dev(vgpu->vdev.mdev);
	unsigned long gfn;

	for (;;) {
		mutex_lock(&vgpu->vdev.cache_lock);
		node = rb_first(&vgpu->vdev.cache);
		if (!node) {
			mutex_unlock(&vgpu->vdev.cache_lock);
			break;
		}
		dma = rb_entry(node, struct gvt_dma, node);
		gvt_dma_unmap_iova(vgpu, dma->iova);
		gfn = dma->gfn;
		__gvt_cache_remove_entry(vgpu, dma);
		mutex_unlock(&vgpu->vdev.cache_lock);
		vfio_unpin_pages(dev, &gfn, 1);
	}
}
278
/*
 * Resolve a vGPU type from an mdev type kobject name.  The "+ 1" skip
 * past the driver-name length implies the kobject name has the form
 * "<driver>-<type>" — NOTE(review): confirm against the mdev sysfs
 * naming convention.  Returns NULL if no type matches.
 */
static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
		const char *name)
{
	int i;
	struct intel_vgpu_type *t;
	const char *driver_name = dev_driver_string(
			&gvt->dev_priv->drm.pdev->dev);

	for (i = 0; i < gvt->num_types; i++) {
		t = &gvt->types[i];
		if (!strncmp(t->name, name + strlen(driver_name) + 1,
			sizeof(t->name)))
			return t;
	}

	return NULL;
}
296
297 static ssize_t available_instances_show(struct kobject *kobj,
298 struct device *dev, char *buf)
299 {
300 struct intel_vgpu_type *type;
301 unsigned int num = 0;
302 void *gvt = kdev_to_i915(dev)->gvt;
303
304 type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
305 if (!type)
306 num = 0;
307 else
308 num = type->avail_instance;
309
310 return sprintf(buf, "%u\n", num);
311 }
312
/* mdev "device_api" attribute: this device emulates a VFIO PCI device. */
static ssize_t device_api_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
}
318
/*
 * mdev "description" attribute: human-readable summary of the type's
 * resources (GM sizes, fences, resolution, scheduling weight).
 */
static ssize_t description_show(struct kobject *kobj, struct device *dev,
		char *buf)
{
	struct intel_vgpu_type *type;
	void *gvt = kdev_to_i915(dev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type)
		return 0;

	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
		       "fence: %d\nresolution: %s\n"
		       "weight: %d\n",
		       BYTES_TO_MB(type->low_gm_size),
		       BYTES_TO_MB(type->high_gm_size),
		       type->fence, vgpu_edid_str(type->resolution),
		       type->weight);
}
337
/* Read-only sysfs attributes exposed for every supported vGPU type. */
static MDEV_TYPE_ATTR_RO(available_instances);
static MDEV_TYPE_ATTR_RO(device_api);
static MDEV_TYPE_ATTR_RO(description);

static struct attribute *type_attrs[] = {
	&mdev_type_attr_available_instances.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_description.attr,
	NULL,
};

/* One attribute group per vGPU type, populated by
 * intel_gvt_init_vgpu_type_groups(). */
static struct attribute_group *intel_vgpu_type_groups[] = {
	[0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
};
352
353 static bool intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
354 {
355 int i, j;
356 struct intel_vgpu_type *type;
357 struct attribute_group *group;
358
359 for (i = 0; i < gvt->num_types; i++) {
360 type = &gvt->types[i];
361
362 group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
363 if (WARN_ON(!group))
364 goto unwind;
365
366 group->name = type->name;
367 group->attrs = type_attrs;
368 intel_vgpu_type_groups[i] = group;
369 }
370
371 return true;
372
373 unwind:
374 for (j = 0; j < i; j++) {
375 group = intel_vgpu_type_groups[j];
376 kfree(group);
377 }
378
379 return false;
380 }
381
382 static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
383 {
384 int i;
385 struct attribute_group *group;
386
387 for (i = 0; i < gvt->num_types; i++) {
388 group = intel_vgpu_type_groups[i];
389 kfree(group);
390 }
391 }
392
/* Initialize the hash table of write-protected guest gfns. */
static void kvmgt_protect_table_init(struct kvmgt_guest_info *info)
{
	hash_init(info->ptable);
}
397
398 static void kvmgt_protect_table_destroy(struct kvmgt_guest_info *info)
399 {
400 struct kvmgt_pgfn *p;
401 struct hlist_node *tmp;
402 int i;
403
404 hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
405 hash_del(&p->hnode);
406 kfree(p);
407 }
408 }
409
410 static struct kvmgt_pgfn *
411 __kvmgt_protect_table_find(struct kvmgt_guest_info *info, gfn_t gfn)
412 {
413 struct kvmgt_pgfn *p, *res = NULL;
414
415 hash_for_each_possible(info->ptable, p, hnode, gfn) {
416 if (gfn == p->gfn) {
417 res = p;
418 break;
419 }
420 }
421
422 return res;
423 }
424
425 static bool kvmgt_gfn_is_write_protected(struct kvmgt_guest_info *info,
426 gfn_t gfn)
427 {
428 struct kvmgt_pgfn *p;
429
430 p = __kvmgt_protect_table_find(info, gfn);
431 return !!p;
432 }
433
/*
 * Record @gfn as write-protected.  Called with kvm->mmu_lock held (see
 * kvmgt_write_protect_add()), hence the GFP_ATOMIC allocation.
 * Allocation failure is logged with a WARN and the gfn is dropped.
 */
static void kvmgt_protect_table_add(struct kvmgt_guest_info *info, gfn_t gfn)
{
	struct kvmgt_pgfn *p;

	if (kvmgt_gfn_is_write_protected(info, gfn))
		return;

	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
	if (WARN(!p, "gfn: 0x%llx\n", gfn))
		return;

	p->gfn = gfn;
	hash_add(info->ptable, &p->hnode, gfn);
}
448
449 static void kvmgt_protect_table_del(struct kvmgt_guest_info *info,
450 gfn_t gfn)
451 {
452 struct kvmgt_pgfn *p;
453
454 p = __kvmgt_protect_table_find(info, gfn);
455 if (p) {
456 hash_del(&p->hnode);
457 kfree(p);
458 }
459 }
460
/*
 * mdev "create" callback: resolve the requested vGPU type from the
 * type kobject name, create the vGPU through the GVT core, set up the
 * deferred release/unpin machinery, and attach the vGPU to the mdev as
 * drvdata.  Returns 0 on success or a negative errno.
 */
static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = NULL;
	struct intel_vgpu_type *type;
	struct device *pdev;
	void *gvt;
	int ret;

	pdev = mdev_parent_dev(mdev);
	gvt = kdev_to_i915(pdev)->gvt;

	type = intel_gvt_find_vgpu_type(gvt, kobject_name(kobj));
	if (!type) {
		gvt_vgpu_err("failed to find type %s to create\n",
						kobject_name(kobj));
		ret = -EINVAL;
		goto out;
	}

	vgpu = intel_gvt_ops->vgpu_create(gvt, type);
	if (IS_ERR_OR_NULL(vgpu)) {
		ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
		gvt_vgpu_err("failed to create intel vgpu: %d\n", ret);
		goto out;
	}

	/* Deferred-release and deferred-unpin work items. */
	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
	INIT_WORK(&vgpu->vdev.unpin_work, intel_vgpu_unpin_work);
	spin_lock_init(&vgpu->vdev.unpin_lock);
	INIT_LIST_HEAD(&vgpu->vdev.unpin_list);

	vgpu->vdev.mdev = mdev;
	mdev_set_drvdata(mdev, vgpu);

	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
		     dev_name(mdev_dev(mdev)));
	ret = 0;

out:
	return ret;
}
502
/*
 * mdev "remove" callback.  Refuse removal while the vGPU is still in
 * use by a guest: a valid handle means kvmgt_guest_init() has run and
 * release has not yet torn it down.
 */
static int intel_vgpu_remove(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	if (handle_valid(vgpu->handle))
		return -EBUSY;

	intel_gvt_ops->vgpu_destroy(vgpu);
	return 0;
}
513
/*
 * VFIO IOMMU notifier.  On DMA_UNMAP, mark every cached gfn in the
 * unmapped IOVA range for removal and, if anything was queued, kick
 * the deferred unpin worker — the unpin itself must not happen here
 * (see gvt_cache_mark_remove() for the lock-ordering rationale).
 */
static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.iommu_notifier);
	bool sched_unmap = false;

	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
		struct vfio_iommu_type1_dma_unmap *unmap = data;
		unsigned long gfn, end_gfn;

		gfn = unmap->iova >> PAGE_SHIFT;
		end_gfn = gfn + unmap->size / PAGE_SIZE;

		while (gfn < end_gfn)
			sched_unmap |= gvt_cache_mark_remove(vgpu, gfn++);

		if (sched_unmap)
			schedule_work(&vgpu->vdev.unpin_work);
	}

	return NOTIFY_OK;
}
538
/*
 * VFIO group notifier.  Records the KVM instance attached to the
 * group; a NULL KVM means the group is going away, so the deferred
 * release is scheduled.
 */
static int intel_vgpu_group_notifier(struct notifier_block *nb,
				     unsigned long action, void *data)
{
	struct intel_vgpu *vgpu = container_of(nb,
					struct intel_vgpu,
					vdev.group_notifier);

	/* the only action we care about */
	if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
		vgpu->vdev.kvm = data;

		if (!data)
			schedule_work(&vgpu->vdev.release_work);
	}

	return NOTIFY_OK;
}
556
/*
 * mdev "open" callback: register the IOMMU-unmap and group (KVM)
 * notifiers, bind the vGPU to its guest via kvmgt_guest_init(), then
 * activate it.  Error paths unwind the registrations in reverse order.
 */
static int intel_vgpu_open(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long events;
	int ret;

	vgpu->vdev.iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
	vgpu->vdev.group_notifier.notifier_call = intel_vgpu_group_notifier;

	events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY, &events,
				&vgpu->vdev.iommu_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
			ret);
		goto out;
	}

	events = VFIO_GROUP_NOTIFY_SET_KVM;
	ret = vfio_register_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY, &events,
				&vgpu->vdev.group_notifier);
	if (ret != 0) {
		gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
			ret);
		goto undo_iommu;
	}

	ret = kvmgt_guest_init(mdev);
	if (ret)
		goto undo_group;

	intel_gvt_ops->vgpu_activate(vgpu);

	/* Arm the release guard used by __intel_vgpu_release(). */
	atomic_set(&vgpu->vdev.released, 0);
	return ret;

undo_group:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);

undo_iommu:
	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
out:
	return ret;
}
603
/*
 * Common release path, reached from the mdev release callback and from
 * the deferred release work.  The "released" atomic guards the two
 * paths: only the first caller proceeds.  Deactivates the vGPU,
 * unregisters both notifiers, and detaches the guest state.
 */
static void __intel_vgpu_release(struct intel_vgpu *vgpu)
{
	struct kvmgt_guest_info *info;
	int ret;

	/* Nothing to do if the vGPU was never fully opened. */
	if (!handle_valid(vgpu->handle))
		return;

	/* Only one of the racing release paths may run the teardown. */
	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
		return;

	intel_gvt_ops->vgpu_deactivate(vgpu);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
					&vgpu->vdev.iommu_notifier);
	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);

	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_GROUP_NOTIFY,
					&vgpu->vdev.group_notifier);
	WARN(ret, "vfio_unregister_notifier for group failed: %d\n", ret);

	info = (struct kvmgt_guest_info *)vgpu->handle;
	kvmgt_guest_exit(info);

	vgpu->vdev.kvm = NULL;
	vgpu->handle = 0;
}
631
/* mdev "release" callback: funnel into the common release path. */
static void intel_vgpu_release(struct mdev_device *mdev)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	__intel_vgpu_release(vgpu);
}
638
/* Deferred release, scheduled when KVM detaches from the VFIO group. */
static void intel_vgpu_release_work(struct work_struct *work)
{
	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
					vdev.release_work);

	__intel_vgpu_release(vgpu);
}
646
/*
 * Read the guest-programmed base address of BAR0 from the virtual PCI
 * config space.  For a 64-bit memory BAR the next dword holds the high
 * half; 32-bit, 1M and unknown memory types get a zero high half.
 */
static uint64_t intel_vgpu_get_bar0_addr(struct intel_vgpu *vgpu)
{
	u32 start_lo, start_hi;
	u32 mem_type;
	int pos = PCI_BASE_ADDRESS_0;

	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
			PCI_BASE_ADDRESS_MEM_MASK;
	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + pos)) &
			PCI_BASE_ADDRESS_MEM_TYPE_MASK;

	switch (mem_type) {
	case PCI_BASE_ADDRESS_MEM_TYPE_64:
		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
						+ pos + 4));
		break;
	case PCI_BASE_ADDRESS_MEM_TYPE_32:
	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
		/* 1M mem BAR treated as 32-bit BAR */
	default:
		/* mem unknown type treated as 32-bit BAR */
		start_hi = 0;
		break;
	}

	return ((u64)start_hi << 32) | start_lo;
}
674
/*
 * Common backend for read/write on the mdev fd.  The VFIO region index
 * is decoded from the top bits of *ppos; config-space accesses go to
 * the cfg emulation, BAR0/BAR1 accesses are translated against the
 * BAR0 base (BAR1 is handled identically to BAR0 here) and forwarded
 * to MMIO emulation.  Returns @count on success or a negative errno.
 */
static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf,
			size_t count, loff_t *ppos, bool is_write)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
	uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
	int ret = -EINVAL;


	if (index >= VFIO_PCI_NUM_REGIONS) {
		gvt_vgpu_err("invalid index: %u\n", index);
		return -EINVAL;
	}

	switch (index) {
	case VFIO_PCI_CONFIG_REGION_INDEX:
		if (is_write)
			ret = intel_gvt_ops->emulate_cfg_write(vgpu, pos,
						buf, count);
		else
			ret = intel_gvt_ops->emulate_cfg_read(vgpu, pos,
						buf, count);
		break;
	case VFIO_PCI_BAR0_REGION_INDEX:
	case VFIO_PCI_BAR1_REGION_INDEX:
		if (is_write) {
			uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

			ret = intel_gvt_ops->emulate_mmio_write(vgpu,
						bar0_start + pos, buf, count);
		} else {
			uint64_t bar0_start = intel_vgpu_get_bar0_addr(vgpu);

			ret = intel_gvt_ops->emulate_mmio_read(vgpu,
						bar0_start + pos, buf, count);
		}
		break;
	case VFIO_PCI_BAR2_REGION_INDEX:
	case VFIO_PCI_BAR3_REGION_INDEX:
	case VFIO_PCI_BAR4_REGION_INDEX:
	case VFIO_PCI_BAR5_REGION_INDEX:
	case VFIO_PCI_VGA_REGION_INDEX:
	case VFIO_PCI_ROM_REGION_INDEX:
	default:
		gvt_vgpu_err("unsupported region: %u\n", index);
	}

	return ret == 0 ? count : ret;
}
724
/*
 * mdev "read" callback.  Splits the user request into naturally
 * aligned 4-, 2- or 1-byte accesses, emulates each through
 * intel_vgpu_rw(), and copies the result to userspace.  Returns the
 * number of bytes read, or -EFAULT on any emulation/copy failure.
 */
static ssize_t intel_vgpu_read(struct mdev_device *mdev, char __user *buf,
			size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Pick the widest access the offset alignment allows. */
		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 2;
		} else {
			u8 val;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val), ppos,
					false);
			if (ret <= 0)
				goto read_err;

			if (copy_to_user(buf, &val, sizeof(val)))
				goto read_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;

read_err:
	return -EFAULT;
}
783
/*
 * mdev "write" callback: mirror image of intel_vgpu_read().  Copies
 * each naturally aligned 4-, 2- or 1-byte chunk from userspace and
 * emulates the write via intel_vgpu_rw().  Returns the number of bytes
 * written, or -EFAULT on any emulation/copy failure.
 */
static ssize_t intel_vgpu_write(struct mdev_device *mdev,
				const char __user *buf,
				size_t count, loff_t *ppos)
{
	unsigned int done = 0;
	int ret;

	while (count) {
		size_t filled;

		/* Pick the widest access the offset alignment allows. */
		if (count >= 4 && !(*ppos % 4)) {
			u32 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 4;
		} else if (count >= 2 && !(*ppos % 2)) {
			u16 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, (char *)&val,
					sizeof(val), ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 2;
		} else {
			u8 val;

			if (copy_from_user(&val, buf, sizeof(val)))
				goto write_err;

			ret = intel_vgpu_rw(mdev, &val, sizeof(val),
					ppos, true);
			if (ret <= 0)
				goto write_err;

			filled = 1;
		}

		count -= filled;
		done += filled;
		*ppos += filled;
		buf += filled;
	}

	return done;
write_err:
	return -EFAULT;
}
842
/*
 * mdev "mmap" callback.  Only BAR2 (the graphics aperture) may be
 * mapped, and only with MAP_SHARED.  The VFIO region index is encoded
 * in the top bits of vm_pgoff; the mapping is backed directly by this
 * vGPU's slice of the host aperture.
 */
static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
{
	unsigned int index;
	u64 virtaddr;
	unsigned long req_size, pgoff = 0;
	pgprot_t pg_prot;
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);

	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
	if (index >= VFIO_PCI_ROM_REGION_INDEX)
		return -EINVAL;

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (index != VFIO_PCI_BAR2_REGION_INDEX)
		return -EINVAL;

	pg_prot = vma->vm_page_prot;
	virtaddr = vma->vm_start;
	req_size = vma->vm_end - vma->vm_start;
	/* Physical base of this vGPU's aperture slice, as a pfn. */
	pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;

	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
}
869
870 static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
871 {
872 if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
873 return 1;
874
875 return 0;
876 }
877
/* INTx mask: accepted but not emulated (no-op stub). */
static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start,
		unsigned int count, uint32_t flags,
		void *data)
{
	return 0;
}
885
/* INTx unmask: accepted but not emulated (no-op stub). */
static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start,
		unsigned int count, uint32_t flags, void *data)
{
	return 0;
}
892
/* INTx trigger: accepted but not emulated (no-op stub). */
static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
		unsigned int index, unsigned int start, unsigned int count,
		uint32_t flags, void *data)
{
	return 0;
}
899
900 static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
901 unsigned int index, unsigned int start, unsigned int count,
902 uint32_t flags, void *data)
903 {
904 struct eventfd_ctx *trigger;
905
906 if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
907 int fd = *(int *)data;
908
909 trigger = eventfd_ctx_fdget(fd);
910 if (IS_ERR(trigger)) {
911 gvt_vgpu_err("eventfd_ctx_fdget failed\n");
912 return PTR_ERR(trigger);
913 }
914 vgpu->vdev.msi_trigger = trigger;
915 }
916
917 return 0;
918 }
919
/*
 * Dispatch a VFIO_DEVICE_SET_IRQS request to the handler matching the
 * IRQ index and action; only the MSI trigger action does real work.
 * Returns -ENOTTY for unsupported index/action combinations.
 */
static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, uint32_t flags,
		unsigned int index, unsigned int start, unsigned int count,
		void *data)
{
	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
		    unsigned int start, unsigned int count, uint32_t flags,
		    void *data) = NULL;

	switch (index) {
	case VFIO_PCI_INTX_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
			func = intel_vgpu_set_intx_mask;
			break;
		case VFIO_IRQ_SET_ACTION_UNMASK:
			func = intel_vgpu_set_intx_unmask;
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_intx_trigger;
			break;
		}
		break;
	case VFIO_PCI_MSI_IRQ_INDEX:
		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
		case VFIO_IRQ_SET_ACTION_MASK:
		case VFIO_IRQ_SET_ACTION_UNMASK:
			/* XXX Need masking support exported */
			break;
		case VFIO_IRQ_SET_ACTION_TRIGGER:
			func = intel_vgpu_set_msi_trigger;
			break;
		}
		break;
	}

	if (!func)
		return -ENOTTY;

	return func(vgpu, index, start, count, flags, data);
}
960
/*
 * VFIO device ioctl handler: implements VFIO_DEVICE_GET_INFO,
 * GET_REGION_INFO, GET_IRQ_INFO, SET_IRQS and RESET for a vGPU.
 * Unrecognized commands fall through and return 0.
 */
static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
			     unsigned long arg)
{
	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
	unsigned long minsz;

	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);

	if (cmd == VFIO_DEVICE_GET_INFO) {
		struct vfio_device_info info;

		minsz = offsetofend(struct vfio_device_info, num_irqs);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		info.flags = VFIO_DEVICE_FLAGS_PCI;
		info.flags |= VFIO_DEVICE_FLAGS_RESET;
		info.num_regions = VFIO_PCI_NUM_REGIONS;
		info.num_irqs = VFIO_PCI_NUM_IRQS;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;

	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
		struct vfio_region_info info;
		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
		int i, ret;
		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
		size_t size;
		int nr_areas = 1;
		int cap_type_id;

		minsz = offsetofend(struct vfio_region_info, offset);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_CONFIG_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = INTEL_GVT_MAX_CFG_SPACE_SZ;
			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR0_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = vgpu->cfg_space.bar[info.index].size;
			if (!info.size) {
				info.flags = 0;
				break;
			}

			info.flags = VFIO_REGION_INFO_FLAG_READ |
				     VFIO_REGION_INFO_FLAG_WRITE;
			break;
		case VFIO_PCI_BAR1_REGION_INDEX:
			/* BAR1 is exposed as an empty region. */
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;
			info.flags = 0;
			break;
		case VFIO_PCI_BAR2_REGION_INDEX:
			/*
			 * Aperture BAR: mmap-able, with a sparse-mmap
			 * capability describing this vGPU's slice.
			 */
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
					VFIO_REGION_INFO_FLAG_MMAP |
					VFIO_REGION_INFO_FLAG_READ |
					VFIO_REGION_INFO_FLAG_WRITE;
			info.size = gvt_aperture_sz(vgpu->gvt);

			size = sizeof(*sparse) +
					(nr_areas * sizeof(*sparse->areas));
			sparse = kzalloc(size, GFP_KERNEL);
			if (!sparse)
				return -ENOMEM;

			sparse->nr_areas = nr_areas;
			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
			sparse->areas[0].offset =
					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
			break;

		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
			info.size = 0;

			info.flags = 0;
			gvt_dbg_core("get region info bar:%d\n", info.index);
			break;

		case VFIO_PCI_ROM_REGION_INDEX:
		case VFIO_PCI_VGA_REGION_INDEX:
			gvt_dbg_core("get region info index:%d\n", info.index);
			break;
		default:
			{
				struct vfio_region_info_cap_type cap_type;

				/* Device-specific region beyond the PCI set. */
				if (info.index >= VFIO_PCI_NUM_REGIONS +
						vgpu->vdev.num_regions)
					return -EINVAL;

				i = info.index - VFIO_PCI_NUM_REGIONS;

				info.offset =
					VFIO_PCI_INDEX_TO_OFFSET(info.index);
				info.size = vgpu->vdev.region[i].size;
				info.flags = vgpu->vdev.region[i].flags;

				cap_type.type = vgpu->vdev.region[i].type;
				cap_type.subtype = vgpu->vdev.region[i].subtype;

				ret = vfio_info_add_capability(&caps,
						VFIO_REGION_INFO_CAP_TYPE,
						&cap_type);
				if (ret)
					return ret;
			}
		}

		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
			switch (cap_type_id) {
			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
				ret = vfio_info_add_capability(&caps,
					VFIO_REGION_INFO_CAP_SPARSE_MMAP,
					sparse);
				kfree(sparse);
				if (ret)
					return ret;
				break;
			default:
				return -EINVAL;
			}
		}

		if (caps.size) {
			/* Buffer too small: report the required argsz only. */
			if (info.argsz < sizeof(info) + caps.size) {
				info.argsz = sizeof(info) + caps.size;
				info.cap_offset = 0;
			} else {
				vfio_info_cap_shift(&caps, sizeof(info));
				if (copy_to_user((void __user *)arg +
						  sizeof(info), caps.buf,
						  caps.size)) {
					kfree(caps.buf);
					return -EFAULT;
				}
				info.cap_offset = sizeof(info);
			}

			kfree(caps.buf);
		}

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
		struct vfio_irq_info info;

		minsz = offsetofend(struct vfio_irq_info, count);

		if (copy_from_user(&info, (void __user *)arg, minsz))
			return -EFAULT;

		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
			return -EINVAL;

		switch (info.index) {
		case VFIO_PCI_INTX_IRQ_INDEX:
		case VFIO_PCI_MSI_IRQ_INDEX:
			break;
		default:
			return -EINVAL;
		}

		info.flags = VFIO_IRQ_INFO_EVENTFD;

		info.count = intel_vgpu_get_irq_count(vgpu, info.index);

		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
				       VFIO_IRQ_INFO_AUTOMASKED);
		else
			info.flags |= VFIO_IRQ_INFO_NORESIZE;

		return copy_to_user((void __user *)arg, &info, minsz) ?
			-EFAULT : 0;
	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
		struct vfio_irq_set hdr;
		u8 *data = NULL;
		int ret = 0;
		size_t data_size = 0;

		minsz = offsetofend(struct vfio_irq_set, count);

		if (copy_from_user(&hdr, (void __user *)arg, minsz))
			return -EFAULT;

		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);

			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
						VFIO_PCI_NUM_IRQS, &data_size);
			if (ret) {
				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
				return -EINVAL;
			}
			if (data_size) {
				data = memdup_user((void __user *)(arg + minsz),
						   data_size);
				if (IS_ERR(data))
					return PTR_ERR(data);
			}
		}

		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
					  hdr.start, hdr.count, data);
		kfree(data);

		return ret;
	} else if (cmd == VFIO_DEVICE_RESET) {
		intel_gvt_ops->vgpu_reset(vgpu);
		return 0;
	}

	return 0;
}
1193
1194 static ssize_t
1195 vgpu_id_show(struct device *dev, struct device_attribute *attr,
1196 char *buf)
1197 {
1198 struct mdev_device *mdev = mdev_from_dev(dev);
1199
1200 if (mdev) {
1201 struct intel_vgpu *vgpu = (struct intel_vgpu *)
1202 mdev_get_drvdata(mdev);
1203 return sprintf(buf, "%d\n", vgpu->id);
1204 }
1205 return sprintf(buf, "\n");
1206 }
1207
/* Per-mdev "intel_vgpu" sysfs group exposing the vGPU id. */
static DEVICE_ATTR_RO(vgpu_id);

static struct attribute *intel_vgpu_attrs[] = {
	&dev_attr_vgpu_id.attr,
	NULL
};

static const struct attribute_group intel_vgpu_group = {
	.name = "intel_vgpu",
	.attrs = intel_vgpu_attrs,
};

static const struct attribute_group *intel_vgpu_groups[] = {
	&intel_vgpu_group,
	NULL,
};
1224
/*
 * mdev parent callbacks: wires the VFIO mediated-device framework to the
 * vGPU lifecycle (create/remove, open/release) and to the guest access
 * paths (read/write/mmap/ioctl) implemented earlier in this file.
 */
static const struct mdev_parent_ops intel_vgpu_ops = {
	.supported_type_groups = intel_vgpu_type_groups,
	.mdev_attr_groups = intel_vgpu_groups,
	.create = intel_vgpu_create,
	.remove = intel_vgpu_remove,

	.open = intel_vgpu_open,
	.release = intel_vgpu_release,

	.read = intel_vgpu_read,
	.write = intel_vgpu_write,
	.mmap = intel_vgpu_mmap,
	.ioctl = intel_vgpu_ioctl,
};
1239
1240 static int kvmgt_host_init(struct device *dev, void *gvt, const void *ops)
1241 {
1242 if (!intel_gvt_init_vgpu_type_groups(gvt))
1243 return -EFAULT;
1244
1245 intel_gvt_ops = ops;
1246
1247 return mdev_register_device(dev, &intel_vgpu_ops);
1248 }
1249
/*
 * MPT host_exit hook: undo kvmgt_host_init().
 *
 * Unregister the mdev parent first: unregistering removes the child mdev
 * devices and their sysfs entries, which still reference the vGPU type
 * attribute groups.  Cleaning up the type groups while the parent is still
 * registered would leave those sysfs entries pointing at freed memory.
 */
static void kvmgt_host_exit(struct device *dev, void *gvt)
{
	mdev_unregister_device(dev);
	intel_gvt_cleanup_vgpu_type_groups(gvt);
}
1255
1256 static int kvmgt_write_protect_add(unsigned long handle, u64 gfn)
1257 {
1258 struct kvmgt_guest_info *info;
1259 struct kvm *kvm;
1260 struct kvm_memory_slot *slot;
1261 int idx;
1262
1263 if (!handle_valid(handle))
1264 return -ESRCH;
1265
1266 info = (struct kvmgt_guest_info *)handle;
1267 kvm = info->kvm;
1268
1269 idx = srcu_read_lock(&kvm->srcu);
1270 slot = gfn_to_memslot(kvm, gfn);
1271 if (!slot) {
1272 srcu_read_unlock(&kvm->srcu, idx);
1273 return -EINVAL;
1274 }
1275
1276 spin_lock(&kvm->mmu_lock);
1277
1278 if (kvmgt_gfn_is_write_protected(info, gfn))
1279 goto out;
1280
1281 kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1282 kvmgt_protect_table_add(info, gfn);
1283
1284 out:
1285 spin_unlock(&kvm->mmu_lock);
1286 srcu_read_unlock(&kvm->srcu, idx);
1287 return 0;
1288 }
1289
1290 static int kvmgt_write_protect_remove(unsigned long handle, u64 gfn)
1291 {
1292 struct kvmgt_guest_info *info;
1293 struct kvm *kvm;
1294 struct kvm_memory_slot *slot;
1295 int idx;
1296
1297 if (!handle_valid(handle))
1298 return 0;
1299
1300 info = (struct kvmgt_guest_info *)handle;
1301 kvm = info->kvm;
1302
1303 idx = srcu_read_lock(&kvm->srcu);
1304 slot = gfn_to_memslot(kvm, gfn);
1305 if (!slot) {
1306 srcu_read_unlock(&kvm->srcu, idx);
1307 return -EINVAL;
1308 }
1309
1310 spin_lock(&kvm->mmu_lock);
1311
1312 if (!kvmgt_gfn_is_write_protected(info, gfn))
1313 goto out;
1314
1315 kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
1316 kvmgt_protect_table_del(info, gfn);
1317
1318 out:
1319 spin_unlock(&kvm->mmu_lock);
1320 srcu_read_unlock(&kvm->srcu, idx);
1321 return 0;
1322 }
1323
1324 static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
1325 const u8 *val, int len,
1326 struct kvm_page_track_notifier_node *node)
1327 {
1328 struct kvmgt_guest_info *info = container_of(node,
1329 struct kvmgt_guest_info, track_node);
1330
1331 if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
1332 intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa,
1333 (void *)val, len);
1334 }
1335
1336 static void kvmgt_page_track_flush_slot(struct kvm *kvm,
1337 struct kvm_memory_slot *slot,
1338 struct kvm_page_track_notifier_node *node)
1339 {
1340 int i;
1341 gfn_t gfn;
1342 struct kvmgt_guest_info *info = container_of(node,
1343 struct kvmgt_guest_info, track_node);
1344
1345 spin_lock(&kvm->mmu_lock);
1346 for (i = 0; i < slot->npages; i++) {
1347 gfn = slot->base_gfn + i;
1348 if (kvmgt_gfn_is_write_protected(info, gfn)) {
1349 kvm_slot_page_track_remove_page(kvm, slot, gfn,
1350 KVM_PAGE_TRACK_WRITE);
1351 kvmgt_protect_table_del(info, gfn);
1352 }
1353 }
1354 spin_unlock(&kvm->mmu_lock);
1355 }
1356
1357 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu, struct kvm *kvm)
1358 {
1359 struct intel_vgpu *itr;
1360 struct kvmgt_guest_info *info;
1361 int id;
1362 bool ret = false;
1363
1364 mutex_lock(&vgpu->gvt->lock);
1365 for_each_active_vgpu(vgpu->gvt, itr, id) {
1366 if (!handle_valid(itr->handle))
1367 continue;
1368
1369 info = (struct kvmgt_guest_info *)itr->handle;
1370 if (kvm && kvm == info->kvm) {
1371 ret = true;
1372 goto out;
1373 }
1374 }
1375 out:
1376 mutex_unlock(&vgpu->gvt->lock);
1377 return ret;
1378 }
1379
/*
 * Bind the vGPU behind @mdev to its guest's KVM instance: allocate the
 * per-guest info, take a reference on the kvm, and register for write
 * tracking so guest page-table writes can be shadowed.
 * Returns 0 on success or a negative errno.
 */
static int kvmgt_guest_init(struct mdev_device *mdev)
{
	struct kvmgt_guest_info *info;
	struct intel_vgpu *vgpu;
	struct kvm *kvm;

	vgpu = mdev_get_drvdata(mdev);
	/* A valid handle means this vGPU is already bound to a guest. */
	if (handle_valid(vgpu->handle))
		return -EEXIST;

	kvm = vgpu->vdev.kvm;
	/* The caller must be the process that owns the KVM instance. */
	if (!kvm || kvm->mm != current->mm) {
		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
		return -ESRCH;
	}

	/* Each KVM instance may back at most one vGPU. */
	if (__kvmgt_vgpu_exist(vgpu, kvm))
		return -EEXIST;

	info = vzalloc(sizeof(struct kvmgt_guest_info));
	if (!info)
		return -ENOMEM;

	vgpu->handle = (unsigned long)info;
	info->vgpu = vgpu;
	info->kvm = kvm;
	/* Hold a reference for the lifetime of the binding; dropped in
	 * kvmgt_guest_exit(). */
	kvm_get_kvm(info->kvm);

	kvmgt_protect_table_init(info);
	gvt_cache_init(vgpu);

	info->track_node.track_write = kvmgt_page_track_write;
	info->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
	kvm_page_track_register_notifier(kvm, &info->track_node);

	return 0;
}
1417
/*
 * Tear down the guest binding created by kvmgt_guest_init(): stop page
 * tracking, drop the kvm reference, and free all per-guest state.
 * Always returns true.
 */
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
	kvm_put_kvm(info->kvm);	/* balances kvm_get_kvm() in guest_init */
	kvmgt_protect_table_destroy(info);
	gvt_cache_destroy(info->vgpu);
	vfree(info);

	return true;
}
1428
/* MPT attach hook: KVMGT needs no hypervisor-side state at attach time;
 * the real binding happens later in kvmgt_guest_init(). */
static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
{
	/* nothing to do here */
	return 0;
}
1434
/* MPT detach hook: nothing was set up in kvmgt_attach_vgpu(), so there is
 * nothing to undo here. */
static void kvmgt_detach_vgpu(unsigned long handle)
{
	/* nothing to do here */
}
1439
1440 static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
1441 {
1442 struct kvmgt_guest_info *info;
1443 struct intel_vgpu *vgpu;
1444
1445 if (!handle_valid(handle))
1446 return -ESRCH;
1447
1448 info = (struct kvmgt_guest_info *)handle;
1449 vgpu = info->vgpu;
1450
1451 if (eventfd_signal(vgpu->vdev.msi_trigger, 1) == 1)
1452 return 0;
1453
1454 return -EFAULT;
1455 }
1456
/*
 * Translate a guest frame number to a host IOVA the GPU can use for DMA.
 * Despite the name, the value returned is the IOVA (not the raw host pfn).
 * Results are cached per-vGPU; on a miss the page is pinned via VFIO and
 * mapped into the host IOMMU domain.
 * Returns INTEL_GVT_INVALID_ADDR on any failure.
 */
static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
{
	unsigned long iova, pfn;
	struct kvmgt_guest_info *info;
	struct device *dev;
	struct intel_vgpu *vgpu;
	int rc;

	if (!handle_valid(handle))
		return INTEL_GVT_INVALID_ADDR;

	info = (struct kvmgt_guest_info *)handle;
	vgpu = info->vgpu;
	/* Fast path: gfn already pinned and mapped. */
	iova = gvt_cache_find(info->vgpu, gfn);
	if (iova != INTEL_GVT_INVALID_ADDR)
		return iova;

	pfn = INTEL_GVT_INVALID_ADDR;
	dev = mdev_dev(info->vgpu->vdev.mdev);
	/* Pin exactly one page; vfio_pin_pages() returns the count pinned. */
	rc = vfio_pin_pages(dev, &gfn, 1, IOMMU_READ | IOMMU_WRITE, &pfn);
	if (rc != 1) {
		gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx: %d\n",
			gfn, rc);
		return INTEL_GVT_INVALID_ADDR;
	}
	/* transfer to host iova for GFX to use DMA */
	rc = gvt_dma_map_iova(info->vgpu, pfn, &iova);
	if (rc) {
		gvt_vgpu_err("gvt_dma_map_iova failed for gfn: 0x%lx\n", gfn);
		vfio_unpin_pages(dev, &gfn, 1);	/* undo the pin on failure */
		return INTEL_GVT_INVALID_ADDR;
	}

	gvt_cache_add(info->vgpu, gfn, iova);
	return iova;
}
1493
1494 static int kvmgt_rw_gpa(unsigned long handle, unsigned long gpa,
1495 void *buf, unsigned long len, bool write)
1496 {
1497 struct kvmgt_guest_info *info;
1498 struct kvm *kvm;
1499 int idx, ret;
1500 bool kthread = current->mm == NULL;
1501
1502 if (!handle_valid(handle))
1503 return -ESRCH;
1504
1505 info = (struct kvmgt_guest_info *)handle;
1506 kvm = info->kvm;
1507
1508 if (kthread)
1509 use_mm(kvm->mm);
1510
1511 idx = srcu_read_lock(&kvm->srcu);
1512 ret = write ? kvm_write_guest(kvm, gpa, buf, len) :
1513 kvm_read_guest(kvm, gpa, buf, len);
1514 srcu_read_unlock(&kvm->srcu, idx);
1515
1516 if (kthread)
1517 unuse_mm(kvm->mm);
1518
1519 return ret;
1520 }
1521
/* Read @len bytes at guest physical address @gpa into @buf. */
static int kvmgt_read_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, false);
}
1527
/* Write @len bytes from @buf to guest physical address @gpa. */
static int kvmgt_write_gpa(unsigned long handle, unsigned long gpa,
			void *buf, unsigned long len)
{
	return kvmgt_rw_gpa(handle, gpa, buf, len, true);
}
1533
1534 static unsigned long kvmgt_virt_to_pfn(void *addr)
1535 {
1536 return PFN_DOWN(__pa(addr));
1537 }
1538
/*
 * MPT (mediated pass-through) service table for KVM: the GVT-g core calls
 * these hooks for all hypervisor-specific functionality.
 */
struct intel_gvt_mpt kvmgt_mpt = {
	.host_init = kvmgt_host_init,
	.host_exit = kvmgt_host_exit,
	.attach_vgpu = kvmgt_attach_vgpu,
	.detach_vgpu = kvmgt_detach_vgpu,
	.inject_msi = kvmgt_inject_msi,
	.from_virt_to_mfn = kvmgt_virt_to_pfn,
	.set_wp_page = kvmgt_write_protect_add,
	.unset_wp_page = kvmgt_write_protect_remove,
	.read_gpa = kvmgt_read_gpa,
	.write_gpa = kvmgt_write_gpa,
	.gfn_to_mfn = kvmgt_gfn_to_pfn,
};
EXPORT_SYMBOL_GPL(kvmgt_mpt);
1553
/* Module init: intentionally empty — setup happens via kvmgt_host_init()
 * when the GVT core binds to this MPT table. */
static int __init kvmgt_init(void)
{
	return 0;
}
1558
/* Module exit: intentionally empty — teardown happens via kvmgt_host_exit(). */
static void __exit kvmgt_exit(void)
{
}
1562
module_init(kvmgt_init);
module_exit(kvmgt_exit);

/* "GPL and additional rights" matches the MIT-style header at the top of
 * this file. */
MODULE_LICENSE("GPL and additional rights");
MODULE_AUTHOR("Intel Corporation");