// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018-2020 Intel Corporation.
 * Copyright (C) 2020 Red Hat, Inc.
 *
 * Author: Tiwei Bie <tiwei.bie@intel.com>
 *         Jason Wang <jasowang@redhat.com>
 *
 * Thanks to Michael S. Tsirkin for the valuable comments and
 * suggestions, and to Cunming Liang and Zhihong Wang for all
 * their support.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>

#include "vhost.h"
enum {
	VHOST_VDPA_BACKEND_FEATURES =
	(1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
	(1ULL << VHOST_BACKEND_F_IOTLB_BATCH),
};
#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)
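/*
 * Illustrative sketch (not part of the driver): a VMM would typically
 * negotiate these backend features over the char device before sending
 * any IOTLB messages; the policy below is only an assumed usage:
 *
 *	__u64 features;
 *
 *	ioctl(fd, VHOST_GET_BACKEND_FEATURES, &features);
 *	features &= (1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2) |
 *		    (1ULL << VHOST_BACKEND_F_IOTLB_BATCH);
 *	ioctl(fd, VHOST_SET_BACKEND_FEATURES, &features);
 */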
struct vhost_vdpa {
	struct vhost_dev vdev;
	struct iommu_domain *domain;
	struct vhost_virtqueue *vqs;
	struct completion completion;
	struct vdpa_device *vdpa;
	struct device dev;
	struct cdev cdev;
	atomic_t opened;
	int nvqs;
	int virtio_id;
	int minor;
	struct eventfd_ctx *config_ctx;
	int in_batch;
	struct vdpa_iova_range range;
};
static DEFINE_IDA(vhost_vdpa_ida);

static dev_t vhost_vdpa_major;
static void handle_vq_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev);
	const struct vdpa_config_ops *ops = v->vdpa->config;

	ops->kick_vq(v->vdpa, vq - v->vqs);
}
static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
{
	struct vhost_virtqueue *vq = private;
	struct eventfd_ctx *call_ctx = vq->call_ctx.ctx;

	if (call_ctx)
		eventfd_signal(call_ctx, 1);

	return IRQ_HANDLED;
}
static irqreturn_t vhost_vdpa_config_cb(void *private)
{
	struct vhost_vdpa *v = private;
	struct eventfd_ctx *config_ctx = v->config_ctx;

	if (config_ctx)
		eventfd_signal(config_ctx, 1);

	return IRQ_HANDLED;
}
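/*
 * The two callbacks above relay device interrupts to userspace through
 * eventfds.  When the parent device can report a per-virtqueue interrupt
 * (ops->get_vq_irq), the helpers below additionally register an irq
 * bypass producer, so that a consumer such as KVM can inject the
 * interrupt into the guest directly instead of bouncing through the
 * eventfd path.
 */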
static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];
	const struct vdpa_config_ops *ops = v->vdpa->config;
	struct vdpa_device *vdpa = v->vdpa;
	int ret, irq;

	if (!ops->get_vq_irq)
		return;

	irq = ops->get_vq_irq(vdpa, qid);
	irq_bypass_unregister_producer(&vq->call_ctx.producer);
	if (!vq->call_ctx.ctx || irq < 0)
		return;

	vq->call_ctx.producer.token = vq->call_ctx.ctx;
	vq->call_ctx.producer.irq = irq;
	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
	if (unlikely(ret))
		dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration failed, ret = %d\n",
			 qid, vq->call_ctx.producer.token, ret);
}
static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
{
	struct vhost_virtqueue *vq = &v->vqs[qid];

	irq_bypass_unregister_producer(&vq->call_ctx.producer);
}
static void vhost_vdpa_reset(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;

	vdpa_reset(vdpa);
	v->in_batch = 0;
}
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u32 device_id;

	device_id = ops->get_device_id(vdpa);

	if (copy_to_user(argp, &device_id, sizeof(device_id)))
		return -EFAULT;

	return 0;
}
static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status;

	status = ops->get_status(vdpa);

	if (copy_to_user(statusp, &status, sizeof(status)))
		return -EFAULT;

	return 0;
}
static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u8 status, status_old;
	int nvqs = v->nvqs;
	u16 i;

	if (copy_from_user(&status, statusp, sizeof(status)))
		return -EFAULT;

	status_old = ops->get_status(vdpa);

	/*
	 * Userspace shouldn't remove status bits unless it resets the
	 * status to 0.
	 */
	if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
		return -EINVAL;

	ops->set_status(vdpa, status);

	if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_setup_vq_irq(v, i);

	if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && !(status & VIRTIO_CONFIG_S_DRIVER_OK))
		for (i = 0; i < nvqs; i++)
			vhost_vdpa_unsetup_vq_irq(v, i);

	return 0;
}
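/*
 * Illustrative sketch: a VMM drives the device status much like a virtio
 * guest driver would; the DRIVER_OK transition is what wires up (or tears
 * down) the per-vq irq bypass producers above.  Assumed usage only:
 *
 *	__u8 status = 0;
 *
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);	// reset
 *	status = VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER;
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
 *	// ...feature negotiation and vring setup here...
 *	status |= VIRTIO_CONFIG_S_FEATURES_OK;
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
 *	status |= VIRTIO_CONFIG_S_DRIVER_OK;
 *	ioctl(fd, VHOST_VDPA_SET_STATUS, &status);
 */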
static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
				      struct vhost_vdpa_config *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	long size = vdpa->config->get_config_size(vdpa);

	if (c->len == 0)
		return -EINVAL;

	if (c->len > size - c->off)
		return -E2BIG;

	return 0;
}
static long vhost_vdpa_get_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;
	buf = kvzalloc(config.len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vdpa_get_config(vdpa, config.off, buf, config.len);

	if (copy_to_user(c->buf, buf, config.len)) {
		kvfree(buf);
		return -EFAULT;
	}

	kvfree(buf);
	return 0;
}
static long vhost_vdpa_set_config(struct vhost_vdpa *v,
				  struct vhost_vdpa_config __user *c)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa_config config;
	unsigned long size = offsetof(struct vhost_vdpa_config, buf);
	u8 *buf;

	if (copy_from_user(&config, c, size))
		return -EFAULT;
	if (vhost_vdpa_config_validate(v, &config))
		return -EINVAL;

	buf = vmemdup_user(c->buf, config.len);
	if (IS_ERR(buf))
		return PTR_ERR(buf);

	ops->set_config(vdpa, config.off, buf, config.len);

	kvfree(buf);
	return 0;
}
static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	features = ops->get_features(vdpa);

	if (copy_to_user(featurep, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}
static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u64 features;

	/*
	 * It's not allowed to change the features after they have
	 * been negotiated.
	 */
	if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK)
		return -EBUSY;

	if (copy_from_user(&features, featurep, sizeof(features)))
		return -EFAULT;

	if (vdpa_set_features(vdpa, features))
		return -EINVAL;

	return 0;
}
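/*
 * Illustrative sketch: feature negotiation mirrors virtio.  Userspace
 * intersects the device features with what it supports and commits the
 * result before setting FEATURES_OK; "supported_by_vmm" is a hypothetical
 * mask, not something defined by this driver:
 *
 *	__u64 features;
 *
 *	ioctl(fd, VHOST_GET_FEATURES, &features);
 *	features &= supported_by_vmm;
 *	ioctl(fd, VHOST_SET_FEATURES, &features);
 */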
static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	u16 num;

	num = ops->get_vq_num_max(vdpa);

	if (copy_to_user(argp, &num, sizeof(num)))
		return -EFAULT;

	return 0;
}
static void vhost_vdpa_config_put(struct vhost_vdpa *v)
{
	if (v->config_ctx) {
		eventfd_ctx_put(v->config_ctx);
		v->config_ctx = NULL;
	}
}
static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vdpa_callback cb;
	int fd;
	struct eventfd_ctx *ctx;

	cb.callback = vhost_vdpa_config_cb;
	cb.private = v->vdpa;
	if (copy_from_user(&fd, argp, sizeof(fd)))
		return -EFAULT;

	ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
	swap(ctx, v->config_ctx);

	if (!IS_ERR_OR_NULL(ctx))
		eventfd_ctx_put(ctx);

	if (IS_ERR(v->config_ctx)) {
		long ret = PTR_ERR(v->config_ctx);

		v->config_ctx = NULL;
		return ret;
	}

	v->vdpa->config->set_config_cb(v->vdpa, &cb);

	return 0;
}
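/*
 * Illustrative sketch: userspace arms the config-change interrupt with an
 * eventfd, and can tear it down again by passing VHOST_FILE_UNBIND:
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *
 *	ioctl(fd, VHOST_VDPA_SET_CONFIG_CALL, &efd);
 */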
static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;
	return 0;
}
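/*
 * Illustrative sketch: userspace queries the usable IOVA window before
 * sending any IOTLB updates; updates outside [r.first, r.last] are
 * rejected by vhost_vdpa_process_iotlb_update():
 *
 *	struct vhost_vdpa_iova_range r;
 *
 *	ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, &r);
 */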
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_vq_state vq_state;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u32 idx;
	long r;

	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	switch (cmd) {
	case VHOST_VDPA_SET_VRING_ENABLE:
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	case VHOST_GET_VRING_BASE:
		r = ops->get_vq_state(v->vdpa, idx, &vq_state);
		if (r)
			return r;

		vq->last_avail_idx = vq_state.split.avail_index;
		break;
	}

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		vq_state.split.avail_index = vq->last_avail_idx;
		if (ops->set_vq_state(vdpa, idx, &vq_state))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		if (vq->call_ctx.ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		vhost_vdpa_setup_vq_irq(v, idx);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}
static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	u64 __user *featurep = argp;
	u64 features;
	long r = 0;

	if (cmd == VHOST_SET_BACKEND_FEATURES) {
		if (copy_from_user(&features, featurep, sizeof(features)))
			return -EFAULT;
		if (features & ~VHOST_VDPA_BACKEND_FEATURES)
			return -EOPNOTSUPP;
		vhost_set_backend_features(&v->vdev, features);
		return 0;
	}

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		r = -ENOIOCTLCMD;
		break;
	case VHOST_VDPA_SET_CONFIG_CALL:
		r = vhost_vdpa_set_config_call(v, argp);
		break;
	case VHOST_GET_BACKEND_FEATURES:
		features = VHOST_VDPA_BACKEND_FEATURES;
		if (copy_to_user(featurep, &features, sizeof(features)))
			r = -EFAULT;
		break;
	case VHOST_VDPA_GET_IOVA_RANGE:
		r = vhost_vdpa_get_iova_range(v, argp);
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	mutex_unlock(&d->mutex);
	return r;
}
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = map->size >> PAGE_SHIFT;
		for (pfn = map->addr >> PAGE_SHIFT;
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}
static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
{
	struct vhost_dev *dev = &v->vdev;

	vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1);
	kfree(dev->iotlb);
	dev->iotlb = NULL;
}
static int perm_to_iommu_flags(u32 perm)
{
	int flags = 0;

	switch (perm) {
	case VHOST_ACCESS_WO:
		flags |= IOMMU_WRITE;
		break;
	case VHOST_ACCESS_RO:
		flags |= IOMMU_READ;
		break;
	case VHOST_ACCESS_RW:
		flags |= (IOMMU_WRITE | IOMMU_READ);
		break;
	default:
		WARN(1, "invalid vhost IOTLB permission\n");
		break;
	}

	return flags | IOMMU_CACHE;
}
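/*
 * Mapping is tried against three backends, in order: the parent device
 * may translate DMA itself range by range (->dma_map), rebuild a whole
 * translation table from the vhost IOTLB (->set_map), or rely on the
 * platform IOMMU domain that this driver allocates and manages.
 */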
static int vhost_vdpa_map(struct vhost_vdpa *v,
			  u64 iova, u64 size, u64 pa, u32 perm)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
				  pa, perm);
	if (r)
		return r;

	if (ops->dma_map) {
		r = ops->dma_map(vdpa, iova, size, pa, perm);
	} else if (ops->set_map) {
		if (!v->in_batch)
			r = ops->set_map(vdpa, dev->iotlb);
	} else {
		r = iommu_map(v->domain, iova, pa, size,
			      perm_to_iommu_flags(perm));
	}

	if (r)
		vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
	else
		atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);

	return r;
}
static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
{
	struct vhost_dev *dev = &v->vdev;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1);

	if (ops->dma_map) {
		ops->dma_unmap(vdpa, iova, size);
	} else if (ops->set_map) {
		if (!v->in_batch)
			ops->set_map(vdpa, dev->iotlb);
	} else {
		iommu_unmap(v->domain, iova, size);
	}
}
static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
					   struct vhost_iotlb_msg *msg)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct page **page_list;
	unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
	unsigned int gup_flags = FOLL_LONGTERM;
	unsigned long npages, cur_base, map_pfn, last_pfn = 0;
	unsigned long lock_limit, sz2pin, nchunks, i;
	u64 iova = msg->iova;
	long pinned;
	int ret = 0;

	if (msg->iova < v->range.first || !msg->size ||
	    msg->iova > U64_MAX - msg->size + 1 ||
	    msg->iova + msg->size - 1 > v->range.last)
		return -EINVAL;

	if (vhost_iotlb_itree_first(iotlb, msg->iova,
				    msg->iova + msg->size - 1))
		return -EEXIST;

	/* Limit the use of memory for bookkeeping */
	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list)
		return -ENOMEM;

	if (msg->perm & VHOST_ACCESS_WO)
		gup_flags |= FOLL_WRITE;

	npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
	if (!npages) {
		ret = -EINVAL;
		goto free;
	}

	mmap_read_lock(dev->mm);

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
		ret = -ENOMEM;
		goto unlock;
	}

	cur_base = msg->uaddr & PAGE_MASK;
	iova &= PAGE_MASK;
	nchunks = 0;

	while (npages) {
		sz2pin = min_t(unsigned long, npages, list_size);
		pinned = pin_user_pages(cur_base, sz2pin,
					gup_flags, page_list, NULL);
		if (sz2pin != pinned) {
			if (pinned < 0) {
				ret = pinned;
			} else {
				unpin_user_pages(page_list, pinned);
				ret = -ENOMEM;
			}
			goto out;
		}
		nchunks++;

		if (!last_pfn)
			map_pfn = page_to_pfn(page_list[0]);

		for (i = 0; i < pinned; i++) {
			unsigned long this_pfn = page_to_pfn(page_list[i]);
			u64 csize;

			if (last_pfn && (this_pfn != last_pfn + 1)) {
				/* Map this contiguous chunk of pinned memory */
				csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
				ret = vhost_vdpa_map(v, iova, csize,
						     map_pfn << PAGE_SHIFT,
						     msg->perm);
				if (ret) {
					/*
					 * Unpin the pages that are left unmapped
					 * from this point on in the current
					 * page_list. The remaining outstanding
					 * ones which may stride across several
					 * chunks will be covered in the common
					 * error path subsequently.
					 */
					unpin_user_pages(&page_list[i],
							 pinned - i);
					goto out;
				}

				map_pfn = this_pfn;
				iova += csize;
				nchunks = 0;
			}

			last_pfn = this_pfn;
		}

		cur_base += pinned << PAGE_SHIFT;
		npages -= pinned;
	}

	/* Map the remaining chunk */
	ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
			     map_pfn << PAGE_SHIFT, msg->perm);
out:
	if (ret) {
		if (nchunks) {
			unsigned long pfn;

			/*
			 * Unpin the outstanding pages which are yet to be
			 * mapped but haven't due to vdpa_map() or
			 * pin_user_pages() failure.
			 *
			 * Mapped pages are accounted in vdpa_map(), hence
			 * the corresponding unpinning will be handled by
			 * vdpa_unmap().
			 */
			for (pfn = map_pfn; pfn <= last_pfn; pfn++)
				unpin_user_page(pfn_to_page(pfn));
		}
		vhost_vdpa_unmap(v, msg->iova, msg->size);
	}
unlock:
	mmap_read_unlock(dev->mm);
free:
	free_page((unsigned long)page_list);
	return ret;
}
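/*
 * Illustrative sketch: userspace populates the IOTLB by writing
 * struct vhost_msg_v2 messages to the char device (assuming the v2
 * message format was negotiated via VHOST_BACKEND_F_IOTLB_MSG_V2);
 * iova, size and buf are placeholders for the caller's mapping:
 *
 *	struct vhost_msg_v2 msg = {
 *		.type = VHOST_IOTLB_MSG_V2,
 *		.iotlb = {
 *			.iova  = iova,
 *			.size  = size,
 *			.uaddr = (__u64)(uintptr_t)buf,
 *			.perm  = VHOST_ACCESS_RW,
 *			.type  = VHOST_IOTLB_UPDATE,
 *		},
 *	};
 *
 *	write(fd, &msg, sizeof(msg));
 */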
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
					struct vhost_iotlb_msg *msg)
{
	struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev);
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	int r = 0;

	mutex_lock(&dev->mutex);

	r = vhost_dev_check_owner(dev);
	if (r)
		goto unlock;

	switch (msg->type) {
	case VHOST_IOTLB_UPDATE:
		r = vhost_vdpa_process_iotlb_update(v, msg);
		break;
	case VHOST_IOTLB_INVALIDATE:
		vhost_vdpa_unmap(v, msg->iova, msg->size);
		break;
	case VHOST_IOTLB_BATCH_BEGIN:
		v->in_batch = true;
		break;
	case VHOST_IOTLB_BATCH_END:
		if (v->in_batch && ops->set_map)
			ops->set_map(vdpa, dev->iotlb);
		v->in_batch = false;
		break;
	default:
		r = -EINVAL;
		break;
	}
unlock:
	mutex_unlock(&dev->mutex);

	return r;
}
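/*
 * Sketch (assuming VHOST_BACKEND_F_IOTLB_BATCH was negotiated): userspace
 * can bracket a burst of updates so that a ->set_map() parent rebuilds its
 * translation table only once, at batch end.  send_iotlb() stands for a
 * hypothetical helper that writes one vhost_msg_v2 as shown above:
 *
 *	send_iotlb(fd, VHOST_IOTLB_BATCH_BEGIN);
 *	send_iotlb(fd, VHOST_IOTLB_UPDATE);	// repeated as needed
 *	send_iotlb(fd, VHOST_IOTLB_BATCH_END);
 */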
static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb,
					 struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct vhost_vdpa *v = file->private_data;
	struct vhost_dev *dev = &v->vdev;

	return vhost_chr_write_iter(dev, from);
}
static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	struct bus_type *bus;
	int ret;

	/* The device wants to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
		return -ENOTSUPP;

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	return ret;
}
static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);

	if (v->domain) {
		iommu_detach_device(v->domain, dma_dev);
		iommu_domain_free(v->domain);
	}

	v->domain = NULL;
}
static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_iova_range *range = &v->range;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;

	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
	} else if (v->domain && v->domain->geometry.force_aperture) {
		range->first = v->domain->geometry.aperture_start;
		range->last = v->domain->geometry.aperture_end;
	} else {
		range->first = 0;
		range->last = ULLONG_MAX;
	}
}
static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v;
	struct vhost_dev *dev;
	struct vhost_virtqueue **vqs;
	int nvqs, i, r, opened;

	v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);

	opened = atomic_cmpxchg(&v->opened, 0, 1);
	if (opened)
		return -EBUSY;

	nvqs = v->nvqs;
	vhost_vdpa_reset(v);

	vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		r = -ENOMEM;
		goto err;
	}

	dev = &v->vdev;
	for (i = 0; i < nvqs; i++) {
		vqs[i] = &v->vqs[i];
		vqs[i]->handle_kick = handle_vq_kick;
	}
	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
		       vhost_vdpa_process_iotlb_msg);

	dev->iotlb = vhost_iotlb_alloc(0, 0);
	if (!dev->iotlb) {
		r = -ENOMEM;
		goto err_init_iotlb;
	}

	r = vhost_vdpa_alloc_domain(v);
	if (r)
		goto err_init_iotlb;

	vhost_vdpa_set_iova_range(v);

	filep->private_data = v;

	return 0;

err_init_iotlb:
	vhost_dev_cleanup(&v->vdev);
	kfree(vqs);
err:
	atomic_dec(&v->opened);
	return r;
}
static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
{
	int i;

	for (i = 0; i < v->nvqs; i++)
		vhost_vdpa_unsetup_vq_irq(v, i);
}
static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_iotlb_free(v);
	vhost_vdpa_free_domain(v);
	vhost_vdpa_config_put(v);
	vhost_vdpa_clean_irq(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);

	return 0;
}
#ifdef CONFIG_MMU
static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
{
	struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	struct vm_area_struct *vma = vmf->vma;
	u16 index = vma->vm_pgoff;

	notify = ops->get_vq_notification(vdpa, index);

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
			    notify.addr >> PAGE_SHIFT, PAGE_SIZE,
			    vma->vm_page_prot))
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}
static const struct vm_operations_struct vhost_vdpa_vm_ops = {
	.fault = vhost_vdpa_fault,
};
static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct vhost_vdpa *v = vma->vm_file->private_data;
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_notification_area notify;
	unsigned long index = vma->vm_pgoff;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	if ((vma->vm_flags & VM_SHARED) == 0)
		return -EINVAL;
	if (vma->vm_flags & VM_READ)
		return -EINVAL;
	if (index > 65535)
		return -EINVAL;
	if (!ops->get_vq_notification)
		return -ENOTSUPP;

	/* To be safe and easily modelled by userspace, we only
	 * support a doorbell that sits on a page boundary and
	 * does not share its page with other registers.
	 */
	notify = ops->get_vq_notification(vdpa, index);
	if (notify.addr & (PAGE_SIZE - 1))
		return -EINVAL;
	if (vma->vm_end - vma->vm_start != notify.size)
		return -ENOTSUPP;

	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
	vma->vm_ops = &vhost_vdpa_vm_ops;

	return 0;
}
#endif /* CONFIG_MMU */
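/*
 * Illustrative sketch: userspace maps the doorbell of virtqueue "idx" by
 * using the queue index as the page offset (one page per queue, and
 * write-only, since VM_READ mappings are rejected above):
 *
 *	void *db = mmap(NULL, getpagesize(), PROT_WRITE,
 *			MAP_SHARED, fd, idx * getpagesize());
 */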
static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
#ifdef CONFIG_MMU
	.mmap		= vhost_vdpa_mmap,
#endif /* CONFIG_MMU */
	.compat_ioctl	= compat_ptr_ioctl,
};
static void vhost_vdpa_release_dev(struct device *device)
{
	struct vhost_vdpa *v =
	       container_of(device, struct vhost_vdpa, dev);

	ida_simple_remove(&vhost_vdpa_ida, v->minor);
	kfree(v->vqs);
	kfree(v);
}
static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor;
	int r;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = vdpa->nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(v->nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	return 0;

err:
	put_device(&v->dev);
	return r;
}
static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}
static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};
static int __init vhost_vdpa_init(void)
{
	int r;

	r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX,
				"vhost-vdpa");
	if (r)
		goto err_alloc_chrdev;

	r = vdpa_register_driver(&vhost_vdpa_driver);
	if (r)
		goto err_vdpa_register_driver;

	return 0;

err_vdpa_register_driver:
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
err_alloc_chrdev:
	return r;
}
module_init(vhost_vdpa_init);
static void __exit vhost_vdpa_exit(void)
{
	vdpa_unregister_driver(&vhost_vdpa_driver);
	unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX);
}
module_exit(vhost_vdpa_exit);
MODULE_VERSION("0.0.1");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("vDPA-based vhost backend for virtio");