]>
Commit | Line | Data |
---|---|---|
4c8cf318 TB |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright (C) 2018-2020 Intel Corporation. | |
4 | * Copyright (C) 2020 Red Hat, Inc. | |
5 | * | |
6 | * Author: Tiwei Bie <tiwei.bie@intel.com> | |
7 | * Jason Wang <jasowang@redhat.com> | |
8 | * | |
9 | * Thanks Michael S. Tsirkin for the valuable comments and | |
10 | * suggestions. And thanks to Cunming Liang and Zhihong Wang for all | |
11 | * their supports. | |
12 | */ | |
13 | ||
14 | #include <linux/kernel.h> | |
15 | #include <linux/module.h> | |
16 | #include <linux/cdev.h> | |
17 | #include <linux/device.h> | |
18 | #include <linux/iommu.h> | |
19 | #include <linux/uuid.h> | |
20 | #include <linux/vdpa.h> | |
21 | #include <linux/nospec.h> | |
22 | #include <linux/vhost.h> | |
23 | #include <linux/virtio_net.h> | |
24 | ||
25 | #include "vhost.h" | |
26 | ||
/*
 * Feature bits this backend is able to mediate for userspace.
 * VHOST_VDPA_FEATURES holds the transport/ring features common to every
 * device type; VHOST_VDPA_NET_FEATURES adds the virtio-net device bits.
 * Features reported to and accepted from userspace are masked with these
 * (see vhost_vdpa_get_features()/vhost_vdpa_set_features()).
 */
enum {
	VHOST_VDPA_FEATURES =
		(1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
		(1ULL << VIRTIO_F_ANY_LAYOUT) |
		(1ULL << VIRTIO_F_VERSION_1) |
		(1ULL << VIRTIO_F_IOMMU_PLATFORM) |
		(1ULL << VIRTIO_F_RING_PACKED) |
		(1ULL << VIRTIO_F_ORDER_PLATFORM) |
		(1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
		(1ULL << VIRTIO_RING_F_EVENT_IDX),

	VHOST_VDPA_NET_FEATURES = VHOST_VDPA_FEATURES |
		(1ULL << VIRTIO_NET_F_CSUM) |
		(1ULL << VIRTIO_NET_F_GUEST_CSUM) |
		(1ULL << VIRTIO_NET_F_MTU) |
		(1ULL << VIRTIO_NET_F_MAC) |
		(1ULL << VIRTIO_NET_F_GUEST_TSO4) |
		(1ULL << VIRTIO_NET_F_GUEST_TSO6) |
		(1ULL << VIRTIO_NET_F_GUEST_ECN) |
		(1ULL << VIRTIO_NET_F_GUEST_UFO) |
		(1ULL << VIRTIO_NET_F_HOST_TSO4) |
		(1ULL << VIRTIO_NET_F_HOST_TSO6) |
		(1ULL << VIRTIO_NET_F_HOST_ECN) |
		(1ULL << VIRTIO_NET_F_HOST_UFO) |
		(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
		(1ULL << VIRTIO_NET_F_STATUS) |
		(1ULL << VIRTIO_NET_F_SPEED_DUPLEX),
};
55 | ||
/* Currently, only network backend w/o multiqueue is supported. */
#define VHOST_VDPA_VQ_MAX 2

/* One char-device minor per vhost-vdpa instance. */
#define VHOST_VDPA_DEV_MAX (1U << MINORBITS)

/* Per-device state tying a vDPA bus device to a vhost char device. */
struct vhost_vdpa {
	struct vhost_dev vdev;		/* embedded generic vhost device */
	struct iommu_domain *domain;	/* only used when the device has no own DMA ops */
	struct vhost_virtqueue *vqs;	/* array of nvqs virtqueues */
	struct completion completion;	/* signalled on release(); remove() waits on it */
	struct vdpa_device *vdpa;	/* underlying vDPA bus device */
	struct device dev;		/* struct device backing the char device */
	struct cdev cdev;
	atomic_t opened;		/* 0/1: enforces a single opener at a time */
	int nvqs;
	int virtio_id;			/* virtio device id (e.g. VIRTIO_ID_NET) */
	int minor;			/* minor allocated from vhost_vdpa_ida */
};
74 | ||
/* Minor-number allocator for vhost-vdpa char devices. */
static DEFINE_IDA(vhost_vdpa_ida);

/* Base dev_t allocated once at module init. */
static dev_t vhost_vdpa_major;

/* Supported feature masks, indexed by virtio device id. */
static const u64 vhost_vdpa_features[] = {
	[VIRTIO_ID_NET] = VHOST_VDPA_NET_FEATURES,
};
82 | ||
83 | static void handle_vq_kick(struct vhost_work *work) | |
84 | { | |
85 | struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue, | |
86 | poll.work); | |
87 | struct vhost_vdpa *v = container_of(vq->dev, struct vhost_vdpa, vdev); | |
88 | const struct vdpa_config_ops *ops = v->vdpa->config; | |
89 | ||
90 | ops->kick_vq(v->vdpa, vq - v->vqs); | |
91 | } | |
92 | ||
93 | static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) | |
94 | { | |
95 | struct vhost_virtqueue *vq = private; | |
96 | struct eventfd_ctx *call_ctx = vq->call_ctx; | |
97 | ||
98 | if (call_ctx) | |
99 | eventfd_signal(call_ctx, 1); | |
100 | ||
101 | return IRQ_HANDLED; | |
102 | } | |
103 | ||
104 | static void vhost_vdpa_reset(struct vhost_vdpa *v) | |
105 | { | |
106 | struct vdpa_device *vdpa = v->vdpa; | |
107 | const struct vdpa_config_ops *ops = vdpa->config; | |
108 | ||
109 | ops->set_status(vdpa, 0); | |
110 | } | |
111 | ||
112 | static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp) | |
113 | { | |
114 | struct vdpa_device *vdpa = v->vdpa; | |
115 | const struct vdpa_config_ops *ops = vdpa->config; | |
116 | u32 device_id; | |
117 | ||
118 | device_id = ops->get_device_id(vdpa); | |
119 | ||
120 | if (copy_to_user(argp, &device_id, sizeof(device_id))) | |
121 | return -EFAULT; | |
122 | ||
123 | return 0; | |
124 | } | |
125 | ||
126 | static long vhost_vdpa_get_status(struct vhost_vdpa *v, u8 __user *statusp) | |
127 | { | |
128 | struct vdpa_device *vdpa = v->vdpa; | |
129 | const struct vdpa_config_ops *ops = vdpa->config; | |
130 | u8 status; | |
131 | ||
132 | status = ops->get_status(vdpa); | |
133 | ||
134 | if (copy_to_user(statusp, &status, sizeof(status))) | |
135 | return -EFAULT; | |
136 | ||
137 | return 0; | |
138 | } | |
139 | ||
140 | static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) | |
141 | { | |
142 | struct vdpa_device *vdpa = v->vdpa; | |
143 | const struct vdpa_config_ops *ops = vdpa->config; | |
144 | u8 status; | |
145 | ||
146 | if (copy_from_user(&status, statusp, sizeof(status))) | |
147 | return -EFAULT; | |
148 | ||
149 | /* | |
150 | * Userspace shouldn't remove status bits unless reset the | |
151 | * status to 0. | |
152 | */ | |
153 | if (status != 0 && (ops->get_status(vdpa) & ~status) != 0) | |
154 | return -EINVAL; | |
155 | ||
156 | ops->set_status(vdpa, status); | |
157 | ||
158 | return 0; | |
159 | } | |
160 | ||
161 | static int vhost_vdpa_config_validate(struct vhost_vdpa *v, | |
162 | struct vhost_vdpa_config *c) | |
163 | { | |
164 | long size = 0; | |
165 | ||
166 | switch (v->virtio_id) { | |
167 | case VIRTIO_ID_NET: | |
168 | size = sizeof(struct virtio_net_config); | |
169 | break; | |
170 | } | |
171 | ||
172 | if (c->len == 0) | |
173 | return -EINVAL; | |
174 | ||
175 | if (c->len > size - c->off) | |
176 | return -E2BIG; | |
177 | ||
178 | return 0; | |
179 | } | |
180 | ||
181 | static long vhost_vdpa_get_config(struct vhost_vdpa *v, | |
182 | struct vhost_vdpa_config __user *c) | |
183 | { | |
184 | struct vdpa_device *vdpa = v->vdpa; | |
185 | const struct vdpa_config_ops *ops = vdpa->config; | |
186 | struct vhost_vdpa_config config; | |
187 | unsigned long size = offsetof(struct vhost_vdpa_config, buf); | |
188 | u8 *buf; | |
189 | ||
190 | if (copy_from_user(&config, c, size)) | |
191 | return -EFAULT; | |
192 | if (vhost_vdpa_config_validate(v, &config)) | |
193 | return -EINVAL; | |
194 | buf = kvzalloc(config.len, GFP_KERNEL); | |
195 | if (!buf) | |
196 | return -ENOMEM; | |
197 | ||
198 | ops->get_config(vdpa, config.off, buf, config.len); | |
199 | ||
200 | if (copy_to_user(c->buf, buf, config.len)) { | |
201 | kvfree(buf); | |
202 | return -EFAULT; | |
203 | } | |
204 | ||
205 | kvfree(buf); | |
206 | return 0; | |
207 | } | |
208 | ||
209 | static long vhost_vdpa_set_config(struct vhost_vdpa *v, | |
210 | struct vhost_vdpa_config __user *c) | |
211 | { | |
212 | struct vdpa_device *vdpa = v->vdpa; | |
213 | const struct vdpa_config_ops *ops = vdpa->config; | |
214 | struct vhost_vdpa_config config; | |
215 | unsigned long size = offsetof(struct vhost_vdpa_config, buf); | |
216 | u8 *buf; | |
217 | ||
218 | if (copy_from_user(&config, c, size)) | |
219 | return -EFAULT; | |
220 | if (vhost_vdpa_config_validate(v, &config)) | |
221 | return -EINVAL; | |
222 | buf = kvzalloc(config.len, GFP_KERNEL); | |
223 | if (!buf) | |
224 | return -ENOMEM; | |
225 | ||
226 | if (copy_from_user(buf, c->buf, config.len)) { | |
227 | kvfree(buf); | |
228 | return -EFAULT; | |
229 | } | |
230 | ||
231 | ops->set_config(vdpa, config.off, buf, config.len); | |
232 | ||
233 | kvfree(buf); | |
234 | return 0; | |
235 | } | |
236 | ||
237 | static long vhost_vdpa_get_features(struct vhost_vdpa *v, u64 __user *featurep) | |
238 | { | |
239 | struct vdpa_device *vdpa = v->vdpa; | |
240 | const struct vdpa_config_ops *ops = vdpa->config; | |
241 | u64 features; | |
242 | ||
243 | features = ops->get_features(vdpa); | |
244 | features &= vhost_vdpa_features[v->virtio_id]; | |
245 | ||
246 | if (copy_to_user(featurep, &features, sizeof(features))) | |
247 | return -EFAULT; | |
248 | ||
249 | return 0; | |
250 | } | |
251 | ||
252 | static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) | |
253 | { | |
254 | struct vdpa_device *vdpa = v->vdpa; | |
255 | const struct vdpa_config_ops *ops = vdpa->config; | |
256 | u64 features; | |
257 | ||
258 | /* | |
259 | * It's not allowed to change the features after they have | |
260 | * been negotiated. | |
261 | */ | |
262 | if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK) | |
263 | return -EBUSY; | |
264 | ||
265 | if (copy_from_user(&features, featurep, sizeof(features))) | |
266 | return -EFAULT; | |
267 | ||
268 | if (features & ~vhost_vdpa_features[v->virtio_id]) | |
269 | return -EINVAL; | |
270 | ||
271 | if (ops->set_features(vdpa, features)) | |
272 | return -EINVAL; | |
273 | ||
274 | return 0; | |
275 | } | |
276 | ||
277 | static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) | |
278 | { | |
279 | struct vdpa_device *vdpa = v->vdpa; | |
280 | const struct vdpa_config_ops *ops = vdpa->config; | |
281 | u16 num; | |
282 | ||
283 | num = ops->get_vq_num_max(vdpa); | |
284 | ||
285 | if (copy_to_user(argp, &num, sizeof(num))) | |
286 | return -EFAULT; | |
287 | ||
288 | return 0; | |
289 | } | |
290 | ||
/*
 * Handle the per-vring ioctls: validate the ring index, let the generic
 * vhost layer parse/record the argument, then mirror the new state down
 * to the vDPA device.  Called with the vhost dev mutex held.
 */
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
				   void __user *argp)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_callback cb;
	struct vhost_virtqueue *vq;
	struct vhost_vring_state s;
	u8 status;
	u32 idx;
	long r;

	/* Every vring ioctl argument begins with the queue index. */
	r = get_user(idx, (u32 __user *)argp);
	if (r < 0)
		return r;

	if (idx >= v->nvqs)
		return -ENOBUFS;

	/* Clamp speculation on the userspace-controlled index. */
	idx = array_index_nospec(idx, v->nvqs);
	vq = &v->vqs[idx];

	/* NOTE(review): 'status' is fetched but never used below — verify. */
	status = ops->get_status(vdpa);

	if (cmd == VHOST_VDPA_SET_VRING_ENABLE) {
		if (copy_from_user(&s, argp, sizeof(s)))
			return -EFAULT;
		ops->set_vq_ready(vdpa, idx, s.num);
		return 0;
	}

	/* Refresh the cached avail index from the device before reporting. */
	if (cmd == VHOST_GET_VRING_BASE)
		vq->last_avail_idx = ops->get_vq_state(v->vdpa, idx);

	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
	if (r)
		return r;

	/* Propagate the state vhost just recorded down to the device. */
	switch (cmd) {
	case VHOST_SET_VRING_ADDR:
		if (ops->set_vq_address(vdpa, idx,
					(u64)(uintptr_t)vq->desc,
					(u64)(uintptr_t)vq->avail,
					(u64)(uintptr_t)vq->used))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_BASE:
		if (ops->set_vq_state(vdpa, idx, vq->last_avail_idx))
			r = -EINVAL;
		break;

	case VHOST_SET_VRING_CALL:
		/* Install (or clear) the device interrupt relay callback. */
		if (vq->call_ctx) {
			cb.callback = vhost_vdpa_virtqueue_cb;
			cb.private = vq;
		} else {
			cb.callback = NULL;
			cb.private = NULL;
		}
		ops->set_vq_cb(vdpa, idx, &cb);
		break;

	case VHOST_SET_VRING_NUM:
		ops->set_vq_num(vdpa, idx, vq->num);
		break;
	}

	return r;
}
361 | ||
/*
 * Top-level ioctl dispatcher.  Device-scoped commands are handled here;
 * anything else falls through to the generic vhost layer and finally to
 * the per-vring handler.  Everything runs under the vhost dev mutex.
 */
static long vhost_vdpa_unlocked_ioctl(struct file *filep,
				      unsigned int cmd, unsigned long arg)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;
	void __user *argp = (void __user *)arg;
	long r;

	mutex_lock(&d->mutex);

	switch (cmd) {
	case VHOST_VDPA_GET_DEVICE_ID:
		r = vhost_vdpa_get_device_id(v, argp);
		break;
	case VHOST_VDPA_GET_STATUS:
		r = vhost_vdpa_get_status(v, argp);
		break;
	case VHOST_VDPA_SET_STATUS:
		r = vhost_vdpa_set_status(v, argp);
		break;
	case VHOST_VDPA_GET_CONFIG:
		r = vhost_vdpa_get_config(v, argp);
		break;
	case VHOST_VDPA_SET_CONFIG:
		r = vhost_vdpa_set_config(v, argp);
		break;
	case VHOST_GET_FEATURES:
		r = vhost_vdpa_get_features(v, argp);
		break;
	case VHOST_SET_FEATURES:
		r = vhost_vdpa_set_features(v, argp);
		break;
	case VHOST_VDPA_GET_VRING_NUM:
		r = vhost_vdpa_get_vring_num(v, argp);
		break;
	case VHOST_SET_LOG_BASE:
	case VHOST_SET_LOG_FD:
		/* Dirty-page logging is not supported by this backend. */
		r = -ENOIOCTLCMD;
		break;
	default:
		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vdpa_vring_ioctl(v, cmd, argp);
		break;
	}

	mutex_unlock(&d->mutex);
	return r;
}
411 | ||
/*
 * Remove every IOTLB mapping intersecting [start, last]: mark writable
 * pages dirty, unpin them, fix up the pinned-page accounting and free
 * the interval-tree nodes.
 */
static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
	struct vhost_dev *dev = &v->vdev;
	struct vhost_iotlb *iotlb = dev->iotlb;
	struct vhost_iotlb_map *map;
	struct page *page;
	unsigned long pfn, pinned;

	while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
		pinned = map->size >> PAGE_SHIFT;
		for (pfn = map->addr >> PAGE_SHIFT;
		     pinned > 0; pfn++, pinned--) {
			page = pfn_to_page(pfn);
			/* Device-writable mappings may have dirtied the page. */
			if (map->perm & VHOST_ACCESS_WO)
				set_page_dirty_lock(page);
			unpin_user_page(page);
		}
		atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
		vhost_iotlb_map_free(iotlb, map);
	}
}
433 | ||
434 | static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v) | |
435 | { | |
436 | struct vhost_dev *dev = &v->vdev; | |
437 | ||
438 | vhost_vdpa_iotlb_unmap(v, 0ULL, 0ULL - 1); | |
439 | kfree(dev->iotlb); | |
440 | dev->iotlb = NULL; | |
441 | } | |
442 | ||
443 | static int perm_to_iommu_flags(u32 perm) | |
444 | { | |
445 | int flags = 0; | |
446 | ||
447 | switch (perm) { | |
448 | case VHOST_ACCESS_WO: | |
449 | flags |= IOMMU_WRITE; | |
450 | break; | |
451 | case VHOST_ACCESS_RO: | |
452 | flags |= IOMMU_READ; | |
453 | break; | |
454 | case VHOST_ACCESS_RW: | |
455 | flags |= (IOMMU_WRITE | IOMMU_READ); | |
456 | break; | |
457 | default: | |
458 | WARN(1, "invalidate vhost IOTLB permission\n"); | |
459 | break; | |
460 | } | |
461 | ||
462 | return flags | IOMMU_CACHE; | |
463 | } | |
464 | ||
465 | static int vhost_vdpa_map(struct vhost_vdpa *v, | |
466 | u64 iova, u64 size, u64 pa, u32 perm) | |
467 | { | |
468 | struct vhost_dev *dev = &v->vdev; | |
469 | struct vdpa_device *vdpa = v->vdpa; | |
470 | const struct vdpa_config_ops *ops = vdpa->config; | |
471 | int r = 0; | |
472 | ||
473 | r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1, | |
474 | pa, perm); | |
475 | if (r) | |
476 | return r; | |
477 | ||
478 | if (ops->dma_map) | |
479 | r = ops->dma_map(vdpa, iova, size, pa, perm); | |
480 | else if (ops->set_map) | |
481 | r = ops->set_map(vdpa, dev->iotlb); | |
482 | else | |
483 | r = iommu_map(v->domain, iova, pa, size, | |
484 | perm_to_iommu_flags(perm)); | |
485 | ||
486 | return r; | |
487 | } | |
488 | ||
489 | static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size) | |
490 | { | |
491 | struct vhost_dev *dev = &v->vdev; | |
492 | struct vdpa_device *vdpa = v->vdpa; | |
493 | const struct vdpa_config_ops *ops = vdpa->config; | |
494 | ||
495 | vhost_vdpa_iotlb_unmap(v, iova, iova + size - 1); | |
496 | ||
497 | if (ops->dma_map) | |
498 | ops->dma_unmap(vdpa, iova, size); | |
499 | else if (ops->set_map) | |
500 | ops->set_map(vdpa, dev->iotlb); | |
501 | else | |
502 | iommu_unmap(v->domain, iova, size); | |
503 | } | |
504 | ||
505 | static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, | |
506 | struct vhost_iotlb_msg *msg) | |
507 | { | |
508 | struct vhost_dev *dev = &v->vdev; | |
509 | struct vhost_iotlb *iotlb = dev->iotlb; | |
510 | struct page **page_list; | |
511 | unsigned long list_size = PAGE_SIZE / sizeof(struct page *); | |
512 | unsigned int gup_flags = FOLL_LONGTERM; | |
513 | unsigned long npages, cur_base, map_pfn, last_pfn = 0; | |
514 | unsigned long locked, lock_limit, pinned, i; | |
515 | u64 iova = msg->iova; | |
516 | int ret = 0; | |
517 | ||
518 | if (vhost_iotlb_itree_first(iotlb, msg->iova, | |
519 | msg->iova + msg->size - 1)) | |
520 | return -EEXIST; | |
521 | ||
522 | page_list = (struct page **) __get_free_page(GFP_KERNEL); | |
523 | if (!page_list) | |
524 | return -ENOMEM; | |
525 | ||
526 | if (msg->perm & VHOST_ACCESS_WO) | |
527 | gup_flags |= FOLL_WRITE; | |
528 | ||
529 | npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT; | |
530 | if (!npages) | |
531 | return -EINVAL; | |
532 | ||
533 | down_read(&dev->mm->mmap_sem); | |
534 | ||
535 | locked = atomic64_add_return(npages, &dev->mm->pinned_vm); | |
536 | lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; | |
537 | ||
538 | if (locked > lock_limit) { | |
539 | ret = -ENOMEM; | |
540 | goto out; | |
541 | } | |
542 | ||
543 | cur_base = msg->uaddr & PAGE_MASK; | |
544 | iova &= PAGE_MASK; | |
545 | ||
546 | while (npages) { | |
547 | pinned = min_t(unsigned long, npages, list_size); | |
548 | ret = pin_user_pages(cur_base, pinned, | |
549 | gup_flags, page_list, NULL); | |
550 | if (ret != pinned) | |
551 | goto out; | |
552 | ||
553 | if (!last_pfn) | |
554 | map_pfn = page_to_pfn(page_list[0]); | |
555 | ||
556 | for (i = 0; i < ret; i++) { | |
557 | unsigned long this_pfn = page_to_pfn(page_list[i]); | |
558 | u64 csize; | |
559 | ||
560 | if (last_pfn && (this_pfn != last_pfn + 1)) { | |
561 | /* Pin a contiguous chunk of memory */ | |
562 | csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT; | |
563 | if (vhost_vdpa_map(v, iova, csize, | |
564 | map_pfn << PAGE_SHIFT, | |
565 | msg->perm)) | |
566 | goto out; | |
567 | map_pfn = this_pfn; | |
568 | iova += csize; | |
569 | } | |
570 | ||
571 | last_pfn = this_pfn; | |
572 | } | |
573 | ||
574 | cur_base += ret << PAGE_SHIFT; | |
575 | npages -= ret; | |
576 | } | |
577 | ||
578 | /* Pin the rest chunk */ | |
579 | ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT, | |
580 | map_pfn << PAGE_SHIFT, msg->perm); | |
581 | out: | |
582 | if (ret) { | |
583 | vhost_vdpa_unmap(v, msg->iova, msg->size); | |
584 | atomic64_sub(npages, &dev->mm->pinned_vm); | |
585 | } | |
586 | up_read(&dev->mm->mmap_sem); | |
587 | free_page((unsigned long)page_list); | |
588 | return ret; | |
589 | } | |
590 | ||
591 | static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev, | |
592 | struct vhost_iotlb_msg *msg) | |
593 | { | |
594 | struct vhost_vdpa *v = container_of(dev, struct vhost_vdpa, vdev); | |
595 | int r = 0; | |
596 | ||
597 | r = vhost_dev_check_owner(dev); | |
598 | if (r) | |
599 | return r; | |
600 | ||
601 | switch (msg->type) { | |
602 | case VHOST_IOTLB_UPDATE: | |
603 | r = vhost_vdpa_process_iotlb_update(v, msg); | |
604 | break; | |
605 | case VHOST_IOTLB_INVALIDATE: | |
606 | vhost_vdpa_unmap(v, msg->iova, msg->size); | |
607 | break; | |
608 | default: | |
609 | r = -EINVAL; | |
610 | break; | |
611 | } | |
612 | ||
613 | return r; | |
614 | } | |
615 | ||
616 | static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb, | |
617 | struct iov_iter *from) | |
618 | { | |
619 | struct file *file = iocb->ki_filp; | |
620 | struct vhost_vdpa *v = file->private_data; | |
621 | struct vhost_dev *dev = &v->vdev; | |
622 | ||
623 | return vhost_chr_write_iter(dev, from); | |
624 | } | |
625 | ||
/*
 * Set up a platform-IOMMU domain for devices that do not manage DMA
 * themselves.  Devices supplying their own set_map/dma_map ops skip this.
 */
static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct device *dma_dev = vdpa_get_dma_dev(vdpa);
	struct bus_type *bus;
	int ret;

	/* Device want to do DMA by itself */
	if (ops->set_map || ops->dma_map)
		return 0;

	bus = dma_dev->bus;
	if (!bus)
		return -EFAULT;

	/* We map everything with IOMMU_CACHE, so require cache coherency. */
	if (!iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY))
		return -ENOTSUPP;

	v->domain = iommu_domain_alloc(bus);
	if (!v->domain)
		return -EIO;

	ret = iommu_attach_device(v->domain, dma_dev);
	if (ret)
		goto err_attach;

	return 0;

err_attach:
	iommu_domain_free(v->domain);
	return ret;
}
659 | ||
660 | static void vhost_vdpa_free_domain(struct vhost_vdpa *v) | |
661 | { | |
662 | struct vdpa_device *vdpa = v->vdpa; | |
663 | struct device *dma_dev = vdpa_get_dma_dev(vdpa); | |
664 | ||
665 | if (v->domain) { | |
666 | iommu_detach_device(v->domain, dma_dev); | |
667 | iommu_domain_free(v->domain); | |
668 | } | |
669 | ||
670 | v->domain = NULL; | |
671 | } | |
672 | ||
673 | static int vhost_vdpa_open(struct inode *inode, struct file *filep) | |
674 | { | |
675 | struct vhost_vdpa *v; | |
676 | struct vhost_dev *dev; | |
677 | struct vhost_virtqueue **vqs; | |
678 | int nvqs, i, r, opened; | |
679 | ||
680 | v = container_of(inode->i_cdev, struct vhost_vdpa, cdev); | |
681 | if (!v) | |
682 | return -ENODEV; | |
683 | ||
684 | opened = atomic_cmpxchg(&v->opened, 0, 1); | |
685 | if (opened) | |
686 | return -EBUSY; | |
687 | ||
688 | nvqs = v->nvqs; | |
689 | vhost_vdpa_reset(v); | |
690 | ||
691 | vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL); | |
692 | if (!vqs) { | |
693 | r = -ENOMEM; | |
694 | goto err; | |
695 | } | |
696 | ||
697 | dev = &v->vdev; | |
698 | for (i = 0; i < nvqs; i++) { | |
699 | vqs[i] = &v->vqs[i]; | |
700 | vqs[i]->handle_kick = handle_vq_kick; | |
701 | } | |
702 | vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, | |
703 | vhost_vdpa_process_iotlb_msg); | |
704 | ||
705 | dev->iotlb = vhost_iotlb_alloc(0, 0); | |
706 | if (!dev->iotlb) { | |
707 | r = -ENOMEM; | |
708 | goto err_init_iotlb; | |
709 | } | |
710 | ||
711 | r = vhost_vdpa_alloc_domain(v); | |
712 | if (r) | |
713 | goto err_init_iotlb; | |
714 | ||
715 | filep->private_data = v; | |
716 | ||
717 | return 0; | |
718 | ||
719 | err_init_iotlb: | |
720 | vhost_dev_cleanup(&v->vdev); | |
721 | err: | |
722 | atomic_dec(&v->opened); | |
723 | return r; | |
724 | } | |
725 | ||
/*
 * Last close of the char device: reset the device, tear down vhost state,
 * the IOTLB and the IOMMU domain, then wake up a pending remove().
 */
static int vhost_vdpa_release(struct inode *inode, struct file *filep)
{
	struct vhost_vdpa *v = filep->private_data;
	struct vhost_dev *d = &v->vdev;

	mutex_lock(&d->mutex);
	filep->private_data = NULL;
	vhost_vdpa_reset(v);
	vhost_dev_stop(&v->vdev);
	vhost_vdpa_iotlb_free(v);
	vhost_vdpa_free_domain(v);
	vhost_dev_cleanup(&v->vdev);
	kfree(v->vdev.vqs);	/* the vq pointer array allocated in open() */
	mutex_unlock(&d->mutex);

	atomic_dec(&v->opened);
	complete(&v->completion);	/* lets vhost_vdpa_remove() proceed */

	return 0;
}
746 | ||
/* File operations for the per-device /dev/vhost-vdpa-N char device. */
static const struct file_operations vhost_vdpa_fops = {
	.owner		= THIS_MODULE,
	.open		= vhost_vdpa_open,
	.release	= vhost_vdpa_release,
	.write_iter	= vhost_vdpa_chr_write_iter,
	.unlocked_ioctl	= vhost_vdpa_unlocked_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
};
755 | ||
756 | static void vhost_vdpa_release_dev(struct device *device) | |
757 | { | |
758 | struct vhost_vdpa *v = | |
759 | container_of(device, struct vhost_vdpa, dev); | |
760 | ||
761 | ida_simple_remove(&vhost_vdpa_ida, v->minor); | |
762 | kfree(v->vqs); | |
763 | kfree(v); | |
764 | } | |
765 | ||
/*
 * vDPA bus probe: allocate the per-device state and expose a
 * /dev/vhost-vdpa-<minor> char device for it.
 */
static int vhost_vdpa_probe(struct vdpa_device *vdpa)
{
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vhost_vdpa *v;
	int minor, nvqs = VHOST_VDPA_VQ_MAX;
	int r;

	/* Currently, we only accept the network devices. */
	if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
		return -ENOTSUPP;

	v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!v)
		return -ENOMEM;

	minor = ida_simple_get(&vhost_vdpa_ida, 0,
			       VHOST_VDPA_DEV_MAX, GFP_KERNEL);
	if (minor < 0) {
		kfree(v);
		return minor;
	}

	atomic_set(&v->opened, 0);
	v->minor = minor;
	v->vdpa = vdpa;
	v->nvqs = nvqs;
	v->virtio_id = ops->get_device_id(vdpa);

	device_initialize(&v->dev);
	/* From here on, errors are unwound by put_device() -> release_dev. */
	v->dev.release = vhost_vdpa_release_dev;
	v->dev.parent = &vdpa->dev;
	v->dev.devt = MKDEV(MAJOR(vhost_vdpa_major), minor);
	v->vqs = kmalloc_array(nvqs, sizeof(struct vhost_virtqueue),
			       GFP_KERNEL);
	if (!v->vqs) {
		r = -ENOMEM;
		goto err;
	}

	r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor);
	if (r)
		goto err;

	cdev_init(&v->cdev, &vhost_vdpa_fops);
	v->cdev.owner = THIS_MODULE;

	r = cdev_device_add(&v->cdev, &v->dev);
	if (r)
		goto err;

	init_completion(&v->completion);
	vdpa_set_drvdata(vdpa, v);

	return 0;

err:
	/* release_dev frees v, v->vqs and the ida slot. */
	put_device(&v->dev);
	return r;
}
825 | ||
/*
 * vDPA bus unbind: delete the char device, then wait until any current
 * opener releases the file before dropping our device reference.
 */
static void vhost_vdpa_remove(struct vdpa_device *vdpa)
{
	struct vhost_vdpa *v = vdpa_get_drvdata(vdpa);
	int opened;

	cdev_device_del(&v->cdev, &v->dev);

	do {
		/* Claim the "opened" slot so no new open() can race in. */
		opened = atomic_cmpxchg(&v->opened, 0, 1);
		if (!opened)
			break;
		/* An opener exists; wait for its release() to complete us. */
		wait_for_completion(&v->completion);
	} while (1);

	put_device(&v->dev);
}
842 | ||
/* Driver object binding vhost-vdpa onto devices of the vDPA bus. */
static struct vdpa_driver vhost_vdpa_driver = {
	.driver = {
		.name	= "vhost_vdpa",
	},
	.probe	= vhost_vdpa_probe,
	.remove	= vhost_vdpa_remove,
};
850 | ||
851 | static int __init vhost_vdpa_init(void) | |
852 | { | |
853 | int r; | |
854 | ||
855 | r = alloc_chrdev_region(&vhost_vdpa_major, 0, VHOST_VDPA_DEV_MAX, | |
856 | "vhost-vdpa"); | |
857 | if (r) | |
858 | goto err_alloc_chrdev; | |
859 | ||
860 | r = vdpa_register_driver(&vhost_vdpa_driver); | |
861 | if (r) | |
862 | goto err_vdpa_register_driver; | |
863 | ||
864 | return 0; | |
865 | ||
866 | err_vdpa_register_driver: | |
867 | unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX); | |
868 | err_alloc_chrdev: | |
869 | return r; | |
870 | } | |
871 | module_init(vhost_vdpa_init); | |
872 | ||
873 | static void __exit vhost_vdpa_exit(void) | |
874 | { | |
875 | vdpa_unregister_driver(&vhost_vdpa_driver); | |
876 | unregister_chrdev_region(vhost_vdpa_major, VHOST_VDPA_DEV_MAX); | |
877 | } | |
878 | module_exit(vhost_vdpa_exit); | |
879 | ||
880 | MODULE_VERSION("0.0.1"); | |
881 | MODULE_LICENSE("GPL v2"); | |
882 | MODULE_AUTHOR("Intel Corporation"); | |
883 | MODULE_DESCRIPTION("vDPA-based vhost backend for virtio"); |