]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - drivers/misc/mic/vop/vop_vringh.c
Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[mirror_ubuntu-jammy-kernel.git] / drivers / misc / mic / vop / vop_vringh.c
CommitLineData
61e9c905
SD
1/*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2016 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * The full GNU General Public License is included in this distribution in
16 * the file called "COPYING".
17 *
18 * Intel Virtio Over PCIe (VOP) driver.
19 *
20 */
21#include <linux/sched.h>
22#include <linux/poll.h>
23#include <linux/dma-mapping.h>
24
25#include <linux/mic_common.h>
26#include "../common/mic_dev.h"
27
28#include <linux/mic_ioctl.h>
29#include "vop_main.h"
30
31/* Helper API to obtain the VOP PCIe device */
32static inline struct device *vop_dev(struct vop_vdev *vdev)
33{
34 return vdev->vpdev->dev.parent;
35}
36
37/* Helper API to check if a virtio device is initialized */
38static inline int vop_vdev_inited(struct vop_vdev *vdev)
39{
40 if (!vdev)
41 return -EINVAL;
42 /* Device has not been created yet */
43 if (!vdev->dd || !vdev->dd->type) {
44 dev_err(vop_dev(vdev), "%s %d err %d\n",
45 __func__, __LINE__, -EINVAL);
46 return -EINVAL;
47 }
48 /* Device has been removed/deleted */
49 if (vdev->dd->type == -1) {
50 dev_dbg(vop_dev(vdev), "%s %d err %d\n",
51 __func__, __LINE__, -ENODEV);
52 return -ENODEV;
53 }
54 return 0;
55}
56
57static void _vop_notify(struct vringh *vrh)
58{
59 struct vop_vringh *vvrh = container_of(vrh, struct vop_vringh, vrh);
60 struct vop_vdev *vdev = vvrh->vdev;
61 struct vop_device *vpdev = vdev->vpdev;
62 s8 db = vdev->dc->h2c_vdev_db;
63
64 if (db != -1)
65 vpdev->hw_ops->send_intr(vpdev, db);
66}
67
68static void vop_virtio_init_post(struct vop_vdev *vdev)
69{
70 struct mic_vqconfig *vqconfig = mic_vq_config(vdev->dd);
71 struct vop_device *vpdev = vdev->vpdev;
72 int i, used_size;
73
74 for (i = 0; i < vdev->dd->num_vq; i++) {
75 used_size = PAGE_ALIGN(sizeof(u16) * 3 +
76 sizeof(struct vring_used_elem) *
77 le16_to_cpu(vqconfig->num));
78 if (!le64_to_cpu(vqconfig[i].used_address)) {
79 dev_warn(vop_dev(vdev), "used_address zero??\n");
80 continue;
81 }
82 vdev->vvr[i].vrh.vring.used =
83 (void __force *)vpdev->hw_ops->ioremap(
84 vpdev,
85 le64_to_cpu(vqconfig[i].used_address),
86 used_size);
87 }
88
89 vdev->dc->used_address_updated = 0;
90
91 dev_info(vop_dev(vdev), "%s: device type %d LINKUP\n",
92 __func__, vdev->virtio_id);
93}
94
95static inline void vop_virtio_device_reset(struct vop_vdev *vdev)
96{
97 int i;
98
99 dev_dbg(vop_dev(vdev), "%s: status %d device type %d RESET\n",
100 __func__, vdev->dd->status, vdev->virtio_id);
101
102 for (i = 0; i < vdev->dd->num_vq; i++)
103 /*
104 * Avoid lockdep false positive. The + 1 is for the vop
105 * mutex which is held in the reset devices code path.
106 */
107 mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
108
109 /* 0 status means "reset" */
110 vdev->dd->status = 0;
111 vdev->dc->vdev_reset = 0;
112 vdev->dc->host_ack = 1;
113
114 for (i = 0; i < vdev->dd->num_vq; i++) {
115 struct vringh *vrh = &vdev->vvr[i].vrh;
116
117 vdev->vvr[i].vring.info->avail_idx = 0;
118 vrh->completed = 0;
119 vrh->last_avail_idx = 0;
120 vrh->last_used_idx = 0;
121 }
122
123 for (i = 0; i < vdev->dd->num_vq; i++)
124 mutex_unlock(&vdev->vvr[i].vr_mutex);
125}
126
127static void vop_virtio_reset_devices(struct vop_info *vi)
128{
129 struct list_head *pos, *tmp;
130 struct vop_vdev *vdev;
131
132 list_for_each_safe(pos, tmp, &vi->vdev_list) {
133 vdev = list_entry(pos, struct vop_vdev, list);
134 vop_virtio_device_reset(vdev);
135 vdev->poll_wake = 1;
136 wake_up(&vdev->waitq);
137 }
138}
139
140static void vop_bh_handler(struct work_struct *work)
141{
142 struct vop_vdev *vdev = container_of(work, struct vop_vdev,
143 virtio_bh_work);
144
145 if (vdev->dc->used_address_updated)
146 vop_virtio_init_post(vdev);
147
148 if (vdev->dc->vdev_reset)
149 vop_virtio_device_reset(vdev);
150
151 vdev->poll_wake = 1;
152 wake_up(&vdev->waitq);
153}
154
155static irqreturn_t _vop_virtio_intr_handler(int irq, void *data)
156{
157 struct vop_vdev *vdev = data;
158 struct vop_device *vpdev = vdev->vpdev;
159
160 vpdev->hw_ops->ack_interrupt(vpdev, vdev->virtio_db);
161 schedule_work(&vdev->virtio_bh_work);
162 return IRQ_HANDLED;
163}
164
165static int vop_virtio_config_change(struct vop_vdev *vdev, void *argp)
166{
167 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
168 int ret = 0, retry, i;
169 struct vop_device *vpdev = vdev->vpdev;
170 struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
171 struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
172 s8 db = bootparam->h2c_config_db;
173
174 mutex_lock(&vi->vop_mutex);
175 for (i = 0; i < vdev->dd->num_vq; i++)
176 mutex_lock_nested(&vdev->vvr[i].vr_mutex, i + 1);
177
178 if (db == -1 || vdev->dd->type == -1) {
179 ret = -EIO;
180 goto exit;
181 }
182
183 memcpy(mic_vq_configspace(vdev->dd), argp, vdev->dd->config_len);
184 vdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
185 vpdev->hw_ops->send_intr(vpdev, db);
186
187 for (retry = 100; retry--;) {
188 ret = wait_event_timeout(wake, vdev->dc->guest_ack,
189 msecs_to_jiffies(100));
190 if (ret)
191 break;
192 }
193
194 dev_dbg(vop_dev(vdev),
195 "%s %d retry: %d\n", __func__, __LINE__, retry);
196 vdev->dc->config_change = 0;
197 vdev->dc->guest_ack = 0;
198exit:
199 for (i = 0; i < vdev->dd->num_vq; i++)
200 mutex_unlock(&vdev->vvr[i].vr_mutex);
201 mutex_unlock(&vi->vop_mutex);
202 return ret;
203}
204
205static int vop_copy_dp_entry(struct vop_vdev *vdev,
206 struct mic_device_desc *argp, __u8 *type,
207 struct mic_device_desc **devpage)
208{
209 struct vop_device *vpdev = vdev->vpdev;
210 struct mic_device_desc *devp;
211 struct mic_vqconfig *vqconfig;
212 int ret = 0, i;
213 bool slot_found = false;
214
215 vqconfig = mic_vq_config(argp);
216 for (i = 0; i < argp->num_vq; i++) {
217 if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
218 ret = -EINVAL;
219 dev_err(vop_dev(vdev), "%s %d err %d\n",
220 __func__, __LINE__, ret);
221 goto exit;
222 }
223 }
224
225 /* Find the first free device page entry */
226 for (i = sizeof(struct mic_bootparam);
227 i < MIC_DP_SIZE - mic_total_desc_size(argp);
228 i += mic_total_desc_size(devp)) {
229 devp = vpdev->hw_ops->get_dp(vpdev) + i;
230 if (devp->type == 0 || devp->type == -1) {
231 slot_found = true;
232 break;
233 }
234 }
235 if (!slot_found) {
236 ret = -EINVAL;
237 dev_err(vop_dev(vdev), "%s %d err %d\n",
238 __func__, __LINE__, ret);
239 goto exit;
240 }
241 /*
242 * Save off the type before doing the memcpy. Type will be set in the
243 * end after completing all initialization for the new device.
244 */
245 *type = argp->type;
246 argp->type = 0;
247 memcpy(devp, argp, mic_desc_size(argp));
248
249 *devpage = devp;
250exit:
251 return ret;
252}
253
254static void vop_init_device_ctrl(struct vop_vdev *vdev,
255 struct mic_device_desc *devpage)
256{
257 struct mic_device_ctrl *dc;
258
259 dc = (void *)devpage + mic_aligned_desc_size(devpage);
260
261 dc->config_change = 0;
262 dc->guest_ack = 0;
263 dc->vdev_reset = 0;
264 dc->host_ack = 0;
265 dc->used_address_updated = 0;
266 dc->c2h_vdev_db = -1;
267 dc->h2c_vdev_db = -1;
268 vdev->dc = dc;
269}
270
271static int vop_virtio_add_device(struct vop_vdev *vdev,
272 struct mic_device_desc *argp)
273{
274 struct vop_info *vi = vdev->vi;
275 struct vop_device *vpdev = vi->vpdev;
276 struct mic_device_desc *dd = NULL;
277 struct mic_vqconfig *vqconfig;
278 int vr_size, i, j, ret;
279 u8 type = 0;
280 s8 db = -1;
281 char irqname[16];
282 struct mic_bootparam *bootparam;
283 u16 num;
284 dma_addr_t vr_addr;
285
286 bootparam = vpdev->hw_ops->get_dp(vpdev);
287 init_waitqueue_head(&vdev->waitq);
288 INIT_LIST_HEAD(&vdev->list);
289 vdev->vpdev = vpdev;
290
291 ret = vop_copy_dp_entry(vdev, argp, &type, &dd);
292 if (ret) {
61e9c905
SD
293 dev_err(vop_dev(vdev), "%s %d err %d\n",
294 __func__, __LINE__, ret);
295 return ret;
296 }
297
298 vop_init_device_ctrl(vdev, dd);
299
300 vdev->dd = dd;
301 vdev->virtio_id = type;
302 vqconfig = mic_vq_config(dd);
303 INIT_WORK(&vdev->virtio_bh_work, vop_bh_handler);
304
305 for (i = 0; i < dd->num_vq; i++) {
306 struct vop_vringh *vvr = &vdev->vvr[i];
307 struct mic_vring *vr = &vdev->vvr[i].vring;
308
309 num = le16_to_cpu(vqconfig[i].num);
310 mutex_init(&vvr->vr_mutex);
311 vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
312 sizeof(struct _mic_vring_info));
313 vr->va = (void *)
314 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
315 get_order(vr_size));
316 if (!vr->va) {
317 ret = -ENOMEM;
318 dev_err(vop_dev(vdev), "%s %d err %d\n",
319 __func__, __LINE__, ret);
320 goto err;
321 }
322 vr->len = vr_size;
323 vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
324 vr->info->magic = cpu_to_le32(MIC_MAGIC + vdev->virtio_id + i);
325 vr_addr = dma_map_single(&vpdev->dev, vr->va, vr_size,
326 DMA_BIDIRECTIONAL);
327 if (dma_mapping_error(&vpdev->dev, vr_addr)) {
328 free_pages((unsigned long)vr->va, get_order(vr_size));
329 ret = -ENOMEM;
330 dev_err(vop_dev(vdev), "%s %d err %d\n",
331 __func__, __LINE__, ret);
332 goto err;
333 }
334 vqconfig[i].address = cpu_to_le64(vr_addr);
335
336 vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
337 ret = vringh_init_kern(&vvr->vrh,
338 *(u32 *)mic_vq_features(vdev->dd),
339 num, false, vr->vr.desc, vr->vr.avail,
340 vr->vr.used);
341 if (ret) {
342 dev_err(vop_dev(vdev), "%s %d err %d\n",
343 __func__, __LINE__, ret);
344 goto err;
345 }
346 vringh_kiov_init(&vvr->riov, NULL, 0);
347 vringh_kiov_init(&vvr->wiov, NULL, 0);
348 vvr->head = USHRT_MAX;
349 vvr->vdev = vdev;
350 vvr->vrh.notify = _vop_notify;
351 dev_dbg(&vpdev->dev,
352 "%s %d index %d va %p info %p vr_size 0x%x\n",
353 __func__, __LINE__, i, vr->va, vr->info, vr_size);
354 vvr->buf = (void *)__get_free_pages(GFP_KERNEL,
355 get_order(VOP_INT_DMA_BUF_SIZE));
356 vvr->buf_da = dma_map_single(&vpdev->dev,
357 vvr->buf, VOP_INT_DMA_BUF_SIZE,
358 DMA_BIDIRECTIONAL);
359 }
360
361 snprintf(irqname, sizeof(irqname), "vop%dvirtio%d", vpdev->index,
362 vdev->virtio_id);
363 vdev->virtio_db = vpdev->hw_ops->next_db(vpdev);
364 vdev->virtio_cookie = vpdev->hw_ops->request_irq(vpdev,
365 _vop_virtio_intr_handler, irqname, vdev,
366 vdev->virtio_db);
367 if (IS_ERR(vdev->virtio_cookie)) {
368 ret = PTR_ERR(vdev->virtio_cookie);
369 dev_dbg(&vpdev->dev, "request irq failed\n");
370 goto err;
371 }
372
373 vdev->dc->c2h_vdev_db = vdev->virtio_db;
374
375 /*
376 * Order the type update with previous stores. This write barrier
377 * is paired with the corresponding read barrier before the uncached
378 * system memory read of the type, on the card while scanning the
379 * device page.
380 */
381 smp_wmb();
382 dd->type = type;
383 argp->type = type;
384
385 if (bootparam) {
386 db = bootparam->h2c_config_db;
387 if (db != -1)
388 vpdev->hw_ops->send_intr(vpdev, db);
389 }
390 dev_dbg(&vpdev->dev, "Added virtio id %d db %d\n", dd->type, db);
391 return 0;
392err:
393 vqconfig = mic_vq_config(dd);
394 for (j = 0; j < i; j++) {
395 struct vop_vringh *vvr = &vdev->vvr[j];
396
397 dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[j].address),
398 vvr->vring.len, DMA_BIDIRECTIONAL);
399 free_pages((unsigned long)vvr->vring.va,
400 get_order(vvr->vring.len));
401 }
402 return ret;
403}
404
405static void vop_dev_remove(struct vop_info *pvi, struct mic_device_ctrl *devp,
406 struct vop_device *vpdev)
407{
408 struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
409 s8 db;
410 int ret, retry;
411 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
412
413 devp->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
414 db = bootparam->h2c_config_db;
415 if (db != -1)
416 vpdev->hw_ops->send_intr(vpdev, db);
417 else
418 goto done;
419 for (retry = 15; retry--;) {
420 ret = wait_event_timeout(wake, devp->guest_ack,
421 msecs_to_jiffies(1000));
422 if (ret)
423 break;
424 }
425done:
426 devp->config_change = 0;
427 devp->guest_ack = 0;
428}
429
430static void vop_virtio_del_device(struct vop_vdev *vdev)
431{
432 struct vop_info *vi = vdev->vi;
433 struct vop_device *vpdev = vdev->vpdev;
434 int i;
435 struct mic_vqconfig *vqconfig;
436 struct mic_bootparam *bootparam = vpdev->hw_ops->get_dp(vpdev);
437
438 if (!bootparam)
439 goto skip_hot_remove;
440 vop_dev_remove(vi, vdev->dc, vpdev);
441skip_hot_remove:
442 vpdev->hw_ops->free_irq(vpdev, vdev->virtio_cookie, vdev);
443 flush_work(&vdev->virtio_bh_work);
444 vqconfig = mic_vq_config(vdev->dd);
445 for (i = 0; i < vdev->dd->num_vq; i++) {
446 struct vop_vringh *vvr = &vdev->vvr[i];
447
448 dma_unmap_single(&vpdev->dev,
449 vvr->buf_da, VOP_INT_DMA_BUF_SIZE,
450 DMA_BIDIRECTIONAL);
451 free_pages((unsigned long)vvr->buf,
452 get_order(VOP_INT_DMA_BUF_SIZE));
453 vringh_kiov_cleanup(&vvr->riov);
454 vringh_kiov_cleanup(&vvr->wiov);
455 dma_unmap_single(&vpdev->dev, le64_to_cpu(vqconfig[i].address),
456 vvr->vring.len, DMA_BIDIRECTIONAL);
457 free_pages((unsigned long)vvr->vring.va,
458 get_order(vvr->vring.len));
459 }
460 /*
461 * Order the type update with previous stores. This write barrier
462 * is paired with the corresponding read barrier before the uncached
463 * system memory read of the type, on the card while scanning the
464 * device page.
465 */
466 smp_wmb();
467 vdev->dd->type = -1;
468}
469
470/*
471 * vop_sync_dma - Wrapper for synchronous DMAs.
472 *
473 * @dev - The address of the pointer to the device instance used
474 * for DMA registration.
475 * @dst - destination DMA address.
476 * @src - source DMA address.
477 * @len - size of the transfer.
478 *
479 * Return DMA_SUCCESS on success
480 */
481static int vop_sync_dma(struct vop_vdev *vdev, dma_addr_t dst, dma_addr_t src,
482 size_t len)
483{
484 int err = 0;
485 struct dma_device *ddev;
486 struct dma_async_tx_descriptor *tx;
487 struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
488 struct dma_chan *vop_ch = vi->dma_ch;
489
490 if (!vop_ch) {
491 err = -EBUSY;
492 goto error;
493 }
494 ddev = vop_ch->device;
495 tx = ddev->device_prep_dma_memcpy(vop_ch, dst, src, len,
496 DMA_PREP_FENCE);
497 if (!tx) {
498 err = -ENOMEM;
499 goto error;
500 } else {
501 dma_cookie_t cookie;
502
503 cookie = tx->tx_submit(tx);
504 if (dma_submit_error(cookie)) {
505 err = -ENOMEM;
506 goto error;
507 }
508 dma_async_issue_pending(vop_ch);
509 err = dma_sync_wait(vop_ch, cookie);
510 }
511error:
512 if (err)
513 dev_err(&vi->vpdev->dev, "%s %d err %d\n",
514 __func__, __LINE__, err);
515 return err;
516}
517
518#define VOP_USE_DMA true
519
520/*
521 * Initiates the copies across the PCIe bus from card memory to a user
522 * space buffer. When transfers are done using DMA, source/destination
523 * addresses and transfer length must follow the alignment requirements of
524 * the MIC DMA engine.
525 */
526static int vop_virtio_copy_to_user(struct vop_vdev *vdev, void __user *ubuf,
527 size_t len, u64 daddr, size_t dlen,
528 int vr_idx)
529{
530 struct vop_device *vpdev = vdev->vpdev;
531 void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
532 struct vop_vringh *vvr = &vdev->vvr[vr_idx];
533 struct vop_info *vi = dev_get_drvdata(&vpdev->dev);
534 size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
535 bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
536 size_t dma_offset, partlen;
537 int err;
538
539 if (!VOP_USE_DMA) {
540 if (copy_to_user(ubuf, (void __force *)dbuf, len)) {
541 err = -EFAULT;
542 dev_err(vop_dev(vdev), "%s %d err %d\n",
543 __func__, __LINE__, err);
544 goto err;
545 }
546 vdev->in_bytes += len;
547 err = 0;
548 goto err;
549 }
550
551 dma_offset = daddr - round_down(daddr, dma_alignment);
552 daddr -= dma_offset;
553 len += dma_offset;
554 /*
555 * X100 uses DMA addresses as seen by the card so adding
556 * the aperture base is not required for DMA. However x200
557 * requires DMA addresses to be an offset into the bar so
558 * add the aperture base for x200.
559 */
560 if (x200)
561 daddr += vpdev->aper->pa;
562 while (len) {
563 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
564 err = vop_sync_dma(vdev, vvr->buf_da, daddr,
565 ALIGN(partlen, dma_alignment));
566 if (err) {
567 dev_err(vop_dev(vdev), "%s %d err %d\n",
568 __func__, __LINE__, err);
569 goto err;
570 }
571 if (copy_to_user(ubuf, vvr->buf + dma_offset,
572 partlen - dma_offset)) {
573 err = -EFAULT;
574 dev_err(vop_dev(vdev), "%s %d err %d\n",
575 __func__, __LINE__, err);
576 goto err;
577 }
578 daddr += partlen;
579 ubuf += partlen;
580 dbuf += partlen;
581 vdev->in_bytes_dma += partlen;
582 vdev->in_bytes += partlen;
583 len -= partlen;
584 dma_offset = 0;
585 }
586 err = 0;
587err:
588 vpdev->hw_ops->iounmap(vpdev, dbuf);
589 dev_dbg(vop_dev(vdev),
590 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
591 __func__, ubuf, dbuf, len, vr_idx);
592 return err;
593}
594
595/*
596 * Initiates copies across the PCIe bus from a user space buffer to card
597 * memory. When transfers are done using DMA, source/destination addresses
598 * and transfer length must follow the alignment requirements of the MIC
599 * DMA engine.
600 */
601static int vop_virtio_copy_from_user(struct vop_vdev *vdev, void __user *ubuf,
602 size_t len, u64 daddr, size_t dlen,
603 int vr_idx)
604{
605 struct vop_device *vpdev = vdev->vpdev;
606 void __iomem *dbuf = vpdev->hw_ops->ioremap(vpdev, daddr, len);
607 struct vop_vringh *vvr = &vdev->vvr[vr_idx];
608 struct vop_info *vi = dev_get_drvdata(&vdev->vpdev->dev);
609 size_t dma_alignment = 1 << vi->dma_ch->device->copy_align;
610 bool x200 = is_dma_copy_aligned(vi->dma_ch->device, 1, 1, 1);
611 size_t partlen;
612 bool dma = VOP_USE_DMA;
613 int err = 0;
614
615 if (daddr & (dma_alignment - 1)) {
616 vdev->tx_dst_unaligned += len;
617 dma = false;
618 } else if (ALIGN(len, dma_alignment) > dlen) {
619 vdev->tx_len_unaligned += len;
620 dma = false;
621 }
622
623 if (!dma)
624 goto memcpy;
625
626 /*
627 * X100 uses DMA addresses as seen by the card so adding
628 * the aperture base is not required for DMA. However x200
629 * requires DMA addresses to be an offset into the bar so
630 * add the aperture base for x200.
631 */
632 if (x200)
633 daddr += vpdev->aper->pa;
634 while (len) {
635 partlen = min_t(size_t, len, VOP_INT_DMA_BUF_SIZE);
636
637 if (copy_from_user(vvr->buf, ubuf, partlen)) {
638 err = -EFAULT;
639 dev_err(vop_dev(vdev), "%s %d err %d\n",
640 __func__, __LINE__, err);
641 goto err;
642 }
643 err = vop_sync_dma(vdev, daddr, vvr->buf_da,
644 ALIGN(partlen, dma_alignment));
645 if (err) {
646 dev_err(vop_dev(vdev), "%s %d err %d\n",
647 __func__, __LINE__, err);
648 goto err;
649 }
650 daddr += partlen;
651 ubuf += partlen;
652 dbuf += partlen;
653 vdev->out_bytes_dma += partlen;
654 vdev->out_bytes += partlen;
655 len -= partlen;
656 }
657memcpy:
658 /*
659 * We are copying to IO below and should ideally use something
660 * like copy_from_user_toio(..) if it existed.
661 */
662 if (copy_from_user((void __force *)dbuf, ubuf, len)) {
663 err = -EFAULT;
664 dev_err(vop_dev(vdev), "%s %d err %d\n",
665 __func__, __LINE__, err);
666 goto err;
667 }
668 vdev->out_bytes += len;
669 err = 0;
670err:
671 vpdev->hw_ops->iounmap(vpdev, dbuf);
672 dev_dbg(vop_dev(vdev),
673 "%s: ubuf %p dbuf %p len 0x%lx vr_idx 0x%x\n",
674 __func__, ubuf, dbuf, len, vr_idx);
675 return err;
676}
677
678#define MIC_VRINGH_READ true
679
680/* Determine the total number of bytes consumed in a VRINGH KIOV */
681static inline u32 vop_vringh_iov_consumed(struct vringh_kiov *iov)
682{
683 int i;
684 u32 total = iov->consumed;
685
686 for (i = 0; i < iov->i; i++)
687 total += iov->iov[i].iov_len;
688 return total;
689}
690
691/*
692 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
693 * This API is heavily based on the vringh_iov_xfer(..) implementation
694 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
695 * and vringh_iov_push_kern(..) directly is because there is no
696 * way to override the VRINGH xfer(..) routines as of v3.10.
697 */
698static int vop_vringh_copy(struct vop_vdev *vdev, struct vringh_kiov *iov,
699 void __user *ubuf, size_t len, bool read, int vr_idx,
700 size_t *out_len)
701{
702 int ret = 0;
703 size_t partlen, tot_len = 0;
704
705 while (len && iov->i < iov->used) {
706 struct kvec *kiov = &iov->iov[iov->i];
707
708 partlen = min(kiov->iov_len, len);
709 if (read)
710 ret = vop_virtio_copy_to_user(vdev, ubuf, partlen,
711 (u64)kiov->iov_base,
712 kiov->iov_len,
713 vr_idx);
714 else
715 ret = vop_virtio_copy_from_user(vdev, ubuf, partlen,
716 (u64)kiov->iov_base,
717 kiov->iov_len,
718 vr_idx);
719 if (ret) {
720 dev_err(vop_dev(vdev), "%s %d err %d\n",
721 __func__, __LINE__, ret);
722 break;
723 }
724 len -= partlen;
725 ubuf += partlen;
726 tot_len += partlen;
727 iov->consumed += partlen;
728 kiov->iov_len -= partlen;
729 kiov->iov_base += partlen;
730 if (!kiov->iov_len) {
731 /* Fix up old iov element then increment. */
732 kiov->iov_len = iov->consumed;
733 kiov->iov_base -= iov->consumed;
734
735 iov->consumed = 0;
736 iov->i++;
737 }
738 }
739 *out_len = tot_len;
740 return ret;
741}
742
743/*
744 * Use the standard VRINGH infrastructure in the kernel to fetch new
745 * descriptors, initiate the copies and update the used ring.
746 */
747static int _vop_virtio_copy(struct vop_vdev *vdev, struct mic_copy_desc *copy)
748{
749 int ret = 0;
750 u32 iovcnt = copy->iovcnt;
751 struct iovec iov;
752 struct iovec __user *u_iov = copy->iov;
753 void __user *ubuf = NULL;
754 struct vop_vringh *vvr = &vdev->vvr[copy->vr_idx];
755 struct vringh_kiov *riov = &vvr->riov;
756 struct vringh_kiov *wiov = &vvr->wiov;
757 struct vringh *vrh = &vvr->vrh;
758 u16 *head = &vvr->head;
759 struct mic_vring *vr = &vvr->vring;
760 size_t len = 0, out_len;
761
762 copy->out_len = 0;
763 /* Fetch a new IOVEC if all previous elements have been processed */
764 if (riov->i == riov->used && wiov->i == wiov->used) {
765 ret = vringh_getdesc_kern(vrh, riov, wiov,
766 head, GFP_KERNEL);
767 /* Check if there are available descriptors */
768 if (ret <= 0)
769 return ret;
770 }
771 while (iovcnt) {
772 if (!len) {
773 /* Copy over a new iovec from user space. */
774 ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
775 if (ret) {
776 ret = -EINVAL;
777 dev_err(vop_dev(vdev), "%s %d err %d\n",
778 __func__, __LINE__, ret);
779 break;
780 }
781 len = iov.iov_len;
782 ubuf = iov.iov_base;
783 }
784 /* Issue all the read descriptors first */
785 ret = vop_vringh_copy(vdev, riov, ubuf, len,
786 MIC_VRINGH_READ, copy->vr_idx, &out_len);
787 if (ret) {
788 dev_err(vop_dev(vdev), "%s %d err %d\n",
789 __func__, __LINE__, ret);
790 break;
791 }
792 len -= out_len;
793 ubuf += out_len;
794 copy->out_len += out_len;
795 /* Issue the write descriptors next */
796 ret = vop_vringh_copy(vdev, wiov, ubuf, len,
797 !MIC_VRINGH_READ, copy->vr_idx, &out_len);
798 if (ret) {
799 dev_err(vop_dev(vdev), "%s %d err %d\n",
800 __func__, __LINE__, ret);
801 break;
802 }
803 len -= out_len;
804 ubuf += out_len;
805 copy->out_len += out_len;
806 if (!len) {
807 /* One user space iovec is now completed */
808 iovcnt--;
809 u_iov++;
810 }
811 /* Exit loop if all elements in KIOVs have been processed. */
812 if (riov->i == riov->used && wiov->i == wiov->used)
813 break;
814 }
815 /*
816 * Update the used ring if a descriptor was available and some data was
817 * copied in/out and the user asked for a used ring update.
818 */
819 if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
820 u32 total = 0;
821
822 /* Determine the total data consumed */
823 total += vop_vringh_iov_consumed(riov);
824 total += vop_vringh_iov_consumed(wiov);
825 vringh_complete_kern(vrh, *head, total);
826 *head = USHRT_MAX;
827 if (vringh_need_notify_kern(vrh) > 0)
828 vringh_notify(vrh);
829 vringh_kiov_cleanup(riov);
830 vringh_kiov_cleanup(wiov);
831 /* Update avail idx for user space */
832 vr->info->avail_idx = vrh->last_avail_idx;
833 }
834 return ret;
835}
836
837static inline int vop_verify_copy_args(struct vop_vdev *vdev,
838 struct mic_copy_desc *copy)
839{
840 if (!vdev || copy->vr_idx >= vdev->dd->num_vq)
841 return -EINVAL;
842 return 0;
843}
844
845/* Copy a specified number of virtio descriptors in a chain */
846static int vop_virtio_copy_desc(struct vop_vdev *vdev,
847 struct mic_copy_desc *copy)
848{
849 int err;
59ea2590 850 struct vop_vringh *vvr;
61e9c905
SD
851
852 err = vop_verify_copy_args(vdev, copy);
853 if (err)
854 return err;
855
59ea2590 856 vvr = &vdev->vvr[copy->vr_idx];
61e9c905
SD
857 mutex_lock(&vvr->vr_mutex);
858 if (!vop_vdevup(vdev)) {
859 err = -ENODEV;
860 dev_err(vop_dev(vdev), "%s %d err %d\n",
861 __func__, __LINE__, err);
862 goto err;
863 }
864 err = _vop_virtio_copy(vdev, copy);
865 if (err) {
866 dev_err(vop_dev(vdev), "%s %d err %d\n",
867 __func__, __LINE__, err);
868 }
869err:
870 mutex_unlock(&vvr->vr_mutex);
871 return err;
872}
873
874static int vop_open(struct inode *inode, struct file *f)
875{
876 struct vop_vdev *vdev;
877 struct vop_info *vi = container_of(f->private_data,
878 struct vop_info, miscdev);
879
880 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
881 if (!vdev)
882 return -ENOMEM;
883 vdev->vi = vi;
884 mutex_init(&vdev->vdev_mutex);
885 f->private_data = vdev;
886 init_completion(&vdev->destroy);
887 complete(&vdev->destroy);
888 return 0;
889}
890
891static int vop_release(struct inode *inode, struct file *f)
892{
893 struct vop_vdev *vdev = f->private_data, *vdev_tmp;
894 struct vop_info *vi = vdev->vi;
895 struct list_head *pos, *tmp;
896 bool found = false;
897
898 mutex_lock(&vdev->vdev_mutex);
899 if (vdev->deleted)
900 goto unlock;
901 mutex_lock(&vi->vop_mutex);
902 list_for_each_safe(pos, tmp, &vi->vdev_list) {
903 vdev_tmp = list_entry(pos, struct vop_vdev, list);
904 if (vdev == vdev_tmp) {
905 vop_virtio_del_device(vdev);
906 list_del(pos);
907 found = true;
908 break;
909 }
910 }
911 mutex_unlock(&vi->vop_mutex);
912unlock:
913 mutex_unlock(&vdev->vdev_mutex);
914 if (!found)
915 wait_for_completion(&vdev->destroy);
916 f->private_data = NULL;
917 kfree(vdev);
918 return 0;
919}
920
921static long vop_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
922{
923 struct vop_vdev *vdev = f->private_data;
924 struct vop_info *vi = vdev->vi;
925 void __user *argp = (void __user *)arg;
926 int ret;
927
928 switch (cmd) {
929 case MIC_VIRTIO_ADD_DEVICE:
930 {
931 struct mic_device_desc dd, *dd_config;
932
933 if (copy_from_user(&dd, argp, sizeof(dd)))
934 return -EFAULT;
935
936 if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
937 dd.num_vq > MIC_MAX_VRINGS)
938 return -EINVAL;
939
940 dd_config = kzalloc(mic_desc_size(&dd), GFP_KERNEL);
941 if (!dd_config)
942 return -ENOMEM;
943 if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
944 ret = -EFAULT;
945 goto free_ret;
946 }
82dc4afd
AD
947 /* Ensure desc has not changed between the two reads */
948 if (memcmp(&dd, dd_config, sizeof(dd))) {
949 ret = -EINVAL;
950 goto free_ret;
951 }
61e9c905
SD
952 mutex_lock(&vdev->vdev_mutex);
953 mutex_lock(&vi->vop_mutex);
954 ret = vop_virtio_add_device(vdev, dd_config);
955 if (ret)
956 goto unlock_ret;
957 list_add_tail(&vdev->list, &vi->vdev_list);
958unlock_ret:
959 mutex_unlock(&vi->vop_mutex);
960 mutex_unlock(&vdev->vdev_mutex);
961free_ret:
962 kfree(dd_config);
963 return ret;
964 }
965 case MIC_VIRTIO_COPY_DESC:
966 {
967 struct mic_copy_desc copy;
968
969 mutex_lock(&vdev->vdev_mutex);
970 ret = vop_vdev_inited(vdev);
971 if (ret)
972 goto _unlock_ret;
973
974 if (copy_from_user(&copy, argp, sizeof(copy))) {
975 ret = -EFAULT;
976 goto _unlock_ret;
977 }
978
979 ret = vop_virtio_copy_desc(vdev, &copy);
980 if (ret < 0)
981 goto _unlock_ret;
982 if (copy_to_user(
983 &((struct mic_copy_desc __user *)argp)->out_len,
984 &copy.out_len, sizeof(copy.out_len)))
985 ret = -EFAULT;
986_unlock_ret:
987 mutex_unlock(&vdev->vdev_mutex);
988 return ret;
989 }
990 case MIC_VIRTIO_CONFIG_CHANGE:
991 {
992 void *buf;
993
994 mutex_lock(&vdev->vdev_mutex);
995 ret = vop_vdev_inited(vdev);
996 if (ret)
997 goto __unlock_ret;
998 buf = kzalloc(vdev->dd->config_len, GFP_KERNEL);
999 if (!buf) {
1000 ret = -ENOMEM;
1001 goto __unlock_ret;
1002 }
1003 if (copy_from_user(buf, argp, vdev->dd->config_len)) {
1004 ret = -EFAULT;
1005 goto done;
1006 }
1007 ret = vop_virtio_config_change(vdev, buf);
1008done:
1009 kfree(buf);
1010__unlock_ret:
1011 mutex_unlock(&vdev->vdev_mutex);
1012 return ret;
1013 }
1014 default:
1015 return -ENOIOCTLCMD;
1016 };
1017 return 0;
1018}
1019
1020/*
1021 * We return POLLIN | POLLOUT from poll when new buffers are enqueued, and
1022 * not when previously enqueued buffers may be available. This means that
1023 * in the card->host (TX) path, when userspace is unblocked by poll it
1024 * must drain all available descriptors or it can stall.
1025 */
afc9a42b 1026static __poll_t vop_poll(struct file *f, poll_table *wait)
61e9c905
SD
1027{
1028 struct vop_vdev *vdev = f->private_data;
afc9a42b 1029 __poll_t mask = 0;
61e9c905
SD
1030
1031 mutex_lock(&vdev->vdev_mutex);
1032 if (vop_vdev_inited(vdev)) {
1033 mask = POLLERR;
1034 goto done;
1035 }
1036 poll_wait(f, &vdev->waitq, wait);
1037 if (vop_vdev_inited(vdev)) {
1038 mask = POLLERR;
1039 } else if (vdev->poll_wake) {
1040 vdev->poll_wake = 0;
1041 mask = POLLIN | POLLOUT;
1042 }
1043done:
1044 mutex_unlock(&vdev->vdev_mutex);
1045 return mask;
1046}
1047
1048static inline int
1049vop_query_offset(struct vop_vdev *vdev, unsigned long offset,
1050 unsigned long *size, unsigned long *pa)
1051{
1052 struct vop_device *vpdev = vdev->vpdev;
1053 unsigned long start = MIC_DP_SIZE;
1054 int i;
1055
1056 /*
1057 * MMAP interface is as follows:
1058 * offset region
1059 * 0x0 virtio device_page
1060 * 0x1000 first vring
1061 * 0x1000 + size of 1st vring second vring
1062 * ....
1063 */
1064 if (!offset) {
1065 *pa = virt_to_phys(vpdev->hw_ops->get_dp(vpdev));
1066 *size = MIC_DP_SIZE;
1067 return 0;
1068 }
1069
1070 for (i = 0; i < vdev->dd->num_vq; i++) {
1071 struct vop_vringh *vvr = &vdev->vvr[i];
1072
1073 if (offset == start) {
1074 *pa = virt_to_phys(vvr->vring.va);
1075 *size = vvr->vring.len;
1076 return 0;
1077 }
1078 start += vvr->vring.len;
1079 }
1080 return -1;
1081}
1082
1083/*
1084 * Maps the device page and virtio rings to user space for readonly access.
1085 */
1086static int vop_mmap(struct file *f, struct vm_area_struct *vma)
1087{
1088 struct vop_vdev *vdev = f->private_data;
1089 unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
1090 unsigned long pa, size = vma->vm_end - vma->vm_start, size_rem = size;
1091 int i, err;
1092
1093 err = vop_vdev_inited(vdev);
1094 if (err)
1095 goto ret;
1096 if (vma->vm_flags & VM_WRITE) {
1097 err = -EACCES;
1098 goto ret;
1099 }
1100 while (size_rem) {
1101 i = vop_query_offset(vdev, offset, &size, &pa);
1102 if (i < 0) {
1103 err = -EINVAL;
1104 goto ret;
1105 }
1106 err = remap_pfn_range(vma, vma->vm_start + offset,
1107 pa >> PAGE_SHIFT, size,
1108 vma->vm_page_prot);
1109 if (err)
1110 goto ret;
1111 size_rem -= size;
1112 offset += size;
1113 }
1114ret:
1115 return err;
1116}
1117
1118static const struct file_operations vop_fops = {
1119 .open = vop_open,
1120 .release = vop_release,
1121 .unlocked_ioctl = vop_ioctl,
1122 .poll = vop_poll,
1123 .mmap = vop_mmap,
1124 .owner = THIS_MODULE,
1125};
1126
1127int vop_host_init(struct vop_info *vi)
1128{
1129 int rc;
1130 struct miscdevice *mdev;
1131 struct vop_device *vpdev = vi->vpdev;
1132
1133 INIT_LIST_HEAD(&vi->vdev_list);
1134 vi->dma_ch = vpdev->dma_ch;
1135 mdev = &vi->miscdev;
1136 mdev->minor = MISC_DYNAMIC_MINOR;
1137 snprintf(vi->name, sizeof(vi->name), "vop_virtio%d", vpdev->index);
1138 mdev->name = vi->name;
1139 mdev->fops = &vop_fops;
1140 mdev->parent = &vpdev->dev;
1141
1142 rc = misc_register(mdev);
1143 if (rc)
1144 dev_err(&vpdev->dev, "%s failed rc %d\n", __func__, rc);
1145 return rc;
1146}
1147
1148void vop_host_uninit(struct vop_info *vi)
1149{
1150 struct list_head *pos, *tmp;
1151 struct vop_vdev *vdev;
1152
1153 mutex_lock(&vi->vop_mutex);
1154 vop_virtio_reset_devices(vi);
1155 list_for_each_safe(pos, tmp, &vi->vdev_list) {
1156 vdev = list_entry(pos, struct vop_vdev, list);
1157 list_del(pos);
1158 reinit_completion(&vdev->destroy);
1159 mutex_unlock(&vi->vop_mutex);
1160 mutex_lock(&vdev->vdev_mutex);
1161 vop_virtio_del_device(vdev);
1162 vdev->deleted = true;
1163 mutex_unlock(&vdev->vdev_mutex);
1164 complete(&vdev->destroy);
1165 mutex_lock(&vi->vop_mutex);
1166 }
1167 mutex_unlock(&vi->vop_mutex);
1168 misc_deregister(&vi->miscdev);
1169}