git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blobdiff - drivers/vhost/net.c
vhost_net: disable zerocopy by default
[mirror_ubuntu-bionic-kernel.git] / drivers / vhost / net.c
index c7bdeb6556469efb93e2a6a7e742da3a37ad7e69..e71583c96055343ed7d2de3830bfaa88b55cb52c 100644 (file)
@@ -35,7 +35,7 @@
 
 #include "vhost.h"
 
-static int experimental_zcopytx = 1;
+static int experimental_zcopytx = 0;
 module_param(experimental_zcopytx, int, 0444);
 MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
                                       " 1 -Enable; 0 - Disable");
@@ -44,6 +44,12 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
  * Using this limit prevents one virtqueue from starving others. */
 #define VHOST_NET_WEIGHT 0x80000
 
+/* Max number of packets transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others when
+ * packets are small.
+ */
+#define VHOST_NET_PKT_WEIGHT 256
+
 /* MAX number of TX used buffers for outstanding zerocopy */
 #define VHOST_MAX_PEND 128
 #define VHOST_GOODCOPY_LEN 256
@@ -373,13 +379,10 @@ static inline unsigned long busy_clock(void)
        return local_clock() >> 10;
 }
 
-static bool vhost_can_busy_poll(struct vhost_dev *dev,
-                               unsigned long endtime)
+static bool vhost_can_busy_poll(unsigned long endtime)
 {
-       return likely(!need_resched()) &&
-              likely(!time_after(busy_clock(), endtime)) &&
-              likely(!signal_pending(current)) &&
-              !vhost_has_work(dev);
+       return likely(!need_resched() && !time_after(busy_clock(), endtime) &&
+                     !signal_pending(current));
 }
 
 static void vhost_net_disable_vq(struct vhost_net *n,
@@ -411,7 +414,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
                                    struct vhost_virtqueue *vq,
                                    struct iovec iov[], unsigned int iov_size,
-                                   unsigned int *out_num, unsigned int *in_num)
+                                   unsigned int *out_num, unsigned int *in_num,
+                                   bool *busyloop_intr)
 {
        unsigned long uninitialized_var(endtime);
        int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
@@ -420,9 +424,15 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
        if (r == vq->num && vq->busyloop_timeout) {
                preempt_disable();
                endtime = busy_clock() + vq->busyloop_timeout;
-               while (vhost_can_busy_poll(vq->dev, endtime) &&
-                      vhost_vq_avail_empty(vq->dev, vq))
+               while (vhost_can_busy_poll(endtime)) {
+                       if (vhost_has_work(vq->dev)) {
+                               *busyloop_intr = true;
+                               break;
+                       }
+                       if (!vhost_vq_avail_empty(vq->dev, vq))
+                               break;
                        cpu_relax();
+               }
                preempt_enable();
                r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
                                      out_num, in_num, NULL, NULL);
@@ -461,6 +471,7 @@ static void handle_tx(struct vhost_net *net)
        struct socket *sock;
        struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
        bool zcopy, zcopy_used;
+       int sent_pkts = 0;
 
        mutex_lock(&vq->mutex);
        sock = vq->private_data;
@@ -476,21 +487,25 @@ static void handle_tx(struct vhost_net *net)
        hdr_size = nvq->vhost_hlen;
        zcopy = nvq->ubufs;
 
-       for (;;) {
+       do {
+               bool busyloop_intr;
+
                /* Release DMAs done buffers first */
                if (zcopy)
                        vhost_zerocopy_signal_used(net, vq);
 
-
+               busyloop_intr = false;
                head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
                                                ARRAY_SIZE(vq->iov),
-                                               &out, &in);
+                                               &out, &in, &busyloop_intr);
                /* On error, stop handling until the next kick. */
                if (unlikely(head < 0))
                        break;
                /* Nothing new?  Wait for eventfd to tell us they refilled. */
                if (head == vq->num) {
-                       if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+                       if (unlikely(busyloop_intr)) {
+                               vhost_poll_queue(&vq->poll);
+                       } else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
                                vhost_disable_notify(&net->dev, vq);
                                continue;
                        }
@@ -538,7 +553,6 @@ static void handle_tx(struct vhost_net *net)
                        msg.msg_control = NULL;
                        ubufs = NULL;
                }
-
                total_len += len;
                if (total_len < VHOST_NET_WEIGHT &&
                    !vhost_vq_avail_empty(&net->dev, vq) &&
@@ -568,11 +582,7 @@ static void handle_tx(struct vhost_net *net)
                else
                        vhost_zerocopy_signal_used(net, vq);
                vhost_net_tx_packet(net);
-               if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-                       vhost_poll_queue(&vq->poll);
-                       break;
-               }
-       }
+       } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
 out:
        mutex_unlock(&vq->mutex);
 }
@@ -618,13 +628,14 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk)
 
        if (!len && vq->busyloop_timeout) {
                /* Both tx vq and rx socket were polled here */
-               mutex_lock(&vq->mutex);
+               mutex_lock_nested(&vq->mutex, 1);
                vhost_disable_notify(&net->dev, vq);
 
                preempt_disable();
                endtime = busy_clock() + vq->busyloop_timeout;
 
-               while (vhost_can_busy_poll(&net->dev, endtime) &&
+               while (vhost_can_busy_poll(endtime) &&
+                      !vhost_has_work(&net->dev) &&
                       !sk_has_rx_data(sk) &&
                       vhost_vq_avail_empty(&net->dev, vq))
                        cpu_relax();
@@ -750,8 +761,9 @@ static void handle_rx(struct vhost_net *net)
        struct socket *sock;
        struct iov_iter fixup;
        __virtio16 num_buffers;
+       int recv_pkts = 0;
 
-       mutex_lock(&vq->mutex);
+       mutex_lock_nested(&vq->mutex, 0);
        sock = vq->private_data;
        if (!sock)
                goto out;
@@ -769,7 +781,11 @@ static void handle_rx(struct vhost_net *net)
                vq->log : NULL;
        mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
 
-       while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
+       do {
+               sock_len = vhost_net_rx_peek_head_len(net, sock->sk);
+
+               if (!sock_len)
+                       break;
                sock_len += sock_hlen;
                vhost_len = sock_len + vhost_hlen;
                headcount = get_rx_bufs(vq, vq->heads, vhost_len,
@@ -847,13 +863,11 @@ static void handle_rx(struct vhost_net *net)
                vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
                                            headcount);
                if (unlikely(vq_log))
-                       vhost_log_write(vq, vq_log, log, vhost_len);
+                       vhost_log_write(vq, vq_log, log, vhost_len,
+                                       vq->iov, in);
                total_len += vhost_len;
-               if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
-                       vhost_poll_queue(&vq->poll);
-                       goto out;
-               }
-       }
+       } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len)));
+
        vhost_net_enable_vq(net, vq);
 out:
        mutex_unlock(&vq->mutex);
@@ -931,7 +945,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
                n->vqs[i].sock_hlen = 0;
                vhost_net_buf_init(&n->vqs[i].rxq);
        }
-       vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
+       vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
+                      VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT);
 
        vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
        vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
@@ -1182,7 +1197,8 @@ err_used:
        if (ubufs)
                vhost_net_ubuf_put_wait_and_free(ubufs);
 err_ubufs:
-       sockfd_put(sock);
+       if (sock)
+               sockfd_put(sock);
 err_vq:
        mutex_unlock(&vq->mutex);
 err:
@@ -1208,6 +1224,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
        }
        vhost_net_stop(n, &tx_sock, &rx_sock);
        vhost_net_flush(n);
+       vhost_dev_stop(&n->dev);
        vhost_dev_reset_owner(&n->dev, umem);
        vhost_net_vq_reset(n);
 done: