#include "vhost.h"
-static int experimental_zcopytx = 1;
+static int experimental_zcopytx = 0;
module_param(experimental_zcopytx, int, 0444);
MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
" 1 -Enable; 0 - Disable");
* Using this limit prevents one virtqueue from starving others. */
#define VHOST_NET_WEIGHT 0x80000
+/* Max number of packets transferred before requeueing the job.
+ * Using this limit prevents one virtqueue from starving others with small
+ * pkts.
+ */
+#define VHOST_NET_PKT_WEIGHT 256
+
/* MAX number of TX used buffers for outstanding zerocopy */
#define VHOST_MAX_PEND 128
#define VHOST_GOODCOPY_LEN 256
return local_clock() >> 10;
}
-static bool vhost_can_busy_poll(struct vhost_dev *dev,
- unsigned long endtime)
+static bool vhost_can_busy_poll(unsigned long endtime)
{
- return likely(!need_resched()) &&
- likely(!time_after(busy_clock(), endtime)) &&
- likely(!signal_pending(current)) &&
- !vhost_has_work(dev);
+ return likely(!need_resched() && !time_after(busy_clock(), endtime) &&
+ !signal_pending(current));
}
static void vhost_net_disable_vq(struct vhost_net *n,
static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
struct vhost_virtqueue *vq,
struct iovec iov[], unsigned int iov_size,
- unsigned int *out_num, unsigned int *in_num)
+ unsigned int *out_num, unsigned int *in_num,
+ bool *busyloop_intr)
{
unsigned long uninitialized_var(endtime);
int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
if (r == vq->num && vq->busyloop_timeout) {
preempt_disable();
endtime = busy_clock() + vq->busyloop_timeout;
- while (vhost_can_busy_poll(vq->dev, endtime) &&
- vhost_vq_avail_empty(vq->dev, vq))
+ while (vhost_can_busy_poll(endtime)) {
+ if (vhost_has_work(vq->dev)) {
+ *busyloop_intr = true;
+ break;
+ }
+ if (!vhost_vq_avail_empty(vq->dev, vq))
+ break;
cpu_relax();
+ }
preempt_enable();
r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
out_num, in_num, NULL, NULL);
struct socket *sock;
struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
bool zcopy, zcopy_used;
+ int sent_pkts = 0;
mutex_lock(&vq->mutex);
sock = vq->private_data;
hdr_size = nvq->vhost_hlen;
zcopy = nvq->ubufs;
- for (;;) {
+ do {
+ bool busyloop_intr;
+
/* Release DMAs done buffers first */
if (zcopy)
vhost_zerocopy_signal_used(net, vq);
-
+ busyloop_intr = false;
head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
ARRAY_SIZE(vq->iov),
- &out, &in);
+ &out, &in, &busyloop_intr);
/* On error, stop handling until the next kick. */
if (unlikely(head < 0))
break;
/* Nothing new? Wait for eventfd to tell us they refilled. */
if (head == vq->num) {
- if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+ if (unlikely(busyloop_intr)) {
+ vhost_poll_queue(&vq->poll);
+ } else if (unlikely(vhost_enable_notify(&net->dev, vq))) {
vhost_disable_notify(&net->dev, vq);
continue;
}
msg.msg_control = NULL;
ubufs = NULL;
}
-
total_len += len;
if (total_len < VHOST_NET_WEIGHT &&
!vhost_vq_avail_empty(&net->dev, vq) &&
else
vhost_zerocopy_signal_used(net, vq);
vhost_net_tx_packet(net);
- if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
- vhost_poll_queue(&vq->poll);
- break;
- }
- }
+ } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
out:
mutex_unlock(&vq->mutex);
}
if (!len && vq->busyloop_timeout) {
/* Both tx vq and rx socket were polled here */
- mutex_lock(&vq->mutex);
+ mutex_lock_nested(&vq->mutex, 1);
vhost_disable_notify(&net->dev, vq);
preempt_disable();
endtime = busy_clock() + vq->busyloop_timeout;
- while (vhost_can_busy_poll(&net->dev, endtime) &&
+ while (vhost_can_busy_poll(endtime) &&
+ !vhost_has_work(&net->dev) &&
!sk_has_rx_data(sk) &&
vhost_vq_avail_empty(&net->dev, vq))
cpu_relax();
struct socket *sock;
struct iov_iter fixup;
__virtio16 num_buffers;
+ int recv_pkts = 0;
- mutex_lock(&vq->mutex);
+ mutex_lock_nested(&vq->mutex, 0);
sock = vq->private_data;
if (!sock)
goto out;
vq->log : NULL;
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
- while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) {
+ do {
+ sock_len = vhost_net_rx_peek_head_len(net, sock->sk);
+
+ if (!sock_len)
+ break;
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
headcount = get_rx_bufs(vq, vq->heads, vhost_len,
vhost_add_used_and_signal_n(&net->dev, vq, vq->heads,
headcount);
if (unlikely(vq_log))
- vhost_log_write(vq, vq_log, log, vhost_len);
+ vhost_log_write(vq, vq_log, log, vhost_len,
+ vq->iov, in);
total_len += vhost_len;
- if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
- vhost_poll_queue(&vq->poll);
- goto out;
- }
- }
+ } while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len)));
+
vhost_net_enable_vq(net, vq);
out:
mutex_unlock(&vq->mutex);
n->vqs[i].sock_hlen = 0;
vhost_net_buf_init(&n->vqs[i].rxq);
}
- vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
+ vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
+ VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT);
vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
if (ubufs)
vhost_net_ubuf_put_wait_and_free(ubufs);
err_ubufs:
- sockfd_put(sock);
+ if (sock)
+ sockfd_put(sock);
err_vq:
mutex_unlock(&vq->mutex);
err:
}
vhost_net_stop(n, &tx_sock, &rx_sock);
vhost_net_flush(n);
+ vhost_dev_stop(&n->dev);
vhost_dev_reset_owner(&n->dev, umem);
vhost_net_vq_reset(n);
done: