--- /dev/null
+From 15bea51dda49a0403a9a84fb26f3376636d2a3c2 Mon Sep 17 00:00:00 2001
+From: Wei Xu <wexu@redhat.com>
+Date: Fri, 1 Dec 2017 05:10:36 -0500
+Subject: [PATCH 08/13] vhost: fix skb leak in handle_rx()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Matthew found a roughly 40% tcp throughput regression with commit
+c67df11f(vhost_net: try batch dequing from skb array) as discussed
+in the following thread:
+https://www.mail-archive.com/netdev@vger.kernel.org/msg187936.html
+
+Eventually we figured out that it was a skb leak in handle_rx()
+when sending packets to the VM. This usually happens when a guest
+can not drain out vq as fast as vhost fills in, afterwards it sets
+off the traffic jam and leaks skb(s) which occurs as no headcount
+to send on the vq from vhost side.
+
+This can be avoided by making sure we have got enough headcount
+before actually consuming a skb from the batched rx array while
+transmitting, which is simply done by moving checking the zero
+headcount a bit ahead.
+
+Signed-off-by: Wei Xu <wexu@redhat.com>
+Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/vhost/net.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
+index 1c75572f5a3f..010253847022 100644
+--- a/drivers/vhost/net.c
++++ b/drivers/vhost/net.c
+@@ -781,16 +781,6 @@ static void handle_rx(struct vhost_net *net)
+ /* On error, stop handling until the next kick. */
+ if (unlikely(headcount < 0))
+ goto out;
+- if (nvq->rx_array)
+- msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
+- /* On overrun, truncate and discard */
+- if (unlikely(headcount > UIO_MAXIOV)) {
+- iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
+- err = sock->ops->recvmsg(sock, &msg,
+- 1, MSG_DONTWAIT | MSG_TRUNC);
+- pr_debug("Discarded rx packet: len %zd\n", sock_len);
+- continue;
+- }
+ /* OK, now we need to know about added descriptors. */
+ if (!headcount) {
+ if (unlikely(vhost_enable_notify(&net->dev, vq))) {
+@@ -803,6 +793,16 @@ static void handle_rx(struct vhost_net *net)
+ * they refilled. */
+ goto out;
+ }
++ if (nvq->rx_array)
++ msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
++ /* On overrun, truncate and discard */
++ if (unlikely(headcount > UIO_MAXIOV)) {
++ iov_iter_init(&msg.msg_iter, READ, vq->iov, 1, 1);
++ err = sock->ops->recvmsg(sock, &msg,
++ 1, MSG_DONTWAIT | MSG_TRUNC);
++ pr_debug("Discarded rx packet: len %zd\n", sock_len);
++ continue;
++ }
+ /* We don't need to be notified again. */
+ iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len);
+ fixup = msg.msg_iter;
+--
+2.14.2
+
--- /dev/null
+From ccc0b8662620d562798183b77bcd30125d2d14f3 Mon Sep 17 00:00:00 2001
+From: Wei Xu <wexu@redhat.com>
+Date: Fri, 1 Dec 2017 05:10:37 -0500
+Subject: [PATCH 09/13] tun: free skb in early errors
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+tun_recvmsg() supports accepting skb by msg_control after
+commit ac77cfd4258f ("tun: support receiving skb through msg_control"),
+the skb if presented should be freed no matter how far it can go
+along, otherwise it would be leaked.
+
+This patch fixes several missed cases.
+
+Signed-off-by: Wei Xu <wexu@redhat.com>
+Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/net/tun.c | 24 ++++++++++++++++++------
+ 1 file changed, 18 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/tun.c b/drivers/net/tun.c
+index cb1f7747adad..5143e948d7d1 100644
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1519,8 +1519,11 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
+
+ tun_debug(KERN_INFO, tun, "tun_do_read\n");
+
+- if (!iov_iter_count(to))
++ if (!iov_iter_count(to)) {
++ if (skb)
++ kfree_skb(skb);
+ return 0;
++ }
+
+ if (!skb) {
+ /* Read frames from ring */
+@@ -1636,22 +1639,24 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
+ {
+ struct tun_file *tfile = container_of(sock, struct tun_file, socket);
+ struct tun_struct *tun = __tun_get(tfile);
++ struct sk_buff *skb = m->msg_control;
+ int ret;
+
+- if (!tun)
+- return -EBADFD;
++ if (!tun) {
++ ret = -EBADFD;
++ goto out_free_skb;
++ }
+
+ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
+ ret = -EINVAL;
+- goto out;
++ goto out_put_tun;
+ }
+ if (flags & MSG_ERRQUEUE) {
+ ret = sock_recv_errqueue(sock->sk, m, total_len,
+ SOL_PACKET, TUN_TX_TIMESTAMP);
+ goto out;
+ }
+- ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT,
+- m->msg_control);
++ ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
+ if (ret > (ssize_t)total_len) {
+ m->msg_flags |= MSG_TRUNC;
+ ret = flags & MSG_TRUNC ? ret : total_len;
+@@ -1659,6 +1664,13 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
+ out:
+ tun_put(tun);
+ return ret;
++
++out_put_tun:
++ tun_put(tun);
++out_free_skb:
++ if (skb)
++ kfree_skb(skb);
++ return ret;
+ }
+
+ static int tun_peek_len(struct socket *sock)
+--
+2.14.2
+
--- /dev/null
+From 68783aa6989756cda8e9e305292afbb9f4f5677c Mon Sep 17 00:00:00 2001
+From: Wei Xu <wexu@redhat.com>
+Date: Fri, 1 Dec 2017 05:10:38 -0500
+Subject: [PATCH 10/13] tap: free skb if flags error
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+tap_recvmsg() supports accepting skb by msg_control after
+commit 3b4ba04acca8 ("tap: support receiving skb from msg_control"),
+the skb if presented should be freed within the function, otherwise
+it would be leaked.
+
+Signed-off-by: Wei Xu <wexu@redhat.com>
+Reported-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
+Signed-off-by: Fabian Grünbichler <f.gruenbichler@proxmox.com>
+---
+ drivers/net/tap.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/tap.c b/drivers/net/tap.c
+index 3570c7576993..4e04b6094f3c 100644
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -829,8 +829,11 @@ static ssize_t tap_do_read(struct tap_queue *q,
+ DEFINE_WAIT(wait);
+ ssize_t ret = 0;
+
+- if (!iov_iter_count(to))
++ if (!iov_iter_count(to)) {
++ if (skb)
++ kfree_skb(skb);
+ return 0;
++ }
+
+ if (skb)
+ goto put;
+@@ -1155,11 +1158,14 @@ static int tap_recvmsg(struct socket *sock, struct msghdr *m,
+ size_t total_len, int flags)
+ {
+ struct tap_queue *q = container_of(sock, struct tap_queue, sock);
++ struct sk_buff *skb = m->msg_control;
+ int ret;
+- if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
++ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) {
++ if (skb)
++ kfree_skb(skb);
+ return -EINVAL;
+- ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT,
+- m->msg_control);
++ }
++ ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb);
+ if (ret > total_len) {
+ m->msg_flags |= MSG_TRUNC;
+ ret = flags & MSG_TRUNC ? ret : total_len;
+--
+2.14.2
+