]> git.proxmox.com Git - pve-kernel-jessie.git/blobdiff - IB-ipoib-move-back-the-IB-LL-address-into-the-hard-header.patch
drop already applied patches
[pve-kernel-jessie.git] / IB-ipoib-move-back-the-IB-LL-address-into-the-hard-header.patch
diff --git a/IB-ipoib-move-back-the-IB-LL-address-into-the-hard-header.patch b/IB-ipoib-move-back-the-IB-LL-address-into-the-hard-header.patch
deleted file mode 100644 (file)
index 5b58edd..0000000
+++ /dev/null
@@ -1,365 +0,0 @@
-From patchwork Wed Oct 12 14:30:30 2016
-Content-Type: text/plain; charset="utf-8"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 7bit
-Subject: [v2] IB/ipoib: move back IB LL address into the hard header
-From: Paolo Abeni <pabeni@redhat.com>
-X-Patchwork-Id: 681344
-X-Patchwork-Delegate: davem@davemloft.net
-Message-Id: <60efcf739ce3d45a01a7127dbaf7fe366e5ddce4.1476264804.git.pabeni@redhat.com>
-To: linux-rdma@vger.kernel.org
-Cc: Doug Ledford <dledford@redhat.com>, Sean Hefty <sean.hefty@intel.com>,
- Hal Rosenstock <hal.rosenstock@gmail.com>,
- Jason Gunthorpe <jgunthorpe@obsidianresearch.com>, netdev@vger.kernel.org
-Date: Wed, 12 Oct 2016 16:30:30 +0200
-
-After the commit 9207f9d45b0a ("net: preserve IP control block
-during GSO segmentation"), the GSO CB and the IPoIB CB conflict.
-That destroy the IPoIB address information cached there,
-causing a severe performance regression, as better described here:
-
-http://marc.info/?l=linux-kernel&m=146787279825501&w=2
-
-This change moves the data cached by the IPoIB driver from the
-skb control lock into the IPoIB hard header, as done before
-the commit 936d7de3d736 ("IPoIB: Stop lying about hard_header_len
-and use skb->cb to stash LL addresses").
-In order to avoid GRO issue, on packet reception, the IPoIB driver
-stash into the skb a dummy pseudo header, so that the received
-packets have actually a hard header matching the declared length.
-To avoid changing the connected mode maximum mtu, the allocated 
-head buffer size is increased by the pseudo header length.
-
-After this commit, IPoIB performances are back to pre-regression
-value.
-
-v1 -> v2: avoid changing the max mtu, increasing the head buf size
-
-Fixes: 9207f9d45b0a ("net: preserve IP control block during GSO segmentation")
-Signed-off-by: Paolo Abeni <pabeni@redhat.com>
----
- drivers/infiniband/ulp/ipoib/ipoib.h           | 20 +++++++---
- drivers/infiniband/ulp/ipoib/ipoib_cm.c        | 15 +++----
- drivers/infiniband/ulp/ipoib/ipoib_ib.c        | 12 +++---
- drivers/infiniband/ulp/ipoib/ipoib_main.c      | 54 ++++++++++++++++----------
- drivers/infiniband/ulp/ipoib/ipoib_multicast.c |  6 ++-
- 5 files changed, 64 insertions(+), 43 deletions(-)
-
-diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
-index 9dbfcc0..5ff64af 100644
---- a/drivers/infiniband/ulp/ipoib/ipoib.h
-+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
-@@ -63,6 +63,8 @@ enum ipoib_flush_level {
- enum {
-       IPOIB_ENCAP_LEN           = 4,
-+      IPOIB_PSEUDO_LEN          = 20,
-+      IPOIB_HARD_LEN            = IPOIB_ENCAP_LEN + IPOIB_PSEUDO_LEN,
-       IPOIB_UD_HEAD_SIZE        = IB_GRH_BYTES + IPOIB_ENCAP_LEN,
-       IPOIB_UD_RX_SG            = 2, /* max buffer needed for 4K mtu */
-@@ -134,15 +136,21 @@ struct ipoib_header {
-       u16     reserved;
- };
--struct ipoib_cb {
--      struct qdisc_skb_cb     qdisc_cb;
--      u8                      hwaddr[INFINIBAND_ALEN];
-+struct ipoib_pseudo_header {
-+      u8      hwaddr[INFINIBAND_ALEN];
- };
--static inline struct ipoib_cb *ipoib_skb_cb(const struct sk_buff *skb)
-+static inline void skb_add_pseudo_hdr(struct sk_buff *skb)
- {
--      BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct ipoib_cb));
--      return (struct ipoib_cb *)skb->cb;
-+      char *data = skb_push(skb, IPOIB_PSEUDO_LEN);
-+
-+      /*
-+       * only the ipoib header is present now, make room for a dummy
-+       * pseudo header and set skb field accordingly
-+       */
-+      memset(data, 0, IPOIB_PSEUDO_LEN);
-+      skb_reset_mac_header(skb);
-+      skb_pull(skb, IPOIB_HARD_LEN);
- }
- /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
-diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
-index 4ad297d..339a1ee 100644
---- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
-+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
-@@ -63,6 +63,8 @@ MODULE_PARM_DESC(cm_data_debug_level,
- #define IPOIB_CM_RX_DELAY       (3 * 256 * HZ)
- #define IPOIB_CM_RX_UPDATE_MASK (0x3)
-+#define IPOIB_CM_RX_RESERVE     (ALIGN(IPOIB_HARD_LEN, 16) - IPOIB_ENCAP_LEN)
-+
- static struct ib_qp_attr ipoib_cm_err_attr = {
-       .qp_state = IB_QPS_ERR
- };
-@@ -146,15 +148,15 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
-       struct sk_buff *skb;
-       int i;
--      skb = dev_alloc_skb(IPOIB_CM_HEAD_SIZE + 12);
-+      skb = dev_alloc_skb(ALIGN(IPOIB_CM_HEAD_SIZE + IPOIB_PSEUDO_LEN, 16));
-       if (unlikely(!skb))
-               return NULL;
-       /*
--       * IPoIB adds a 4 byte header. So we need 12 more bytes to align the
-+       * IPoIB adds a IPOIB_ENCAP_LEN byte header, this will align the
-        * IP header to a multiple of 16.
-        */
--      skb_reserve(skb, 12);
-+      skb_reserve(skb, IPOIB_CM_RX_RESERVE);
-       mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_CM_HEAD_SIZE,
-                                      DMA_FROM_DEVICE);
-@@ -624,9 +626,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
-       if (wc->byte_len < IPOIB_CM_COPYBREAK) {
-               int dlen = wc->byte_len;
--              small_skb = dev_alloc_skb(dlen + 12);
-+              small_skb = dev_alloc_skb(dlen + IPOIB_CM_RX_RESERVE);
-               if (small_skb) {
--                      skb_reserve(small_skb, 12);
-+                      skb_reserve(small_skb, IPOIB_CM_RX_RESERVE);
-                       ib_dma_sync_single_for_cpu(priv->ca, rx_ring[wr_id].mapping[0],
-                                                  dlen, DMA_FROM_DEVICE);
-                       skb_copy_from_linear_data(skb, small_skb->data, dlen);
-@@ -663,8 +665,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
- copied:
-       skb->protocol = ((struct ipoib_header *) skb->data)->proto;
--      skb_reset_mac_header(skb);
--      skb_pull(skb, IPOIB_ENCAP_LEN);
-+      skb_add_pseudo_hdr(skb);
-       ++dev->stats.rx_packets;
-       dev->stats.rx_bytes += skb->len;
-diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
-index be11d5d..830fecb 100644
---- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
-+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
-@@ -128,16 +128,15 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
-       buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
--      skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
-+      skb = dev_alloc_skb(buf_size + IPOIB_HARD_LEN);
-       if (unlikely(!skb))
-               return NULL;
-       /*
--       * IB will leave a 40 byte gap for a GRH and IPoIB adds a 4 byte
--       * header.  So we need 4 more bytes to get to 48 and align the
--       * IP header to a multiple of 16.
-+       * the IP header will be at IPOIP_HARD_LEN + IB_GRH_BYTES, that is
-+       * 64 bytes aligned
-        */
--      skb_reserve(skb, 4);
-+      skb_reserve(skb, sizeof(struct ipoib_pseudo_header));
-       mapping = priv->rx_ring[id].mapping;
-       mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
-@@ -253,8 +252,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
-       skb_pull(skb, IB_GRH_BYTES);
-       skb->protocol = ((struct ipoib_header *) skb->data)->proto;
--      skb_reset_mac_header(skb);
--      skb_pull(skb, IPOIB_ENCAP_LEN);
-+      skb_add_pseudo_hdr(skb);
-       ++dev->stats.rx_packets;
-       dev->stats.rx_bytes += skb->len;
-diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
-index cc1c1b0..823a528 100644
---- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
-+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
-@@ -925,9 +925,12 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
-                               ipoib_neigh_free(neigh);
-                               goto err_drop;
-                       }
--                      if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
-+                      if (skb_queue_len(&neigh->queue) <
-+                          IPOIB_MAX_PATH_REC_QUEUE) {
-+                              /* put pseudoheader back on for next time */
-+                              skb_push(skb, IPOIB_PSEUDO_LEN);
-                               __skb_queue_tail(&neigh->queue, skb);
--                      else {
-+                      } else {
-                               ipoib_warn(priv, "queue length limit %d. Packet drop.\n",
-                                          skb_queue_len(&neigh->queue));
-                               goto err_drop;
-@@ -964,7 +967,7 @@ err_drop:
- }
- static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
--                           struct ipoib_cb *cb)
-+                           struct ipoib_pseudo_header *phdr)
- {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ipoib_path *path;
-@@ -972,16 +975,18 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
-       spin_lock_irqsave(&priv->lock, flags);
--      path = __path_find(dev, cb->hwaddr + 4);
-+      path = __path_find(dev, phdr->hwaddr + 4);
-       if (!path || !path->valid) {
-               int new_path = 0;
-               if (!path) {
--                      path = path_rec_create(dev, cb->hwaddr + 4);
-+                      path = path_rec_create(dev, phdr->hwaddr + 4);
-                       new_path = 1;
-               }
-               if (path) {
-                       if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
-+                              /* put pseudoheader back on for next time */
-+                              skb_push(skb, IPOIB_PSEUDO_LEN);
-                               __skb_queue_tail(&path->queue, skb);
-                       } else {
-                               ++dev->stats.tx_dropped;
-@@ -1009,10 +1014,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
-                         be16_to_cpu(path->pathrec.dlid));
-               spin_unlock_irqrestore(&priv->lock, flags);
--              ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr));
-+              ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
-               return;
-       } else if ((path->query || !path_rec_start(dev, path)) &&
-                  skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
-+              /* put pseudoheader back on for next time */
-+              skb_push(skb, IPOIB_PSEUDO_LEN);
-               __skb_queue_tail(&path->queue, skb);
-       } else {
-               ++dev->stats.tx_dropped;
-@@ -1026,13 +1033,15 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
- {
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ipoib_neigh *neigh;
--      struct ipoib_cb *cb = ipoib_skb_cb(skb);
-+      struct ipoib_pseudo_header *phdr;
-       struct ipoib_header *header;
-       unsigned long flags;
-+      phdr = (struct ipoib_pseudo_header *) skb->data;
-+      skb_pull(skb, sizeof(*phdr));
-       header = (struct ipoib_header *) skb->data;
--      if (unlikely(cb->hwaddr[4] == 0xff)) {
-+      if (unlikely(phdr->hwaddr[4] == 0xff)) {
-               /* multicast, arrange "if" according to probability */
-               if ((header->proto != htons(ETH_P_IP)) &&
-                   (header->proto != htons(ETH_P_IPV6)) &&
-@@ -1045,13 +1054,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
-                       return NETDEV_TX_OK;
-               }
-               /* Add in the P_Key for multicast*/
--              cb->hwaddr[8] = (priv->pkey >> 8) & 0xff;
--              cb->hwaddr[9] = priv->pkey & 0xff;
-+              phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
-+              phdr->hwaddr[9] = priv->pkey & 0xff;
--              neigh = ipoib_neigh_get(dev, cb->hwaddr);
-+              neigh = ipoib_neigh_get(dev, phdr->hwaddr);
-               if (likely(neigh))
-                       goto send_using_neigh;
--              ipoib_mcast_send(dev, cb->hwaddr, skb);
-+              ipoib_mcast_send(dev, phdr->hwaddr, skb);
-               return NETDEV_TX_OK;
-       }
-@@ -1060,16 +1069,16 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
-       case htons(ETH_P_IP):
-       case htons(ETH_P_IPV6):
-       case htons(ETH_P_TIPC):
--              neigh = ipoib_neigh_get(dev, cb->hwaddr);
-+              neigh = ipoib_neigh_get(dev, phdr->hwaddr);
-               if (unlikely(!neigh)) {
--                      neigh_add_path(skb, cb->hwaddr, dev);
-+                      neigh_add_path(skb, phdr->hwaddr, dev);
-                       return NETDEV_TX_OK;
-               }
-               break;
-       case htons(ETH_P_ARP):
-       case htons(ETH_P_RARP):
-               /* for unicast ARP and RARP should always perform path find */
--              unicast_arp_send(skb, dev, cb);
-+              unicast_arp_send(skb, dev, phdr);
-               return NETDEV_TX_OK;
-       default:
-               /* ethertype not supported by IPoIB */
-@@ -1086,11 +1095,13 @@ send_using_neigh:
-                       goto unref;
-               }
-       } else if (neigh->ah) {
--              ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr));
-+              ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(phdr->hwaddr));
-               goto unref;
-       }
-       if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
-+              /* put pseudoheader back on for next time */
-+              skb_push(skb, sizeof(*phdr));
-               spin_lock_irqsave(&priv->lock, flags);
-               __skb_queue_tail(&neigh->queue, skb);
-               spin_unlock_irqrestore(&priv->lock, flags);
-@@ -1122,8 +1133,8 @@ static int ipoib_hard_header(struct sk_buff *skb,
-                            unsigned short type,
-                            const void *daddr, const void *saddr, unsigned len)
- {
-+      struct ipoib_pseudo_header *phdr;
-       struct ipoib_header *header;
--      struct ipoib_cb *cb = ipoib_skb_cb(skb);
-       header = (struct ipoib_header *) skb_push(skb, sizeof *header);
-@@ -1132,12 +1143,13 @@ static int ipoib_hard_header(struct sk_buff *skb,
-       /*
-        * we don't rely on dst_entry structure,  always stuff the
--       * destination address into skb->cb so we can figure out where
-+       * destination address into skb hard header so we can figure out where
-        * to send the packet later.
-        */
--      memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN);
-+      phdr = (struct ipoib_pseudo_header *) skb_push(skb, sizeof(*phdr));
-+      memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
--      return sizeof *header;
-+      return IPOIB_HARD_LEN;
- }
- static void ipoib_set_mcast_list(struct net_device *dev)
-@@ -1759,7 +1771,7 @@ void ipoib_setup(struct net_device *dev)
-       dev->flags              |= IFF_BROADCAST | IFF_MULTICAST;
--      dev->hard_header_len     = IPOIB_ENCAP_LEN;
-+      dev->hard_header_len     = IPOIB_HARD_LEN;
-       dev->addr_len            = INFINIBAND_ALEN;
-       dev->type                = ARPHRD_INFINIBAND;
-       dev->tx_queue_len        = ipoib_sendq_size * 2;
-diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
-index d3394b6..1909dd2 100644
---- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
-+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
-@@ -796,9 +796,11 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
-                       __ipoib_mcast_add(dev, mcast);
-                       list_add_tail(&mcast->list, &priv->multicast_list);
-               }
--              if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
-+              if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) {
-+                      /* put pseudoheader back on for next time */
-+                      skb_push(skb, sizeof(struct ipoib_pseudo_header));
-                       skb_queue_tail(&mcast->pkt_queue, skb);
--              else {
-+              } else {
-                       ++dev->stats.tx_dropped;
-                       dev_kfree_skb_any(skb);
-               }