]> git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
forcedeth: use per cpu to collect xmit/recv statistics
authorZhu Yanjun <yanjun.zhu@oracle.com>
Thu, 5 Sep 2019 09:15:42 +0000 (05:15 -0400)
committerDavid S. Miller <davem@davemloft.net>
Fri, 6 Sep 2019 13:06:42 +0000 (15:06 +0200)
When testing with a background iperf pushing 1Gbit/sec traffic and running
both ifconfig and netstat to collect statistics, some deadlocks occurred.

Ifconfig and netstat will call nv_get_stats64 to get software xmit/recv
statistics. In the commit f5d827aece36 ("forcedeth: implement
ndo_get_stats64() API"), the normal tx/rx variables is to collect tx/rx
statistics. The fix is to replace normal tx/rx variables with per
cpu 64-bit variable to collect xmit/recv statistics. The per cpu variable
will avoid deadlocks and provide fast efficient statistics updates.

In nv_probe, the per cpu variable is initialized. In nv_remove, this
per cpu variable is freed.

In xmit/recv process, this per cpu variable will be updated.

In nv_get_stats64, this per cpu variable on each cpu is added up. Then
the driver can get xmit/recv packets statistics.

A test runs for several days with this commit, the deadlocks disappear
and the performance is better.

Tested:
   - iperf SMP x86_64 ->
   Client connecting to 1.1.1.108, TCP port 5001
   TCP window size: 85.0 KByte (default)
   ------------------------------------------------------------
   [  3] local 1.1.1.105 port 38888 connected with 1.1.1.108 port 5001
   [ ID] Interval       Transfer     Bandwidth
   [  3]  0.0-10.0 sec  1.10 GBytes   943 Mbits/sec

   ifconfig results:

   enp0s9 Link encap:Ethernet  HWaddr 00:21:28:6f:de:0f
          inet addr:1.1.1.105  Bcast:0.0.0.0  Mask:255.255.255.0
          UP BROADCAST RUNNING MULTICAST  MTU:1500  Metric:1
          RX packets:5774764531 errors:0 dropped:0 overruns:0 frame:0
          TX packets:633534193 errors:0 dropped:0 overruns:0 carrier:0
          collisions:0 txqueuelen:1000
          RX bytes:7646159340904 (7.6 TB) TX bytes:11425340407722 (11.4 TB)

   netstat results:

   Kernel Interface table
   Iface MTU Met RX-OK RX-ERR RX-DRP RX-OVR TX-OK TX-ERR TX-DRP TX-OVR Flg
   ...
   enp0s9 1500 0  5774764531 0    0 0      633534193      0      0  0 BMRU
   ...

Fixes: f5d827aece36 ("forcedeth: implement ndo_get_stats64() API")
CC: Joe Jin <joe.jin@oracle.com>
CC: JUNXIAO_BI <junxiao.bi@oracle.com>
Reported-and-tested-by: Nan san <nan.1986san@gmail.com>
Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/nvidia/forcedeth.c

index b327b29f5d577a3f72bbeb3b744ecda2cb27219f..a6b4bfae4684fbbfdc1d83125c11cf55d920a9b0 100644 (file)
@@ -713,6 +713,21 @@ struct nv_skb_map {
        struct nv_skb_map *next_tx_ctx;
 };
 
+struct nv_txrx_stats {
+       u64 stat_rx_packets;
+       u64 stat_rx_bytes; /* not always available in HW */
+       u64 stat_rx_missed_errors;
+       u64 stat_rx_dropped;
+       u64 stat_tx_packets; /* not always available in HW */
+       u64 stat_tx_bytes;
+       u64 stat_tx_dropped;
+};
+
+#define nv_txrx_stats_inc(member) \
+               __this_cpu_inc(np->txrx_stats->member)
+#define nv_txrx_stats_add(member, count) \
+               __this_cpu_add(np->txrx_stats->member, (count))
+
 /*
  * SMP locking:
  * All hardware access under netdev_priv(dev)->lock, except the performance
@@ -797,10 +812,7 @@ struct fe_priv {
 
        /* RX software stats */
        struct u64_stats_sync swstats_rx_syncp;
-       u64 stat_rx_packets;
-       u64 stat_rx_bytes; /* not always available in HW */
-       u64 stat_rx_missed_errors;
-       u64 stat_rx_dropped;
+       struct nv_txrx_stats __percpu *txrx_stats;
 
        /* media detection workaround.
         * Locking: Within irq hander or disable_irq+spin_lock(&np->lock);
@@ -826,9 +838,6 @@ struct fe_priv {
 
        /* TX software stats */
        struct u64_stats_sync swstats_tx_syncp;
-       u64 stat_tx_packets; /* not always available in HW */
-       u64 stat_tx_bytes;
-       u64 stat_tx_dropped;
 
        /* msi/msi-x fields */
        u32 msi_flags;
@@ -1721,6 +1730,39 @@ static void nv_update_stats(struct net_device *dev)
        }
 }
 
+static void nv_get_stats(int cpu, struct fe_priv *np,
+                        struct rtnl_link_stats64 *storage)
+{
+       struct nv_txrx_stats *src = per_cpu_ptr(np->txrx_stats, cpu);
+       unsigned int syncp_start;
+       u64 rx_packets, rx_bytes, rx_dropped, rx_missed_errors;
+       u64 tx_packets, tx_bytes, tx_dropped;
+
+       do {
+               syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp);
+               rx_packets       = src->stat_rx_packets;
+               rx_bytes         = src->stat_rx_bytes;
+               rx_dropped       = src->stat_rx_dropped;
+               rx_missed_errors = src->stat_rx_missed_errors;
+       } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start));
+
+       storage->rx_packets       += rx_packets;
+       storage->rx_bytes         += rx_bytes;
+       storage->rx_dropped       += rx_dropped;
+       storage->rx_missed_errors += rx_missed_errors;
+
+       do {
+               syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp);
+               tx_packets  = src->stat_tx_packets;
+               tx_bytes    = src->stat_tx_bytes;
+               tx_dropped  = src->stat_tx_dropped;
+       } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start));
+
+       storage->tx_packets += tx_packets;
+       storage->tx_bytes   += tx_bytes;
+       storage->tx_dropped += tx_dropped;
+}
+
 /*
  * nv_get_stats64: dev->ndo_get_stats64 function
  * Get latest stats value from the nic.
@@ -1733,7 +1775,7 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
        __releases(&netdev_priv(dev)->hwstats_lock)
 {
        struct fe_priv *np = netdev_priv(dev);
-       unsigned int syncp_start;
+       int cpu;
 
        /*
         * Note: because HW stats are not always available and for
@@ -1746,20 +1788,8 @@ nv_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *storage)
         */
 
        /* software stats */
-       do {
-               syncp_start = u64_stats_fetch_begin_irq(&np->swstats_rx_syncp);
-               storage->rx_packets       = np->stat_rx_packets;
-               storage->rx_bytes         = np->stat_rx_bytes;
-               storage->rx_dropped       = np->stat_rx_dropped;
-               storage->rx_missed_errors = np->stat_rx_missed_errors;
-       } while (u64_stats_fetch_retry_irq(&np->swstats_rx_syncp, syncp_start));
-
-       do {
-               syncp_start = u64_stats_fetch_begin_irq(&np->swstats_tx_syncp);
-               storage->tx_packets = np->stat_tx_packets;
-               storage->tx_bytes   = np->stat_tx_bytes;
-               storage->tx_dropped = np->stat_tx_dropped;
-       } while (u64_stats_fetch_retry_irq(&np->swstats_tx_syncp, syncp_start));
+       for_each_online_cpu(cpu)
+               nv_get_stats(cpu, np, storage);
 
        /* If the nic supports hw counters then retrieve latest values */
        if (np->driver_data & DEV_HAS_STATISTICS_V123) {
@@ -1827,7 +1857,7 @@ static int nv_alloc_rx(struct net_device *dev)
                } else {
 packet_dropped:
                        u64_stats_update_begin(&np->swstats_rx_syncp);
-                       np->stat_rx_dropped++;
+                       nv_txrx_stats_inc(stat_rx_dropped);
                        u64_stats_update_end(&np->swstats_rx_syncp);
                        return 1;
                }
@@ -1869,7 +1899,7 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
                } else {
 packet_dropped:
                        u64_stats_update_begin(&np->swstats_rx_syncp);
-                       np->stat_rx_dropped++;
+                       nv_txrx_stats_inc(stat_rx_dropped);
                        u64_stats_update_end(&np->swstats_rx_syncp);
                        return 1;
                }
@@ -2013,7 +2043,7 @@ static void nv_drain_tx(struct net_device *dev)
                }
                if (nv_release_txskb(np, &np->tx_skb[i])) {
                        u64_stats_update_begin(&np->swstats_tx_syncp);
-                       np->stat_tx_dropped++;
+                       nv_txrx_stats_inc(stat_tx_dropped);
                        u64_stats_update_end(&np->swstats_tx_syncp);
                }
                np->tx_skb[i].dma = 0;
@@ -2227,7 +2257,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                        /* on DMA mapping error - drop the packet */
                        dev_kfree_skb_any(skb);
                        u64_stats_update_begin(&np->swstats_tx_syncp);
-                       np->stat_tx_dropped++;
+                       nv_txrx_stats_inc(stat_tx_dropped);
                        u64_stats_update_end(&np->swstats_tx_syncp);
                        return NETDEV_TX_OK;
                }
@@ -2273,7 +2303,7 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                                dev_kfree_skb_any(skb);
                                np->put_tx_ctx = start_tx_ctx;
                                u64_stats_update_begin(&np->swstats_tx_syncp);
-                               np->stat_tx_dropped++;
+                               nv_txrx_stats_inc(stat_tx_dropped);
                                u64_stats_update_end(&np->swstats_tx_syncp);
                                return NETDEV_TX_OK;
                        }
@@ -2384,7 +2414,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
                        /* on DMA mapping error - drop the packet */
                        dev_kfree_skb_any(skb);
                        u64_stats_update_begin(&np->swstats_tx_syncp);
-                       np->stat_tx_dropped++;
+                       nv_txrx_stats_inc(stat_tx_dropped);
                        u64_stats_update_end(&np->swstats_tx_syncp);
                        return NETDEV_TX_OK;
                }
@@ -2431,7 +2461,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
                                dev_kfree_skb_any(skb);
                                np->put_tx_ctx = start_tx_ctx;
                                u64_stats_update_begin(&np->swstats_tx_syncp);
-                               np->stat_tx_dropped++;
+                               nv_txrx_stats_inc(stat_tx_dropped);
                                u64_stats_update_end(&np->swstats_tx_syncp);
                                return NETDEV_TX_OK;
                        }
@@ -2560,9 +2590,12 @@ static int nv_tx_done(struct net_device *dev, int limit)
                                            && !(flags & NV_TX_RETRYCOUNT_MASK))
                                                nv_legacybackoff_reseed(dev);
                                } else {
+                                       unsigned int len;
+
                                        u64_stats_update_begin(&np->swstats_tx_syncp);
-                                       np->stat_tx_packets++;
-                                       np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+                                       nv_txrx_stats_inc(stat_tx_packets);
+                                       len = np->get_tx_ctx->skb->len;
+                                       nv_txrx_stats_add(stat_tx_bytes, len);
                                        u64_stats_update_end(&np->swstats_tx_syncp);
                                }
                                bytes_compl += np->get_tx_ctx->skb->len;
@@ -2577,9 +2610,12 @@ static int nv_tx_done(struct net_device *dev, int limit)
                                            && !(flags & NV_TX2_RETRYCOUNT_MASK))
                                                nv_legacybackoff_reseed(dev);
                                } else {
+                                       unsigned int len;
+
                                        u64_stats_update_begin(&np->swstats_tx_syncp);
-                                       np->stat_tx_packets++;
-                                       np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+                                       nv_txrx_stats_inc(stat_tx_packets);
+                                       len = np->get_tx_ctx->skb->len;
+                                       nv_txrx_stats_add(stat_tx_bytes, len);
                                        u64_stats_update_end(&np->swstats_tx_syncp);
                                }
                                bytes_compl += np->get_tx_ctx->skb->len;
@@ -2627,9 +2663,12 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
                                                nv_legacybackoff_reseed(dev);
                                }
                        } else {
+                               unsigned int len;
+
                                u64_stats_update_begin(&np->swstats_tx_syncp);
-                               np->stat_tx_packets++;
-                               np->stat_tx_bytes += np->get_tx_ctx->skb->len;
+                               nv_txrx_stats_inc(stat_tx_packets);
+                               len = np->get_tx_ctx->skb->len;
+                               nv_txrx_stats_add(stat_tx_bytes, len);
                                u64_stats_update_end(&np->swstats_tx_syncp);
                        }
 
@@ -2806,6 +2845,15 @@ static int nv_getlen(struct net_device *dev, void *packet, int datalen)
        }
 }
 
+static void rx_missing_handler(u32 flags, struct fe_priv *np)
+{
+       if (flags & NV_RX_MISSEDFRAME) {
+               u64_stats_update_begin(&np->swstats_rx_syncp);
+               nv_txrx_stats_inc(stat_rx_missed_errors);
+               u64_stats_update_end(&np->swstats_rx_syncp);
+       }
+}
+
 static int nv_rx_process(struct net_device *dev, int limit)
 {
        struct fe_priv *np = netdev_priv(dev);
@@ -2848,11 +2896,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
                                        }
                                        /* the rest are hard errors */
                                        else {
-                                               if (flags & NV_RX_MISSEDFRAME) {
-                                                       u64_stats_update_begin(&np->swstats_rx_syncp);
-                                                       np->stat_rx_missed_errors++;
-                                                       u64_stats_update_end(&np->swstats_rx_syncp);
-                                               }
+                                               rx_missing_handler(flags, np);
                                                dev_kfree_skb(skb);
                                                goto next_pkt;
                                        }
@@ -2896,8 +2940,8 @@ static int nv_rx_process(struct net_device *dev, int limit)
                skb->protocol = eth_type_trans(skb, dev);
                napi_gro_receive(&np->napi, skb);
                u64_stats_update_begin(&np->swstats_rx_syncp);
-               np->stat_rx_packets++;
-               np->stat_rx_bytes += len;
+               nv_txrx_stats_inc(stat_rx_packets);
+               nv_txrx_stats_add(stat_rx_bytes, len);
                u64_stats_update_end(&np->swstats_rx_syncp);
 next_pkt:
                if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
@@ -2982,8 +3026,8 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
                        }
                        napi_gro_receive(&np->napi, skb);
                        u64_stats_update_begin(&np->swstats_rx_syncp);
-                       np->stat_rx_packets++;
-                       np->stat_rx_bytes += len;
+                       nv_txrx_stats_inc(stat_rx_packets);
+                       nv_txrx_stats_add(stat_rx_bytes, len);
                        u64_stats_update_end(&np->swstats_rx_syncp);
                } else {
                        dev_kfree_skb(skb);
@@ -5651,6 +5695,12 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
        SET_NETDEV_DEV(dev, &pci_dev->dev);
        u64_stats_init(&np->swstats_rx_syncp);
        u64_stats_init(&np->swstats_tx_syncp);
+       np->txrx_stats = alloc_percpu(struct nv_txrx_stats);
+       if (!np->txrx_stats) {
+               pr_err("np->txrx_stats, alloc memory error.\n");
+               err = -ENOMEM;
+               goto out_alloc_percpu;
+       }
 
        timer_setup(&np->oom_kick, nv_do_rx_refill, 0);
        timer_setup(&np->nic_poll, nv_do_nic_poll, 0);
@@ -6060,6 +6110,8 @@ out_relreg:
 out_disable:
        pci_disable_device(pci_dev);
 out_free:
+       free_percpu(np->txrx_stats);
+out_alloc_percpu:
        free_netdev(dev);
 out:
        return err;
@@ -6105,6 +6157,9 @@ static void nv_restore_mac_addr(struct pci_dev *pci_dev)
 static void nv_remove(struct pci_dev *pci_dev)
 {
        struct net_device *dev = pci_get_drvdata(pci_dev);
+       struct fe_priv *np = netdev_priv(dev);
+
+       free_percpu(np->txrx_stats);
 
        unregister_netdev(dev);