From: Sunil Goutham Date: Thu, 1 Dec 2016 12:54:28 +0000 (+0530) Subject: net: thunderx: Fix transmit queue timeout issue X-Git-Tag: v4.13~1792^2~122 X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=bd3ad7d3a14b07aeeb4f92abc757672719e2a0eb;p=mirror_ubuntu-bionic-kernel.git net: thunderx: Fix transmit queue timeout issue Transmit queue timeout issue is seen in two cases - Due to a race condition btw setting stop_queue at xmit() and checking for stopped_queue in NAPI poll routine, at times transmission from a SQ comes to a halt. This is fixed by using barriers and also added a check for SQ free descriptors, incase SQ is stopped and there are only CQE_RX i.e no CQE_TX. - Contrary to an assumption, a HW errata where HW doesn't stop transmission even though there are not enough CQEs available for a CQE_TX is not fixed in T88 pass 2.x. This results in a Qset error with 'CQ_WR_FULL' stalling transmission. This is fixed by adjusting RXQ's RED levels for CQ level such that there is always enough space left for CQE_TXs. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 1eacec85dac3..2006f58b14b1 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -644,6 +644,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, struct cmp_queue *cq = &qs->cq[cq_idx]; struct cqe_rx_t *cq_desc; struct netdev_queue *txq; + struct snd_queue *sq; unsigned int tx_pkts = 0, tx_bytes = 0; spin_lock_bh(&cq->lock); @@ -709,16 +710,20 @@ loop: done: /* Wakeup TXQ if its stopped earlier due to SQ full */ - if (tx_done) { + sq = &nic->qs->sq[cq_idx]; + if (tx_done || + (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) { netdev = nic->pnicvf->netdev; txq = netdev_get_tx_queue(netdev, nicvf_netdev_qidx(nic, cq_idx)); if (tx_pkts) netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); - nic = nic->pnicvf; + /* To read updated queue and carrier status */ + smp_mb(); if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { - netif_tx_start_queue(txq); + netif_tx_wake_queue(txq); + nic = nic->pnicvf; this_cpu_inc(nic->drv_stats->txq_wake); if (netif_msg_tx_err(nic)) netdev_warn(netdev, @@ -1054,6 +1059,9 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) struct nicvf *nic = netdev_priv(netdev); int qid = skb_get_queue_mapping(skb); struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid); + struct nicvf *snic; + struct snd_queue *sq; + int tmp; /* Check for minimum packet length */ if (skb->len <= ETH_HLEN) { @@ -1061,13 +1069,39 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } - if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) { + snic = nic; + /* Get secondary Qset's SQ structure */ + if (qid >= MAX_SND_QUEUES_PER_QS) { + tmp = qid / MAX_SND_QUEUES_PER_QS; + snic = (struct nicvf *)nic->snicvf[tmp - 1]; + if (!snic) { + netdev_warn(nic->netdev, + "Secondary Qset#%d's ptr not initialized\n", + tmp - 1); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + qid = qid % MAX_SND_QUEUES_PER_QS; + } + + sq = &snic->qs->sq[qid]; + if (!netif_tx_queue_stopped(txq) && + !nicvf_sq_append_skb(snic, sq, skb, qid)) { netif_tx_stop_queue(txq); - this_cpu_inc(nic->drv_stats->txq_stop); - if (netif_msg_tx_err(nic)) - netdev_warn(netdev, - "%s: Transmit ring full, stopping SQ%d\n", - netdev->name, qid); + + /* Barrier, so that stop_queue visible to other cpus */ + smp_mb(); + + /* Check again, incase another cpu freed descriptors */ + if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) { + netif_tx_wake_queue(txq); + } else { + this_cpu_inc(nic->drv_stats->txq_stop); + if (netif_msg_tx_err(nic)) + netdev_warn(netdev, + "%s: Transmit ring full, stopping SQ%d\n", + netdev->name, qid); + } return NETDEV_TX_BUSY; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 7b336cda66e8..d2ac133e36f1 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1190,30 +1190,12 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, } /* Append an skb to a SQ for packet transfer. */ -int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb) +int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, + struct sk_buff *skb, u8 sq_num) { int i, size; int subdesc_cnt, tso_sqe = 0; - int sq_num, qentry; - struct queue_set *qs; - struct snd_queue *sq; - - sq_num = skb_get_queue_mapping(skb); - if (sq_num >= MAX_SND_QUEUES_PER_QS) { - /* Get secondary Qset's SQ structure */ - i = sq_num / MAX_SND_QUEUES_PER_QS; - if (!nic->snicvf[i - 1]) { - netdev_warn(nic->netdev, - "Secondary Qset#%d's ptr not initialized\n", - i - 1); - return 1; - } - nic = (struct nicvf *)nic->snicvf[i - 1]; - sq_num = sq_num % MAX_SND_QUEUES_PER_QS; - } - - qs = nic->qs; - sq = &qs->sq[sq_num]; + int qentry; subdesc_cnt = nicvf_sq_subdesc_required(nic, skb); if (subdesc_cnt > atomic_read(&sq->free_cnt)) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 20511f2cb134..9e2104675bc9 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -88,13 +88,13 @@ /* RED and Backpressure levels of CQ for pkt reception * For CQ, level is a measure of emptiness i.e 0x0 means full - * eg: For CQ of size 4K, and for pass/drop levels of 128/96 - * HW accepts pkt if unused CQE >= 2048 - * RED accepts pkt if unused CQE < 2048 & >= 1536 - * DROPs pkts if unused CQE < 1536 + * eg: For CQ of size 4K, and for pass/drop levels of 160/144 + * HW accepts pkt if unused CQE >= 2560 + * RED accepts pkt if unused CQE < 2304 & >= 2560 + * DROPs pkts if unused CQE < 2304 */ -#define RQ_PASS_CQ_LVL 128ULL -#define RQ_DROP_CQ_LVL 96ULL +#define RQ_PASS_CQ_LVL 160ULL +#define RQ_DROP_CQ_LVL 144ULL /* RED and Backpressure levels of RBDR for pkt reception * For RBDR, level is a measure of fullness i.e 0x0 means empty @@ -306,7 +306,8 @@ void nicvf_sq_disable(struct nicvf *nic, int qidx); void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt); void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, int qidx); -int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb); +int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, + struct sk_buff *skb, u8 sq_num); struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx); void nicvf_rbdr_task(unsigned long data);