2 * Copyright (C) 2015 Cavium, Inc.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License
6 * as published by the Free Software Foundation.
10 #include <linux/netdevice.h>
12 #include <linux/etherdevice.h>
13 #include <linux/iommu.h>
20 #include "nicvf_queues.h"
22 static inline void nicvf_sq_add_gather_subdesc(struct snd_queue
*sq
, int qentry
,
24 static void nicvf_get_page(struct nicvf
*nic
)
26 if (!nic
->rb_pageref
|| !nic
->rb_page
)
29 page_ref_add(nic
->rb_page
, nic
->rb_pageref
);
33 /* Poll a register for a specific value */
34 static int nicvf_poll_reg(struct nicvf
*nic
, int qidx
,
35 u64 reg
, int bit_pos
, int bits
, int val
)
41 bit_mask
= (1ULL << bits
) - 1;
42 bit_mask
= (bit_mask
<< bit_pos
);
45 reg_val
= nicvf_queue_reg_read(nic
, reg
, qidx
);
46 if (((reg_val
& bit_mask
) >> bit_pos
) == val
)
48 usleep_range(1000, 2000);
51 netdev_err(nic
->netdev
, "Poll on reg 0x%llx failed\n", reg
);
55 /* Allocate memory for a queue's descriptors */
56 static int nicvf_alloc_q_desc_mem(struct nicvf
*nic
, struct q_desc_mem
*dmem
,
57 int q_len
, int desc_size
, int align_bytes
)
60 dmem
->size
= (desc_size
* q_len
) + align_bytes
;
61 /* Save address, need it while freeing */
62 dmem
->unalign_base
= dma_zalloc_coherent(&nic
->pdev
->dev
, dmem
->size
,
63 &dmem
->dma
, GFP_KERNEL
);
64 if (!dmem
->unalign_base
)
67 /* Align memory address for 'align_bytes' */
68 dmem
->phys_base
= NICVF_ALIGNED_ADDR((u64
)dmem
->dma
, align_bytes
);
69 dmem
->base
= dmem
->unalign_base
+ (dmem
->phys_base
- dmem
->dma
);
73 /* Free queue's descriptor memory */
74 static void nicvf_free_q_desc_mem(struct nicvf
*nic
, struct q_desc_mem
*dmem
)
79 dma_free_coherent(&nic
->pdev
->dev
, dmem
->size
,
80 dmem
->unalign_base
, dmem
->dma
);
81 dmem
->unalign_base
= NULL
;
85 #define XDP_PAGE_REFCNT_REFILL 256
87 /* Allocate a new page or recycle one if possible
89 * We cannot optimize dma mapping here, since
90 * 1. It's only one RBDR ring for 8 Rx queues.
91 * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed
92 * and not idx into RBDR ring, so can't refer to saved info.
93 * 3. There are multiple receive buffers per page
95 static inline struct pgcache
*nicvf_alloc_page(struct nicvf
*nic
,
96 struct rbdr
*rbdr
, gfp_t gfp
)
99 struct page
*page
= NULL
;
100 struct pgcache
*pgcache
, *next
;
102 /* Check if page is already allocated */
103 pgcache
= &rbdr
->pgcache
[rbdr
->pgidx
];
104 page
= pgcache
->page
;
105 /* Check if page can be recycled */
107 ref_count
= page_ref_count(page
);
108 /* Check if this page has been used once i.e 'put_page'
109 * called after packet transmission i.e internal ref_count
110 * and page's ref_count are equal i.e page can be recycled.
112 if (rbdr
->is_xdp
&& (ref_count
== pgcache
->ref_count
))
113 pgcache
->ref_count
--;
117 /* In non-XDP mode, page's ref_count needs to be '1' for it
120 if (!rbdr
->is_xdp
&& (ref_count
!= 1))
125 page
= alloc_pages(gfp
| __GFP_COMP
| __GFP_NOWARN
, 0);
129 this_cpu_inc(nic
->pnicvf
->drv_stats
->page_alloc
);
131 /* Check for space */
132 if (rbdr
->pgalloc
>= rbdr
->pgcnt
) {
133 /* Page can still be used */
138 /* Save the page in page cache */
139 pgcache
->page
= page
;
140 pgcache
->dma_addr
= 0;
141 pgcache
->ref_count
= 0;
145 /* Take additional page references for recycling */
147 /* Since there is single RBDR (i.e single core doing
148 * page recycling) per 8 Rx queues, in XDP mode adjusting
149 * page references atomically is the biggest bottleneck, so
150 * take bunch of references at a time.
152 * So here, the reference counts below differ by '1'.
154 if (!pgcache
->ref_count
) {
155 pgcache
->ref_count
= XDP_PAGE_REFCNT_REFILL
;
156 page_ref_add(page
, XDP_PAGE_REFCNT_REFILL
);
159 /* In non-XDP case, single 64K page is divided across multiple
160 * receive buffers, so cost of recycling is less anyway.
161 * So we can do with just one extra reference.
163 page_ref_add(page
, 1);
167 rbdr
->pgidx
&= (rbdr
->pgcnt
- 1);
169 /* Prefetch refcount of next page in page cache */
170 next
= &rbdr
->pgcache
[rbdr
->pgidx
];
173 prefetch(&page
->_refcount
);
178 /* Allocate buffer for packet reception */
179 static inline int nicvf_alloc_rcv_buffer(struct nicvf
*nic
, struct rbdr
*rbdr
,
180 gfp_t gfp
, u32 buf_len
, u64
*rbuf
)
182 struct pgcache
*pgcache
= NULL
;
184 /* Check if request can be accommodated in the previously allocated page.
185 * But in XDP mode only one buffer per page is permitted.
187 if (!rbdr
->is_xdp
&& nic
->rb_page
&&
188 ((nic
->rb_page_offset
+ buf_len
) <= PAGE_SIZE
)) {
196 /* Get new page, either recycled or new one */
197 pgcache
= nicvf_alloc_page(nic
, rbdr
, gfp
);
198 if (!pgcache
&& !nic
->rb_page
) {
199 this_cpu_inc(nic
->pnicvf
->drv_stats
->rcv_buffer_alloc_failures
);
203 nic
->rb_page_offset
= 0;
205 /* Reserve space for header modifications by BPF program */
207 buf_len
+= XDP_PACKET_HEADROOM
;
209 /* Check if it's recycled */
211 nic
->rb_page
= pgcache
->page
;
213 if (rbdr
->is_xdp
&& pgcache
&& pgcache
->dma_addr
) {
214 *rbuf
= pgcache
->dma_addr
;
216 /* HW will ensure data coherency, CPU sync not required */
217 *rbuf
= (u64
)dma_map_page_attrs(&nic
->pdev
->dev
, nic
->rb_page
,
218 nic
->rb_page_offset
, buf_len
,
220 DMA_ATTR_SKIP_CPU_SYNC
);
221 if (dma_mapping_error(&nic
->pdev
->dev
, (dma_addr_t
)*rbuf
)) {
222 if (!nic
->rb_page_offset
)
223 __free_pages(nic
->rb_page
, 0);
228 pgcache
->dma_addr
= *rbuf
+ XDP_PACKET_HEADROOM
;
229 nic
->rb_page_offset
+= buf_len
;
235 /* Build skb around receive buffer */
236 static struct sk_buff
*nicvf_rb_ptr_to_skb(struct nicvf
*nic
,
242 data
= phys_to_virt(rb_ptr
);
244 /* Now build an skb to give to stack */
245 skb
= build_skb(data
, RCV_FRAG_LEN
);
247 put_page(virt_to_page(data
));
255 /* Allocate RBDR ring and populate receive buffers */
256 static int nicvf_init_rbdr(struct nicvf
*nic
, struct rbdr
*rbdr
,
257 int ring_len
, int buf_size
)
261 struct rbdr_entry_t
*desc
;
264 err
= nicvf_alloc_q_desc_mem(nic
, &rbdr
->dmem
, ring_len
,
265 sizeof(struct rbdr_entry_t
),
266 NICVF_RCV_BUF_ALIGN_BYTES
);
270 rbdr
->desc
= rbdr
->dmem
.base
;
271 /* Buffer size has to be in multiples of 128 bytes */
272 rbdr
->dma_size
= buf_size
;
274 rbdr
->thresh
= RBDR_THRESH
;
278 /* Initialize page recycling stuff.
280 * Can't use single buffer per page especially with 64K pages.
281 * On embedded platforms i.e 81xx/83xx available memory itself
282 * is low and minimum ring size of RBDR is 8K, that takes away
285 * But for XDP it has to be a single buffer per page.
287 if (!nic
->pnicvf
->xdp_prog
) {
288 rbdr
->pgcnt
= ring_len
/ (PAGE_SIZE
/ buf_size
);
289 rbdr
->is_xdp
= false;
291 rbdr
->pgcnt
= ring_len
;
294 rbdr
->pgcnt
= roundup_pow_of_two(rbdr
->pgcnt
);
295 rbdr
->pgcache
= kzalloc(sizeof(*rbdr
->pgcache
) *
296 rbdr
->pgcnt
, GFP_KERNEL
);
303 for (idx
= 0; idx
< ring_len
; idx
++) {
304 err
= nicvf_alloc_rcv_buffer(nic
, rbdr
, GFP_KERNEL
,
305 RCV_FRAG_LEN
, &rbuf
);
307 /* To free already allocated and mapped ones */
308 rbdr
->tail
= idx
- 1;
312 desc
= GET_RBDR_DESC(rbdr
, idx
);
313 desc
->buf_addr
= rbuf
& ~(NICVF_RCV_BUF_ALIGN_BYTES
- 1);
321 /* Free RBDR ring and its receive buffers */
322 static void nicvf_free_rbdr(struct nicvf
*nic
, struct rbdr
*rbdr
)
325 u64 buf_addr
, phys_addr
;
326 struct pgcache
*pgcache
;
327 struct rbdr_entry_t
*desc
;
332 rbdr
->enable
= false;
333 if (!rbdr
->dmem
.base
)
339 /* Release page references */
340 while (head
!= tail
) {
341 desc
= GET_RBDR_DESC(rbdr
, head
);
342 buf_addr
= desc
->buf_addr
;
343 phys_addr
= nicvf_iova_to_phys(nic
, buf_addr
);
344 dma_unmap_page_attrs(&nic
->pdev
->dev
, buf_addr
, RCV_FRAG_LEN
,
345 DMA_FROM_DEVICE
, DMA_ATTR_SKIP_CPU_SYNC
);
347 put_page(virt_to_page(phys_to_virt(phys_addr
)));
349 head
&= (rbdr
->dmem
.q_len
- 1);
351 /* Release buffer of tail desc */
352 desc
= GET_RBDR_DESC(rbdr
, tail
);
353 buf_addr
= desc
->buf_addr
;
354 phys_addr
= nicvf_iova_to_phys(nic
, buf_addr
);
355 dma_unmap_page_attrs(&nic
->pdev
->dev
, buf_addr
, RCV_FRAG_LEN
,
356 DMA_FROM_DEVICE
, DMA_ATTR_SKIP_CPU_SYNC
);
358 put_page(virt_to_page(phys_to_virt(phys_addr
)));
360 /* Sync page cache info */
363 /* Release additional page references held for recycling */
365 while (head
< rbdr
->pgcnt
) {
366 pgcache
= &rbdr
->pgcache
[head
];
367 if (pgcache
->page
&& page_ref_count(pgcache
->page
) != 0) {
369 put_page(pgcache
->page
);
372 page_ref_sub(pgcache
->page
, pgcache
->ref_count
- 1);
373 put_page(pgcache
->page
);
379 nicvf_free_q_desc_mem(nic
, &rbdr
->dmem
);
382 /* Refill receive buffer descriptors with new buffers.
384 static void nicvf_refill_rbdr(struct nicvf
*nic
, gfp_t gfp
)
386 struct queue_set
*qs
= nic
->qs
;
387 int rbdr_idx
= qs
->rbdr_cnt
;
391 struct rbdr_entry_t
*desc
;
399 rbdr
= &qs
->rbdr
[rbdr_idx
];
400 /* Check if it's enabled */
404 /* Get no of desc's to be refilled */
405 qcount
= nicvf_queue_reg_read(nic
, NIC_QSET_RBDR_0_1_STATUS0
, rbdr_idx
);
407 /* Doorbell can be rung with a max of ring size minus 1 */
408 if (qcount
>= (qs
->rbdr_len
- 1))
411 refill_rb_cnt
= qs
->rbdr_len
- qcount
- 1;
413 /* Sync page cache info */
416 /* Start filling descs from tail */
417 tail
= nicvf_queue_reg_read(nic
, NIC_QSET_RBDR_0_1_TAIL
, rbdr_idx
) >> 3;
418 while (refill_rb_cnt
) {
420 tail
&= (rbdr
->dmem
.q_len
- 1);
422 if (nicvf_alloc_rcv_buffer(nic
, rbdr
, gfp
, RCV_FRAG_LEN
, &rbuf
))
425 desc
= GET_RBDR_DESC(rbdr
, tail
);
426 desc
->buf_addr
= rbuf
& ~(NICVF_RCV_BUF_ALIGN_BYTES
- 1);
433 /* make sure all memory stores are done before ringing doorbell */
436 /* Check if buffer allocation failed */
438 nic
->rb_alloc_fail
= true;
440 nic
->rb_alloc_fail
= false;
443 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_DOOR
,
446 /* Re-enable RBDR interrupts only if buffer allocation succeeded */
447 if (!nic
->rb_alloc_fail
&& rbdr
->enable
&&
448 netif_running(nic
->pnicvf
->netdev
))
449 nicvf_enable_intr(nic
, NICVF_INTR_RBDR
, rbdr_idx
);
455 /* Alloc rcv buffers in non-atomic mode for better success */
456 void nicvf_rbdr_work(struct work_struct
*work
)
458 struct nicvf
*nic
= container_of(work
, struct nicvf
, rbdr_work
.work
);
460 nicvf_refill_rbdr(nic
, GFP_KERNEL
);
461 if (nic
->rb_alloc_fail
)
462 schedule_delayed_work(&nic
->rbdr_work
, msecs_to_jiffies(10));
464 nic
->rb_work_scheduled
= false;
467 /* In Softirq context, alloc rcv buffers in atomic mode */
468 void nicvf_rbdr_task(unsigned long data
)
470 struct nicvf
*nic
= (struct nicvf
*)data
;
472 nicvf_refill_rbdr(nic
, GFP_ATOMIC
);
473 if (nic
->rb_alloc_fail
) {
474 nic
->rb_work_scheduled
= true;
475 schedule_delayed_work(&nic
->rbdr_work
, msecs_to_jiffies(10));
479 /* Initialize completion queue */
480 static int nicvf_init_cmp_queue(struct nicvf
*nic
,
481 struct cmp_queue
*cq
, int q_len
)
485 err
= nicvf_alloc_q_desc_mem(nic
, &cq
->dmem
, q_len
, CMP_QUEUE_DESC_SIZE
,
486 NICVF_CQ_BASE_ALIGN_BYTES
);
490 cq
->desc
= cq
->dmem
.base
;
491 cq
->thresh
= pass1_silicon(nic
->pdev
) ? 0 : CMP_QUEUE_CQE_THRESH
;
492 nic
->cq_coalesce_usecs
= (CMP_QUEUE_TIMER_THRESH
* 0.05) - 1;
497 static void nicvf_free_cmp_queue(struct nicvf
*nic
, struct cmp_queue
*cq
)
504 nicvf_free_q_desc_mem(nic
, &cq
->dmem
);
507 /* Initialize transmit queue */
508 static int nicvf_init_snd_queue(struct nicvf
*nic
,
509 struct snd_queue
*sq
, int q_len
, int qidx
)
513 err
= nicvf_alloc_q_desc_mem(nic
, &sq
->dmem
, q_len
, SND_QUEUE_DESC_SIZE
,
514 NICVF_SQ_BASE_ALIGN_BYTES
);
518 sq
->desc
= sq
->dmem
.base
;
519 sq
->skbuff
= kcalloc(q_len
, sizeof(u64
), GFP_KERNEL
);
525 sq
->thresh
= SND_QUEUE_THRESH
;
527 /* Check if this SQ is a XDP TX queue */
529 qidx
+= ((nic
->sqs_id
+ 1) * MAX_SND_QUEUES_PER_QS
);
530 if (qidx
< nic
->pnicvf
->xdp_tx_queues
) {
531 /* Alloc memory to save page pointers for XDP_TX */
532 sq
->xdp_page
= kcalloc(q_len
, sizeof(u64
), GFP_KERNEL
);
535 sq
->xdp_desc_cnt
= 0;
536 sq
->xdp_free_cnt
= q_len
- 1;
540 sq
->xdp_desc_cnt
= 0;
541 sq
->xdp_free_cnt
= 0;
544 atomic_set(&sq
->free_cnt
, q_len
- 1);
546 /* Preallocate memory for TSO segment's header */
547 sq
->tso_hdrs
= dma_alloc_coherent(&nic
->pdev
->dev
,
548 q_len
* TSO_HEADER_SIZE
,
558 void nicvf_unmap_sndq_buffers(struct nicvf
*nic
, struct snd_queue
*sq
,
559 int hdr_sqe
, u8 subdesc_cnt
)
562 struct sq_gather_subdesc
*gather
;
564 /* Unmap DMA mapped skb data buffers */
565 for (idx
= 0; idx
< subdesc_cnt
; idx
++) {
567 hdr_sqe
&= (sq
->dmem
.q_len
- 1);
568 gather
= (struct sq_gather_subdesc
*)GET_SQ_DESC(sq
, hdr_sqe
);
569 /* HW will ensure data coherency, CPU sync not required */
570 dma_unmap_page_attrs(&nic
->pdev
->dev
, gather
->addr
,
571 gather
->size
, DMA_TO_DEVICE
,
572 DMA_ATTR_SKIP_CPU_SYNC
);
576 static void nicvf_free_snd_queue(struct nicvf
*nic
, struct snd_queue
*sq
)
580 struct sq_hdr_subdesc
*hdr
;
581 struct sq_hdr_subdesc
*tso_sqe
;
589 dma_free_coherent(&nic
->pdev
->dev
,
590 sq
->dmem
.q_len
* TSO_HEADER_SIZE
,
591 sq
->tso_hdrs
, sq
->tso_hdrs_phys
);
593 /* Free pending skbs in the queue */
595 while (sq
->head
!= sq
->tail
) {
596 skb
= (struct sk_buff
*)sq
->skbuff
[sq
->head
];
597 if (!skb
|| !sq
->xdp_page
)
600 page
= (struct page
*)sq
->xdp_page
[sq
->head
];
606 hdr
= (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, sq
->head
);
607 /* Check for dummy descriptor used for HW TSO offload on 88xx */
608 if (hdr
->dont_send
) {
609 /* Get actual TSO descriptors and unmap them */
611 (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, hdr
->rsvd2
);
612 nicvf_unmap_sndq_buffers(nic
, sq
, hdr
->rsvd2
,
613 tso_sqe
->subdesc_cnt
);
615 nicvf_unmap_sndq_buffers(nic
, sq
, sq
->head
,
619 dev_kfree_skb_any(skb
);
622 sq
->head
&= (sq
->dmem
.q_len
- 1);
626 nicvf_free_q_desc_mem(nic
, &sq
->dmem
);
629 static void nicvf_reclaim_snd_queue(struct nicvf
*nic
,
630 struct queue_set
*qs
, int qidx
)
632 /* Disable send queue */
633 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, 0);
634 /* Check if SQ is stopped */
635 if (nicvf_poll_reg(nic
, qidx
, NIC_QSET_SQ_0_7_STATUS
, 21, 1, 0x01))
637 /* Reset send queue */
638 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, NICVF_SQ_RESET
);
641 static void nicvf_reclaim_rcv_queue(struct nicvf
*nic
,
642 struct queue_set
*qs
, int qidx
)
644 union nic_mbx mbx
= {};
646 /* Make sure all packets in the pipeline are written back into mem */
647 mbx
.msg
.msg
= NIC_MBOX_MSG_RQ_SW_SYNC
;
648 nicvf_send_msg_to_pf(nic
, &mbx
);
651 static void nicvf_reclaim_cmp_queue(struct nicvf
*nic
,
652 struct queue_set
*qs
, int qidx
)
654 /* Disable timer threshold (doesn't get reset upon CQ reset) */
655 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG2
, qidx
, 0);
656 /* Disable completion queue */
657 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG
, qidx
, 0);
658 /* Reset completion queue */
659 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG
, qidx
, NICVF_CQ_RESET
);
662 static void nicvf_reclaim_rbdr(struct nicvf
*nic
,
663 struct rbdr
*rbdr
, int qidx
)
668 /* Save head and tail pointers for freeing up buffers */
669 rbdr
->head
= nicvf_queue_reg_read(nic
,
670 NIC_QSET_RBDR_0_1_HEAD
,
672 rbdr
->tail
= nicvf_queue_reg_read(nic
,
673 NIC_QSET_RBDR_0_1_TAIL
,
676 /* If RBDR FIFO is in 'FAIL' state then do a reset first
679 fifo_state
= nicvf_queue_reg_read(nic
, NIC_QSET_RBDR_0_1_STATUS0
, qidx
);
680 if (((fifo_state
>> 62) & 0x03) == 0x3)
681 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_CFG
,
682 qidx
, NICVF_RBDR_RESET
);
685 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_CFG
, qidx
, 0);
686 if (nicvf_poll_reg(nic
, qidx
, NIC_QSET_RBDR_0_1_STATUS0
, 62, 2, 0x00))
689 tmp
= nicvf_queue_reg_read(nic
,
690 NIC_QSET_RBDR_0_1_PREFETCH_STATUS
,
692 if ((tmp
& 0xFFFFFFFF) == ((tmp
>> 32) & 0xFFFFFFFF))
694 usleep_range(1000, 2000);
697 netdev_err(nic
->netdev
,
698 "Failed polling on prefetch status\n");
702 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_CFG
,
703 qidx
, NICVF_RBDR_RESET
);
705 if (nicvf_poll_reg(nic
, qidx
, NIC_QSET_RBDR_0_1_STATUS0
, 62, 2, 0x02))
707 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_CFG
, qidx
, 0x00);
708 if (nicvf_poll_reg(nic
, qidx
, NIC_QSET_RBDR_0_1_STATUS0
, 62, 2, 0x00))
712 void nicvf_config_vlan_stripping(struct nicvf
*nic
, netdev_features_t features
)
717 rq_cfg
= nicvf_queue_reg_read(nic
, NIC_QSET_RQ_GEN_CFG
, 0);
719 /* Enable first VLAN stripping */
720 if (features
& NETIF_F_HW_VLAN_CTAG_RX
)
721 rq_cfg
|= (1ULL << 25);
723 rq_cfg
&= ~(1ULL << 25);
724 nicvf_queue_reg_write(nic
, NIC_QSET_RQ_GEN_CFG
, 0, rq_cfg
);
726 /* Configure Secondary Qsets, if any */
727 for (sqs
= 0; sqs
< nic
->sqs_count
; sqs
++)
728 if (nic
->snicvf
[sqs
])
729 nicvf_queue_reg_write(nic
->snicvf
[sqs
],
730 NIC_QSET_RQ_GEN_CFG
, 0, rq_cfg
);
733 static void nicvf_reset_rcv_queue_stats(struct nicvf
*nic
)
735 union nic_mbx mbx
= {};
737 /* Reset all RQ/SQ and VF stats */
738 mbx
.reset_stat
.msg
= NIC_MBOX_MSG_RESET_STAT_COUNTER
;
739 mbx
.reset_stat
.rx_stat_mask
= 0x3FFF;
740 mbx
.reset_stat
.tx_stat_mask
= 0x1F;
741 mbx
.reset_stat
.rq_stat_mask
= 0xFFFF;
742 mbx
.reset_stat
.sq_stat_mask
= 0xFFFF;
743 nicvf_send_msg_to_pf(nic
, &mbx
);
746 /* Configures receive queue */
747 static void nicvf_rcv_queue_config(struct nicvf
*nic
, struct queue_set
*qs
,
748 int qidx
, bool enable
)
750 union nic_mbx mbx
= {};
751 struct rcv_queue
*rq
;
752 struct rq_cfg rq_cfg
;
757 /* Disable receive queue */
758 nicvf_queue_reg_write(nic
, NIC_QSET_RQ_0_7_CFG
, qidx
, 0);
761 nicvf_reclaim_rcv_queue(nic
, qs
, qidx
);
765 rq
->cq_qs
= qs
->vnic_id
;
767 rq
->start_rbdr_qs
= qs
->vnic_id
;
768 rq
->start_qs_rbdr_idx
= qs
->rbdr_cnt
- 1;
769 rq
->cont_rbdr_qs
= qs
->vnic_id
;
770 rq
->cont_qs_rbdr_idx
= qs
->rbdr_cnt
- 1;
771 /* All writes of RBDR data are to be loaded into L2 cache as well */
774 /* Send a mailbox msg to PF to config RQ */
775 mbx
.rq
.msg
= NIC_MBOX_MSG_RQ_CFG
;
776 mbx
.rq
.qs_num
= qs
->vnic_id
;
777 mbx
.rq
.rq_num
= qidx
;
778 mbx
.rq
.cfg
= (rq
->caching
<< 26) | (rq
->cq_qs
<< 19) |
779 (rq
->cq_idx
<< 16) | (rq
->cont_rbdr_qs
<< 9) |
780 (rq
->cont_qs_rbdr_idx
<< 8) |
781 (rq
->start_rbdr_qs
<< 1) | (rq
->start_qs_rbdr_idx
);
782 nicvf_send_msg_to_pf(nic
, &mbx
);
784 mbx
.rq
.msg
= NIC_MBOX_MSG_RQ_BP_CFG
;
785 mbx
.rq
.cfg
= BIT_ULL(63) | BIT_ULL(62) |
786 (RQ_PASS_RBDR_LVL
<< 16) | (RQ_PASS_CQ_LVL
<< 8) |
788 nicvf_send_msg_to_pf(nic
, &mbx
);
791 * Enable CQ drop to reserve sufficient CQEs for all tx packets
793 mbx
.rq
.msg
= NIC_MBOX_MSG_RQ_DROP_CFG
;
794 mbx
.rq
.cfg
= BIT_ULL(63) | BIT_ULL(62) |
795 (RQ_PASS_RBDR_LVL
<< 40) | (RQ_DROP_RBDR_LVL
<< 32) |
796 (RQ_PASS_CQ_LVL
<< 16) | (RQ_DROP_CQ_LVL
<< 8);
797 nicvf_send_msg_to_pf(nic
, &mbx
);
799 if (!nic
->sqs_mode
&& (qidx
== 0)) {
800 /* Enable checking L3/L4 length and TCP/UDP checksums
801 * Also allow IPv6 pkts with zero UDP checksum.
803 nicvf_queue_reg_write(nic
, NIC_QSET_RQ_GEN_CFG
, 0,
804 (BIT(24) | BIT(23) | BIT(21) | BIT(20)));
805 nicvf_config_vlan_stripping(nic
, nic
->netdev
->features
);
808 /* Enable Receive queue */
809 memset(&rq_cfg
, 0, sizeof(struct rq_cfg
));
812 nicvf_queue_reg_write(nic
, NIC_QSET_RQ_0_7_CFG
, qidx
, *(u64
*)&rq_cfg
);
815 /* Configures completion queue */
816 void nicvf_cmp_queue_config(struct nicvf
*nic
, struct queue_set
*qs
,
817 int qidx
, bool enable
)
819 struct cmp_queue
*cq
;
820 struct cq_cfg cq_cfg
;
826 nicvf_reclaim_cmp_queue(nic
, qs
, qidx
);
830 /* Reset completion queue */
831 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG
, qidx
, NICVF_CQ_RESET
);
836 spin_lock_init(&cq
->lock
);
837 /* Set completion queue base address */
838 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_BASE
,
839 qidx
, (u64
)(cq
->dmem
.phys_base
));
841 /* Enable Completion queue */
842 memset(&cq_cfg
, 0, sizeof(struct cq_cfg
));
846 cq_cfg
.qsize
= ilog2(qs
->cq_len
>> 10);
848 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG
, qidx
, *(u64
*)&cq_cfg
);
850 /* Set threshold value for interrupt generation */
851 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_THRESH
, qidx
, cq
->thresh
);
852 nicvf_queue_reg_write(nic
, NIC_QSET_CQ_0_7_CFG2
,
853 qidx
, CMP_QUEUE_TIMER_THRESH
);
856 /* Configures transmit queue */
857 static void nicvf_snd_queue_config(struct nicvf
*nic
, struct queue_set
*qs
,
858 int qidx
, bool enable
)
860 union nic_mbx mbx
= {};
861 struct snd_queue
*sq
;
862 struct sq_cfg sq_cfg
;
868 nicvf_reclaim_snd_queue(nic
, qs
, qidx
);
872 /* Reset send queue */
873 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, NICVF_SQ_RESET
);
875 sq
->cq_qs
= qs
->vnic_id
;
878 /* Send a mailbox msg to PF to config SQ */
879 mbx
.sq
.msg
= NIC_MBOX_MSG_SQ_CFG
;
880 mbx
.sq
.qs_num
= qs
->vnic_id
;
881 mbx
.sq
.sq_num
= qidx
;
882 mbx
.sq
.sqs_mode
= nic
->sqs_mode
;
883 mbx
.sq
.cfg
= (sq
->cq_qs
<< 3) | sq
->cq_idx
;
884 nicvf_send_msg_to_pf(nic
, &mbx
);
886 /* Set queue base address */
887 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_BASE
,
888 qidx
, (u64
)(sq
->dmem
.phys_base
));
890 /* Enable send queue & set queue size */
891 memset(&sq_cfg
, 0, sizeof(struct sq_cfg
));
895 sq_cfg
.qsize
= ilog2(qs
->sq_len
>> 10);
896 sq_cfg
.tstmp_bgx_intf
= 0;
897 /* CQ's level at which HW will stop processing SQEs to avoid
898 * transmitting a pkt with no space in CQ to post CQE_TX.
900 sq_cfg
.cq_limit
= (CMP_QUEUE_PIPELINE_RSVD
* 256) / qs
->cq_len
;
901 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, *(u64
*)&sq_cfg
);
903 /* Set threshold value for interrupt generation */
904 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_THRESH
, qidx
, sq
->thresh
);
906 /* Set queue:cpu affinity for better load distribution */
907 if (cpu_online(qidx
)) {
908 cpumask_set_cpu(qidx
, &sq
->affinity_mask
);
909 netif_set_xps_queue(nic
->netdev
,
910 &sq
->affinity_mask
, qidx
);
914 /* Configures receive buffer descriptor ring */
915 static void nicvf_rbdr_config(struct nicvf
*nic
, struct queue_set
*qs
,
916 int qidx
, bool enable
)
919 struct rbdr_cfg rbdr_cfg
;
921 rbdr
= &qs
->rbdr
[qidx
];
922 nicvf_reclaim_rbdr(nic
, rbdr
, qidx
);
926 /* Set descriptor base address */
927 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_BASE
,
928 qidx
, (u64
)(rbdr
->dmem
.phys_base
));
930 /* Enable RBDR & set queue size */
931 /* Buffer size should be in multiples of 128 bytes */
932 memset(&rbdr_cfg
, 0, sizeof(struct rbdr_cfg
));
936 rbdr_cfg
.qsize
= RBDR_SIZE
;
937 rbdr_cfg
.avg_con
= 0;
938 rbdr_cfg
.lines
= rbdr
->dma_size
/ 128;
939 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_CFG
,
940 qidx
, *(u64
*)&rbdr_cfg
);
943 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_DOOR
,
944 qidx
, qs
->rbdr_len
- 1);
946 /* Set threshold value for interrupt generation */
947 nicvf_queue_reg_write(nic
, NIC_QSET_RBDR_0_1_THRESH
,
948 qidx
, rbdr
->thresh
- 1);
951 /* Requests PF to assign and enable Qset */
952 void nicvf_qset_config(struct nicvf
*nic
, bool enable
)
954 union nic_mbx mbx
= {};
955 struct queue_set
*qs
= nic
->qs
;
956 struct qs_cfg
*qs_cfg
;
959 netdev_warn(nic
->netdev
,
960 "Qset is still not allocated, don't init queues\n");
965 qs
->vnic_id
= nic
->vf_id
;
967 /* Send a mailbox msg to PF to config Qset */
968 mbx
.qs
.msg
= NIC_MBOX_MSG_QS_CFG
;
969 mbx
.qs
.num
= qs
->vnic_id
;
970 mbx
.qs
.sqs_count
= nic
->sqs_count
;
973 qs_cfg
= (struct qs_cfg
*)&mbx
.qs
.cfg
;
979 qs_cfg
->vnic
= qs
->vnic_id
;
981 nicvf_send_msg_to_pf(nic
, &mbx
);
984 static void nicvf_free_resources(struct nicvf
*nic
)
987 struct queue_set
*qs
= nic
->qs
;
989 /* Free receive buffer descriptor ring */
990 for (qidx
= 0; qidx
< qs
->rbdr_cnt
; qidx
++)
991 nicvf_free_rbdr(nic
, &qs
->rbdr
[qidx
]);
993 /* Free completion queue */
994 for (qidx
= 0; qidx
< qs
->cq_cnt
; qidx
++)
995 nicvf_free_cmp_queue(nic
, &qs
->cq
[qidx
]);
997 /* Free send queue */
998 for (qidx
= 0; qidx
< qs
->sq_cnt
; qidx
++)
999 nicvf_free_snd_queue(nic
, &qs
->sq
[qidx
]);
1002 static int nicvf_alloc_resources(struct nicvf
*nic
)
1005 struct queue_set
*qs
= nic
->qs
;
1007 /* Alloc receive buffer descriptor ring */
1008 for (qidx
= 0; qidx
< qs
->rbdr_cnt
; qidx
++) {
1009 if (nicvf_init_rbdr(nic
, &qs
->rbdr
[qidx
], qs
->rbdr_len
,
1014 /* Alloc send queue */
1015 for (qidx
= 0; qidx
< qs
->sq_cnt
; qidx
++) {
1016 if (nicvf_init_snd_queue(nic
, &qs
->sq
[qidx
], qs
->sq_len
, qidx
))
1020 /* Alloc completion queue */
1021 for (qidx
= 0; qidx
< qs
->cq_cnt
; qidx
++) {
1022 if (nicvf_init_cmp_queue(nic
, &qs
->cq
[qidx
], qs
->cq_len
))
1028 nicvf_free_resources(nic
);
1032 int nicvf_set_qset_resources(struct nicvf
*nic
)
1034 struct queue_set
*qs
;
1036 qs
= devm_kzalloc(&nic
->pdev
->dev
, sizeof(*qs
), GFP_KERNEL
);
1041 /* Set count of each queue */
1042 qs
->rbdr_cnt
= DEFAULT_RBDR_CNT
;
1043 qs
->rq_cnt
= min_t(u8
, MAX_RCV_QUEUES_PER_QS
, num_online_cpus());
1044 qs
->sq_cnt
= min_t(u8
, MAX_SND_QUEUES_PER_QS
, num_online_cpus());
1045 qs
->cq_cnt
= max_t(u8
, qs
->rq_cnt
, qs
->sq_cnt
);
1047 /* Set queue lengths */
1048 qs
->rbdr_len
= RCV_BUF_COUNT
;
1049 qs
->sq_len
= SND_QUEUE_LEN
;
1050 qs
->cq_len
= CMP_QUEUE_LEN
;
1052 nic
->rx_queues
= qs
->rq_cnt
;
1053 nic
->tx_queues
= qs
->sq_cnt
;
1054 nic
->xdp_tx_queues
= 0;
1059 int nicvf_config_data_transfer(struct nicvf
*nic
, bool enable
)
1061 bool disable
= false;
1062 struct queue_set
*qs
= nic
->qs
;
1063 struct queue_set
*pqs
= nic
->pnicvf
->qs
;
1069 /* Take primary VF's queue lengths.
1070 * This is needed to take queue lengths set from ethtool
1071 * into consideration.
1073 if (nic
->sqs_mode
&& pqs
) {
1074 qs
->cq_len
= pqs
->cq_len
;
1075 qs
->sq_len
= pqs
->sq_len
;
1079 if (nicvf_alloc_resources(nic
))
1082 for (qidx
= 0; qidx
< qs
->sq_cnt
; qidx
++)
1083 nicvf_snd_queue_config(nic
, qs
, qidx
, enable
);
1084 for (qidx
= 0; qidx
< qs
->cq_cnt
; qidx
++)
1085 nicvf_cmp_queue_config(nic
, qs
, qidx
, enable
);
1086 for (qidx
= 0; qidx
< qs
->rbdr_cnt
; qidx
++)
1087 nicvf_rbdr_config(nic
, qs
, qidx
, enable
);
1088 for (qidx
= 0; qidx
< qs
->rq_cnt
; qidx
++)
1089 nicvf_rcv_queue_config(nic
, qs
, qidx
, enable
);
1091 for (qidx
= 0; qidx
< qs
->rq_cnt
; qidx
++)
1092 nicvf_rcv_queue_config(nic
, qs
, qidx
, disable
);
1093 for (qidx
= 0; qidx
< qs
->rbdr_cnt
; qidx
++)
1094 nicvf_rbdr_config(nic
, qs
, qidx
, disable
);
1095 for (qidx
= 0; qidx
< qs
->sq_cnt
; qidx
++)
1096 nicvf_snd_queue_config(nic
, qs
, qidx
, disable
);
1097 for (qidx
= 0; qidx
< qs
->cq_cnt
; qidx
++)
1098 nicvf_cmp_queue_config(nic
, qs
, qidx
, disable
);
1100 nicvf_free_resources(nic
);
1103 /* Reset RXQ's stats.
1104 * SQ's stats will get reset automatically once SQ is reset.
1106 nicvf_reset_rcv_queue_stats(nic
);
1111 /* Get a free desc from SQ
1112 * returns descriptor pointer & descriptor number
1114 static inline int nicvf_get_sq_desc(struct snd_queue
*sq
, int desc_cnt
)
1120 atomic_sub(desc_cnt
, &sq
->free_cnt
);
1122 sq
->xdp_free_cnt
-= desc_cnt
;
1123 sq
->tail
+= desc_cnt
;
1124 sq
->tail
&= (sq
->dmem
.q_len
- 1);
1129 /* Rollback to previous tail pointer when descriptors not used */
1130 static inline void nicvf_rollback_sq_desc(struct snd_queue
*sq
,
1131 int qentry
, int desc_cnt
)
1134 atomic_add(desc_cnt
, &sq
->free_cnt
);
1137 /* Free descriptor back to SQ for future use */
1138 void nicvf_put_sq_desc(struct snd_queue
*sq
, int desc_cnt
)
1141 atomic_add(desc_cnt
, &sq
->free_cnt
);
1143 sq
->xdp_free_cnt
+= desc_cnt
;
1144 sq
->head
+= desc_cnt
;
1145 sq
->head
&= (sq
->dmem
.q_len
- 1);
1148 static inline int nicvf_get_nxt_sqentry(struct snd_queue
*sq
, int qentry
)
1151 qentry
&= (sq
->dmem
.q_len
- 1);
1155 void nicvf_sq_enable(struct nicvf
*nic
, struct snd_queue
*sq
, int qidx
)
1159 sq_cfg
= nicvf_queue_reg_read(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
);
1160 sq_cfg
|= NICVF_SQ_EN
;
1161 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, sq_cfg
);
1162 /* Ring doorbell so that H/W restarts processing SQEs */
1163 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_DOOR
, qidx
, 0);
1166 void nicvf_sq_disable(struct nicvf
*nic
, int qidx
)
1170 sq_cfg
= nicvf_queue_reg_read(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
);
1171 sq_cfg
&= ~NICVF_SQ_EN
;
1172 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_CFG
, qidx
, sq_cfg
);
1175 void nicvf_sq_free_used_descs(struct net_device
*netdev
, struct snd_queue
*sq
,
1179 struct sk_buff
*skb
;
1180 struct nicvf
*nic
= netdev_priv(netdev
);
1181 struct sq_hdr_subdesc
*hdr
;
1183 head
= nicvf_queue_reg_read(nic
, NIC_QSET_SQ_0_7_HEAD
, qidx
) >> 4;
1184 tail
= nicvf_queue_reg_read(nic
, NIC_QSET_SQ_0_7_TAIL
, qidx
) >> 4;
1185 while (sq
->head
!= head
) {
1186 hdr
= (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, sq
->head
);
1187 if (hdr
->subdesc_type
!= SQ_DESC_TYPE_HEADER
) {
1188 nicvf_put_sq_desc(sq
, 1);
1191 skb
= (struct sk_buff
*)sq
->skbuff
[sq
->head
];
1193 dev_kfree_skb_any(skb
);
1194 atomic64_add(1, (atomic64_t
*)&netdev
->stats
.tx_packets
);
1195 atomic64_add(hdr
->tot_len
,
1196 (atomic64_t
*)&netdev
->stats
.tx_bytes
);
1197 nicvf_put_sq_desc(sq
, hdr
->subdesc_cnt
+ 1);
1201 /* XDP Transmit APIs */
1202 void nicvf_xdp_sq_doorbell(struct nicvf
*nic
,
1203 struct snd_queue
*sq
, int sq_num
)
1205 if (!sq
->xdp_desc_cnt
)
1208 /* make sure all memory stores are done before ringing doorbell */
1211 /* Inform HW to xmit all queued XDP descriptors */
1212 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_DOOR
,
1213 sq_num
, sq
->xdp_desc_cnt
);
1214 sq
->xdp_desc_cnt
= 0;
1218 nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue
*sq
, int qentry
,
1219 int subdesc_cnt
, u64 data
, int len
)
1221 struct sq_hdr_subdesc
*hdr
;
1223 hdr
= (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, qentry
);
1224 memset(hdr
, 0, SND_QUEUE_DESC_SIZE
);
1225 hdr
->subdesc_type
= SQ_DESC_TYPE_HEADER
;
1226 hdr
->subdesc_cnt
= subdesc_cnt
;
1229 sq
->xdp_page
[qentry
] = (u64
)virt_to_page((void *)data
);
1232 int nicvf_xdp_sq_append_pkt(struct nicvf
*nic
, struct snd_queue
*sq
,
1233 u64 bufaddr
, u64 dma_addr
, u16 len
)
1235 int subdesc_cnt
= MIN_SQ_DESC_PER_PKT_XMIT
;
1238 if (subdesc_cnt
> sq
->xdp_free_cnt
)
1241 qentry
= nicvf_get_sq_desc(sq
, subdesc_cnt
);
1243 nicvf_xdp_sq_add_hdr_subdesc(sq
, qentry
, subdesc_cnt
- 1, bufaddr
, len
);
1245 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1246 nicvf_sq_add_gather_subdesc(sq
, qentry
, len
, dma_addr
);
1248 sq
->xdp_desc_cnt
+= subdesc_cnt
;
1253 /* Calculate no of SQ subdescriptors needed to transmit all
1254 * segments of this TSO packet.
1255 * Taken from 'Tilera network driver' with a minor modification.
1257 static int nicvf_tso_count_subdescs(struct sk_buff
*skb
)
1259 struct skb_shared_info
*sh
= skb_shinfo(skb
);
1260 unsigned int sh_len
= skb_transport_offset(skb
) + tcp_hdrlen(skb
);
1261 unsigned int data_len
= skb
->len
- sh_len
;
1262 unsigned int p_len
= sh
->gso_size
;
1263 long f_id
= -1; /* id of the current fragment */
1264 long f_size
= skb_headlen(skb
) - sh_len
; /* current fragment size */
1265 long f_used
= 0; /* bytes used from the current fragment */
1266 long n
; /* size of the current piece of payload */
1270 for (segment
= 0; segment
< sh
->gso_segs
; segment
++) {
1271 unsigned int p_used
= 0;
1273 /* One edesc for header and for each piece of the payload. */
1274 for (num_edescs
++; p_used
< p_len
; num_edescs
++) {
1275 /* Advance as needed. */
1276 while (f_used
>= f_size
) {
1278 f_size
= skb_frag_size(&sh
->frags
[f_id
]);
1282 /* Use bytes from the current fragment. */
1284 if (n
> f_size
- f_used
)
1285 n
= f_size
- f_used
;
1290 /* The last segment may be less than gso_size. */
1292 if (data_len
< p_len
)
1296 /* '+ gso_segs' for SQ_HDR_SUBDESCs for each segment */
1297 return num_edescs
+ sh
->gso_segs
;
/* Extra SQ subdescriptors added for a HW TSO packet on 88xx (nic->t88);
 * nicvf_sq_add_cqe_subdesc() fills them with a HDR + IMMEDIATE pair.
 */
1300 #define POST_CQE_DESC_COUNT 2
1302 /* Get the number of SQ descriptors needed to xmit this skb.
 *
 * Starts from MIN_SQ_DESC_PER_PKT_XMIT. A GSO skb on a NIC without HW TSO
 * uses the count from nicvf_tso_count_subdescs() instead. Otherwise the
 * dummy CQE descriptors (88xx with HW TSO) and one descriptor per page
 * fragment are added on top.
 *
 * NOTE(review): the statements closing the first branch and the final
 * return are not visible here; confirm against the full file.
 */
1303 static int nicvf_sq_subdesc_required(struct nicvf
*nic
, struct sk_buff
*skb
)
1305 int subdesc_cnt
= MIN_SQ_DESC_PER_PKT_XMIT
;
/* GSO skb but no HW TSO: count per-segment descriptors */
1307 if (skb_shinfo(skb
)->gso_size
&& !nic
->hw_tso
) {
1308 subdesc_cnt
= nicvf_tso_count_subdescs(skb
);
1312 /* Dummy descriptors to get TSO pkt completion notification */
1313 if (nic
->t88
&& nic
->hw_tso
&& skb_shinfo(skb
)->gso_size
)
1314 subdesc_cnt
+= POST_CQE_DESC_COUNT
;
/* One gather descriptor per page fragment */
1316 if (skb_shinfo(skb
)->nr_frags
)
1317 subdesc_cnt
+= skb_shinfo(skb
)->nr_frags
;
1322 /* Add SQ HEADER subdescriptor.
1323 * First subdescriptor for every send descriptor.
 *
 * Zeroes the descriptor at 'qentry', marks it SQ_DESC_TYPE_HEADER and
 * fills in the subdescriptor count. For CHECKSUM_PARTIAL skbs it enables
 * L3 checksum offload, records the L3/L4 offsets and selects the L4
 * checksum type (TCP, UDP or SCTP). When HW TSO is in use for a GSO skb
 * it also programs tso_start, tso_max_paysize and inner_l3_offset and
 * bumps the tx_tso counter.
 *
 * NOTE(review): some statements (how 'len' is consumed, the post_cqe
 * assignment, the else branch and the protocol switch) are not visible
 * here; confirm against the full file.
1326 nicvf_sq_add_hdr_subdesc(struct nicvf
*nic
, struct snd_queue
*sq
, int qentry
,
1327 int subdesc_cnt
, struct sk_buff
*skb
, int len
)
1330 struct sq_hdr_subdesc
*hdr
;
1337 ip
.hdr
= skb_network_header(skb
);
1338 hdr
= (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, qentry
);
1339 memset(hdr
, 0, SND_QUEUE_DESC_SIZE
);
1340 hdr
->subdesc_type
= SQ_DESC_TYPE_HEADER
;
1342 if (nic
->t88
&& nic
->hw_tso
&& skb_shinfo(skb
)->gso_size
) {
1343 /* post_cqe = 0, to avoid HW posting a CQE for every TSO
1344 * segment transmitted on 88xx.
 * The POST_CQE_DESC_COUNT dummy descriptors are left out of this count.
1346 hdr
->subdesc_cnt
= subdesc_cnt
- POST_CQE_DESC_COUNT
;
1348 sq
->skbuff
[qentry
] = (u64
)skb
;
1349 /* Enable notification via CQE after processing SQE */
1351 /* Number of subdescriptors following this one */
1352 hdr
->subdesc_cnt
= subdesc_cnt
;
1356 /* Offload checksum calculation to HW */
1357 if (skb
->ip_summed
== CHECKSUM_PARTIAL
) {
1358 hdr
->csum_l3
= 1; /* Enable IP csum calculation */
1359 hdr
->l3_offset
= skb_network_offset(skb
);
1360 hdr
->l4_offset
= skb_transport_offset(skb
);
1362 proto
= (ip
.v4
->version
== 4) ? ip
.v4
->protocol
:
1367 hdr
->csum_l4
= SEND_L4_CSUM_TCP
;
1370 hdr
->csum_l4
= SEND_L4_CSUM_UDP
;
1373 hdr
->csum_l4
= SEND_L4_CSUM_SCTP
;
1378 if (nic
->hw_tso
&& skb_shinfo(skb
)->gso_size
) {
1380 hdr
->tso_start
= skb_transport_offset(skb
) + tcp_hdrlen(skb
);
1381 hdr
->tso_max_paysize
= skb_shinfo(skb
)->gso_size
;
1382 /* For non-tunneled pkts, point this to L2 ethertype */
1383 hdr
->inner_l3_offset
= skb_network_offset(skb
) - 2;
1384 this_cpu_inc(nic
->pnicvf
->drv_stats
->tx_tso
);
1388 /* SQ GATHER subdescriptor
1389 * Must follow HDR descriptor
 *
 * Masks 'qentry' with (q_len - 1), zeroes the descriptor at that entry,
 * marks it SQ_DESC_TYPE_GATHER with load type NIC_SEND_LD_TYPE_E_LDD and
 * stores the buffer size and address in it.
 *
 * NOTE(review): the declarations of 'size' and 'data' are not visible
 * here; presumably they are the remaining parameters - confirm.
1391 static inline void nicvf_sq_add_gather_subdesc(struct snd_queue
*sq
, int qentry
,
1394 struct sq_gather_subdesc
*gather
;
1396 qentry
&= (sq
->dmem
.q_len
- 1);
1397 gather
= (struct sq_gather_subdesc
*)GET_SQ_DESC(sq
, qentry
);
1399 memset(gather
, 0, SND_QUEUE_DESC_SIZE
);
1400 gather
->subdesc_type
= SQ_DESC_TYPE_GATHER
;
1401 gather
->ld_type
= NIC_SEND_LD_TYPE_E_LDD
;
1402 gather
->size
= size
;
1403 gather
->addr
= data
;
1406 /* Add HDR + IMMEDIATE subdescriptors right after descriptors of a TSO
1407 * packet so that a CQE is posted as a notification for transmission of
 * that packet.
 *
 * The skb pointer is stored against 'qentry' (the extra HDR entry), and
 * 'tso_sqe', the index of the packet's real header SQE, is kept in the
 * HDR's rsvd2 field. The HDR announces POST_CQE_DESC_COUNT - 1 following
 * subdescriptors, i.e. the IMMEDIATE one written at the next entry.
1410 static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue
*sq
, int qentry
,
1411 int tso_sqe
, struct sk_buff
*skb
)
1413 struct sq_imm_subdesc
*imm
;
1414 struct sq_hdr_subdesc
*hdr
;
1416 sq
->skbuff
[qentry
] = (u64
)skb
;
1418 hdr
= (struct sq_hdr_subdesc
*)GET_SQ_DESC(sq
, qentry
);
1419 memset(hdr
, 0, SND_QUEUE_DESC_SIZE
);
1420 hdr
->subdesc_type
= SQ_DESC_TYPE_HEADER
;
1421 /* Enable notification via CQE after processing SQE */
1423 /* There is no packet to transmit here */
1425 hdr
->subdesc_cnt
= POST_CQE_DESC_COUNT
- 1;
1427 /* Actual TSO header SQE index, needed for cleanup */
1428 hdr
->rsvd2
= tso_sqe
;
1430 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1431 imm
= (struct sq_imm_subdesc
*)GET_SQ_DESC(sq
, qentry
);
1432 memset(imm
, 0, SND_QUEUE_DESC_SIZE
);
1433 imm
->subdesc_type
= SQ_DESC_TYPE_IMMEDIATE
;
/* Ring the send queue doorbell for a packet that has just been queued.
 *
 * Looks up the netdev TX queue the skb is mapped to (on the primary VF's
 * netdev, nic->pnicvf), accounts skb->len on it via netdev_tx_sent_queue()
 * and then writes the NIC_QSET_SQ_0_7_DOOR register.
 *
 * NOTE(review): the barrier call and the remaining register-write
 * arguments are not visible here; presumably 'sq_num' and 'desc_cnt' are
 * what gets written - confirm against the full file.
 */
1437 static inline void nicvf_sq_doorbell(struct nicvf
*nic
, struct sk_buff
*skb
,
1438 int sq_num
, int desc_cnt
)
1440 struct netdev_queue
*txq
;
1442 txq
= netdev_get_tx_queue(nic
->pnicvf
->netdev
,
1443 skb_get_queue_mapping(skb
));
1445 netdev_tx_sent_queue(txq
, skb
->len
);
1447 /* make sure all memory stores are done before ringing doorbell */
1450 /* Inform HW to xmit the newly queued descriptors (TSO and non-TSO callers) */
1451 nicvf_queue_reg_write(nic
, NIC_QSET_SQ_0_7_DOOR
,
1455 /* Segment a TSO packet into 'gso_size' segments and append
1456 * them to SQ for transfer
 *
 * For each segment a header is built into sq->tso_hdrs with
 * tso_build_hdr() and queued as a gather subdescriptor, the payload
 * pieces are queued as further gather subdescriptors, and the segment's
 * HDR subdescriptor is then written at the entry saved in 'hdr_qentry'.
 * The skb pointer is cleared for every segment header entry and finally
 * stored against the last one only, so the skb can be freed from there.
 * The doorbell is rung once with the accumulated descriptor count.
 *
 * NOTE(review): the updates of 'seg_subdescs', 'seg_len' and 'data_left'
 * and the return statement are not visible here; confirm against the
 * full file.
1458 static int nicvf_sq_append_tso(struct nicvf
*nic
, struct snd_queue
*sq
,
1459 int sq_num
, int qentry
, struct sk_buff
*skb
)
1462 int seg_subdescs
= 0, desc_cnt
= 0;
1463 int seg_len
, total_len
, data_left
;
1464 int hdr_qentry
= qentry
;
1465 int hdr_len
= skb_transport_offset(skb
) + tcp_hdrlen(skb
);
1467 tso_start(skb
, &tso
);
1468 total_len
= skb
->len
- hdr_len
;
1469 while (total_len
> 0) {
1472 /* Save Qentry for adding HDR_SUBDESC at the end */
1473 hdr_qentry
= qentry
;
1475 data_left
= min_t(int, skb_shinfo(skb
)->gso_size
, total_len
);
1476 total_len
-= data_left
;
1478 /* Add segment's header */
1479 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1480 hdr
= sq
->tso_hdrs
+ qentry
* TSO_HEADER_SIZE
;
1481 tso_build_hdr(skb
, hdr
, &tso
, data_left
, total_len
== 0);
1482 nicvf_sq_add_gather_subdesc(sq
, qentry
, hdr_len
,
1484 qentry
* TSO_HEADER_SIZE
);
1485 /* HDR_SUBDESC + GATHER */
1489 /* Add segment's payload fragments */
1490 while (data_left
> 0) {
1493 size
= min_t(int, tso
.size
, data_left
);
1495 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1496 nicvf_sq_add_gather_subdesc(sq
, qentry
, size
,
1497 virt_to_phys(tso
.data
));
1502 tso_build_data(skb
, &tso
, size
);
1504 nicvf_sq_add_hdr_subdesc(nic
, sq
, hdr_qentry
,
1505 seg_subdescs
- 1, skb
, seg_len
);
1506 sq
->skbuff
[hdr_qentry
] = (u64
)NULL
;
1507 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1509 desc_cnt
+= seg_subdescs
;
1511 /* Save SKB in the last segment for freeing */
1512 sq
->skbuff
[hdr_qentry
] = (u64
)skb
;
1514 nicvf_sq_doorbell(nic
, skb
, sq_num
, desc_cnt
);
1516 this_cpu_inc(nic
->pnicvf
->drv_stats
->tx_tso
);
1520 /* Append an skb to a SQ for packet transfer.
 *
 * Checks that the descriptor count from nicvf_sq_subdesc_required() fits
 * in sq->free_cnt and reserves that many entries. GSO skbs on a NIC
 * without HW TSO are handed to nicvf_sq_append_tso(). Otherwise a HDR
 * subdescriptor is written, followed by one gather subdescriptor for the
 * linear data and one per page fragment, each DMA-mapped with
 * DMA_ATTR_SKIP_CPU_SYNC. On a mapping error the reserved descriptors are
 * rolled back; in the fragment loop the buffers mapped so far are
 * unmapped first. On 88xx (nic->t88) GSO skbs also get the CQE-posting
 * subdescriptors before the doorbell is rung.
 *
 * NOTE(review): the return statements and the failure label are not
 * visible here; confirm the return convention against the full file.
 */
1521 int nicvf_sq_append_skb(struct nicvf
*nic
, struct snd_queue
*sq
,
1522 struct sk_buff
*skb
, u8 sq_num
)
1525 int subdesc_cnt
, hdr_sqe
= 0;
1529 subdesc_cnt
= nicvf_sq_subdesc_required(nic
, skb
);
1530 if (subdesc_cnt
> atomic_read(&sq
->free_cnt
))
1533 qentry
= nicvf_get_sq_desc(sq
, subdesc_cnt
);
1535 /* Check if it's a TSO packet that HW TSO will not handle */
1536 if (skb_shinfo(skb
)->gso_size
&& !nic
->hw_tso
)
1537 return nicvf_sq_append_tso(nic
, sq
, sq_num
, qentry
, skb
);
1539 /* Add SQ header subdesc */
1540 nicvf_sq_add_hdr_subdesc(nic
, sq
, qentry
, subdesc_cnt
- 1,
1544 /* Add SQ gather subdescs */
1545 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1546 size
= skb_is_nonlinear(skb
) ? skb_headlen(skb
) : skb
->len
;
1547 /* HW will ensure data coherency, CPU sync not required */
1548 dma_addr
= dma_map_page_attrs(&nic
->pdev
->dev
, virt_to_page(skb
->data
),
1549 offset_in_page(skb
->data
), size
,
1550 DMA_TO_DEVICE
, DMA_ATTR_SKIP_CPU_SYNC
);
1551 if (dma_mapping_error(&nic
->pdev
->dev
, dma_addr
)) {
1552 nicvf_rollback_sq_desc(sq
, qentry
, subdesc_cnt
);
1556 nicvf_sq_add_gather_subdesc(sq
, qentry
, size
, dma_addr
);
1558 /* Check for scattered buffer */
1559 if (!skb_is_nonlinear(skb
))
1562 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
1563 const struct skb_frag_struct
*frag
;
1565 frag
= &skb_shinfo(skb
)->frags
[i
];
1567 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1568 size
= skb_frag_size(frag
);
1569 dma_addr
= dma_map_page_attrs(&nic
->pdev
->dev
,
1570 skb_frag_page(frag
),
1571 frag
->page_offset
, size
,
1573 DMA_ATTR_SKIP_CPU_SYNC
);
1574 if (dma_mapping_error(&nic
->pdev
->dev
, dma_addr
)) {
1575 /* Free entire chain of mapped buffers
1576 * here 'i' = frags mapped + above mapped skb->data
1578 nicvf_unmap_sndq_buffers(nic
, sq
, hdr_sqe
, i
);
1579 nicvf_rollback_sq_desc(sq
, qentry
, subdesc_cnt
);
1582 nicvf_sq_add_gather_subdesc(sq
, qentry
, size
, dma_addr
);
1586 if (nic
->t88
&& skb_shinfo(skb
)->gso_size
) {
1587 qentry
= nicvf_get_nxt_sqentry(sq
, qentry
);
1588 nicvf_sq_add_cqe_subdesc(sq
, qentry
, hdr_sqe
, skb
);
1591 nicvf_sq_doorbell(nic
, skb
, sq_num
, subdesc_cnt
);
1596 /* Use original PCI dev for debug log */
1598 netdev_dbg(nic
->netdev
, "Not enough SQ descriptors to xmit pkt\n");
/* Map receive buffer index 'i' to the index used for the buffer length
 * array (see the rb_lens[frag_num(frag)] lookup in nicvf_get_rcv_skb()).
 * The return below reverses the index within each aligned group of four
 * (0<->3, 1<->2).
 *
 * NOTE(review): the embedded line numbering jumps around this return, so
 * lines are missing here (possibly conditional compilation or another
 * return); confirm against the full file before relying on this.
 */
1602 static inline unsigned frag_num(unsigned i
)
1605 return (i
& ~3) + 3 - (i
& 3);
/* Unmap the DMA mapping of one receive buffer.
 *
 * 'dma_addr' is the address passed to dma_unmap_page_attrs(), 'buf_addr'
 * is the physical address used to look up the backing page. The page
 * reference count check, the XDP_PACKET_HEADROOM length adjustment and
 * the alignment of 'dma_addr' to the page start appear to belong to the
 * XDP case.
 *
 * NOTE(review): the test on 'xdp' and the statement taken when the page
 * reference count differs from 1 are not visible here; confirm against
 * the full file.
 */
1611 static void nicvf_unmap_rcv_buffer(struct nicvf
*nic
, u64 dma_addr
,
1612 u64 buf_addr
, bool xdp
)
1614 struct page
*page
= NULL
;
1615 int len
= RCV_FRAG_LEN
;
1618 page
= virt_to_page(phys_to_virt(buf_addr
));
1619 /* Check if it's a recycled page, if not
1620 * unmap the DMA mapping.
1622 * Recycled page holds an extra reference.
1624 if (page_ref_count(page
) != 1)
1627 len
+= XDP_PACKET_HEADROOM
;
1628 /* Receive buffers in XDP mode are mapped from page start */
1629 dma_addr
&= PAGE_MASK
;
1631 dma_unmap_page_attrs(&nic
->pdev
->dev
, dma_addr
, len
,
1632 DMA_FROM_DEVICE
, DMA_ATTR_SKIP_CPU_SYNC
);
1635 /* Returns SKB for a received packet.
 *
 * Walks the cqe_rx->rb_cnt receive buffers referenced by the CQE. Buffer
 * lengths are read from the u16 array at word 3 of the CQE (indexed via
 * frag_num()), buffer pointers from word 6 or word 7. The first buffer
 * is turned into the skb with nicvf_rb_ptr_to_skb(), with
 * cqe_rx->align_pad reserved as headroom. Later buffers are attached as
 * page fragments with skb_add_rx_frag(). Every buffer is passed to
 * nicvf_unmap_rcv_buffer() before use.
 *
 * NOTE(review): the conditions selecting the pointer offset, guarding
 * dev_kfree_skb_any() and separating first from later fragments, as well
 * as the return statement, are not visible here; confirm against the
 * full file.
 */
1636 struct sk_buff
*nicvf_get_rcv_skb(struct nicvf
*nic
,
1637 struct cqe_rx_t
*cqe_rx
, bool xdp
)
1640 int payload_len
= 0;
1641 struct sk_buff
*skb
= NULL
;
1644 u16
*rb_lens
= NULL
;
1645 u64
*rb_ptrs
= NULL
;
1648 rb_lens
= (void *)cqe_rx
+ (3 * sizeof(u64
));
1649 /* Except 88xx pass1 on all other chips CQE_RX2_S is added to
1650 * CQE_RX at word6, hence buffer pointers move by word
1652 * Use existing 'hw_tso' flag which will be set for all chips
1653 * except 88xx pass1 instead of an additional cache line
1654 * access (or miss) by using pci dev's revision.
1657 rb_ptrs
= (void *)cqe_rx
+ (6 * sizeof(u64
));
1659 rb_ptrs
= (void *)cqe_rx
+ (7 * sizeof(u64
));
1661 for (frag
= 0; frag
< cqe_rx
->rb_cnt
; frag
++) {
1662 payload_len
= rb_lens
[frag_num(frag
)];
1663 phys_addr
= nicvf_iova_to_phys(nic
, *rb_ptrs
);
1666 dev_kfree_skb_any(skb
);
1671 /* First fragment */
1672 nicvf_unmap_rcv_buffer(nic
,
1673 *rb_ptrs
- cqe_rx
->align_pad
,
1675 skb
= nicvf_rb_ptr_to_skb(nic
,
1676 phys_addr
- cqe_rx
->align_pad
,
1680 skb_reserve(skb
, cqe_rx
->align_pad
);
1681 skb_put(skb
, payload_len
);
1684 nicvf_unmap_rcv_buffer(nic
, *rb_ptrs
, phys_addr
, xdp
);
1685 page
= virt_to_page(phys_to_virt(phys_addr
));
1686 offset
= phys_to_virt(phys_addr
) - page_address(page
);
1687 skb_add_rx_frag(skb
, skb_shinfo(skb
)->nr_frags
, page
,
1688 offset
, payload_len
, RCV_FRAG_LEN
);
1690 /* Next buffer pointer */
/* Translate an interrupt type, plus queue index 'q_idx' for the per-queue
 * types, into its bit mask. Per-queue types (the CQ, SQ and RBDR shifts)
 * use (1ULL << q_idx) shifted by the type's NICVF_INTR_*_SHIFT. The
 * remaining types are a single bit at their own shift.
 *
 * NOTE(review): the case labels of the first two assignments, the
 * default branch and the return statement are not visible here. The
 * callers' "unknown type" messages suggest unknown types yield 0;
 * confirm against the full file.
 */
1696 static u64
nicvf_int_type_to_mask(int int_type
, int q_idx
)
1702 reg_val
= ((1ULL << q_idx
) << NICVF_INTR_CQ_SHIFT
);
1705 reg_val
= ((1ULL << q_idx
) << NICVF_INTR_SQ_SHIFT
);
1707 case NICVF_INTR_RBDR
:
1708 reg_val
= ((1ULL << q_idx
) << NICVF_INTR_RBDR_SHIFT
);
1710 case NICVF_INTR_PKT_DROP
:
1711 reg_val
= (1ULL << NICVF_INTR_PKT_DROP_SHIFT
);
1713 case NICVF_INTR_TCP_TIMER
:
1714 reg_val
= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT
);
1716 case NICVF_INTR_MBOX
:
1717 reg_val
= (1ULL << NICVF_INTR_MBOX_SHIFT
);
1719 case NICVF_INTR_QS_ERR
:
1720 reg_val
= (1ULL << NICVF_INTR_QS_ERR_SHIFT
);
1729 /* Enable interrupt.
 *
 * Builds the mask for 'int_type'/'q_idx' and writes the current value of
 * NIC_VF_ENA_W1S OR-ed with that mask back to NIC_VF_ENA_W1S.
 *
 * NOTE(review): the condition guarding the debug message (presumably an
 * unknown type) is not visible here; confirm against the full file.
 */
1730 void nicvf_enable_intr(struct nicvf
*nic
, int int_type
, int q_idx
)
1732 u64 mask
= nicvf_int_type_to_mask(int_type
, q_idx
);
1735 netdev_dbg(nic
->netdev
,
1736 "Failed to enable interrupt: unknown type\n");
1739 nicvf_reg_write(nic
, NIC_VF_ENA_W1S
,
1740 nicvf_reg_read(nic
, NIC_VF_ENA_W1S
) | mask
);
1743 /* Disable interrupt.
 *
 * Builds the mask for 'int_type'/'q_idx' and writes it to NIC_VF_ENA_W1C.
 *
 * NOTE(review): the condition guarding the debug message (presumably an
 * unknown type) is not visible here; confirm against the full file.
 */
1744 void nicvf_disable_intr(struct nicvf
*nic
, int int_type
, int q_idx
)
1746 u64 mask
= nicvf_int_type_to_mask(int_type
, q_idx
);
1749 netdev_dbg(nic
->netdev
,
1750 "Failed to disable interrupt: unknown type\n");
1754 nicvf_reg_write(nic
, NIC_VF_ENA_W1C
, mask
);
1757 /* Clear interrupt.
 *
 * Builds the mask for 'int_type'/'q_idx' and writes it to NIC_VF_INT.
 *
 * NOTE(review): the condition guarding the debug message (presumably an
 * unknown type) is not visible here; confirm against the full file.
 */
1758 void nicvf_clear_intr(struct nicvf
*nic
, int int_type
, int q_idx
)
1760 u64 mask
= nicvf_int_type_to_mask(int_type
, q_idx
);
1763 netdev_dbg(nic
->netdev
,
1764 "Failed to clear interrupt: unknown type\n");
1768 nicvf_reg_write(nic
, NIC_VF_INT
, mask
);
1771 /* Check if interrupt is enabled.
 *
 * Returns the mask for 'int_type'/'q_idx' AND-ed with the value read
 * from NIC_VF_ENA_W1S, so the result is non-zero when the bit is set.
 *
 * NOTE(review): the u64 result is narrowed to the int return type, which
 * is only safe if every mask bit fits in an int; confirm against the
 * NICVF_INTR_*_SHIFT values.
 */
1772 int nicvf_is_intr_enabled(struct nicvf
*nic
, int int_type
, int q_idx
)
1774 u64 mask
= nicvf_int_type_to_mask(int_type
, q_idx
);
1775 /* If interrupt type is unknown, we treat it disabled. */
1777 netdev_dbg(nic
->netdev
,
1778 "Failed to check interrupt enable: unknown type\n");
1782 return mask
& nicvf_reg_read(nic
, NIC_VF_ENA_W1S
);
/* Refresh the byte and packet counters kept in nic->qs->rq[rq_idx].stats
 * from the receive queue statistics registers. GET_RQ_STATS() builds the
 * register offset from NIC_QSET_RQ_0_7_STAT_0_1, the queue index shifted
 * by NIC_Q_NUM_SHIFT and the statistic number shifted left by 3.
 */
1785 void nicvf_update_rq_stats(struct nicvf
*nic
, int rq_idx
)
1787 struct rcv_queue
*rq
;
1789 #define GET_RQ_STATS(reg) \
1790 nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\
1791 (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
1793 rq
= &nic
->qs
->rq
[rq_idx
];
1794 rq
->stats
.bytes
= GET_RQ_STATS(RQ_SQ_STATS_OCTS
);
1795 rq
->stats
.pkts
= GET_RQ_STATS(RQ_SQ_STATS_PKTS
);
/* Refresh the byte and packet counters kept in nic->qs->sq[sq_idx].stats
 * from the send queue statistics registers. GET_SQ_STATS() builds the
 * register offset from NIC_QSET_SQ_0_7_STAT_0_1, the queue index shifted
 * by NIC_Q_NUM_SHIFT and the statistic number shifted left by 3.
 */
1798 void nicvf_update_sq_stats(struct nicvf
*nic
, int sq_idx
)
1800 struct snd_queue
*sq
;
1802 #define GET_SQ_STATS(reg) \
1803 nicvf_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1 |\
1804 (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3))
1806 sq
= &nic
->qs
->sq
[sq_idx
];
1807 sq
->stats
.bytes
= GET_SQ_STATS(RQ_SQ_STATS_OCTS
);
1808 sq
->stats
.pkts
= GET_SQ_STATS(RQ_SQ_STATS_PKTS
);
1811 /* Check for errors in the receive cmp.queue entry.
 *
 * When rx_err messages are enabled (netif_msg_rx_err) the CQE's error
 * level and opcode are logged. The per-CPU driver counter matching
 * cqe_rx->err_opcode is then incremented.
 *
 * NOTE(review): the statements ending each case, any default branch and
 * the return value are not visible here; confirm against the full file.
 */
1812 int nicvf_check_cqe_rx_errs(struct nicvf
*nic
, struct cqe_rx_t
*cqe_rx
)
1814 if (netif_msg_rx_err(nic
))
1815 netdev_err(nic
->netdev
,
1816 "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n",
1818 cqe_rx
->err_level
, cqe_rx
->err_opcode
);
1820 switch (cqe_rx
->err_opcode
) {
1821 case CQ_RX_ERROP_RE_PARTIAL
:
1822 this_cpu_inc(nic
->drv_stats
->rx_bgx_truncated_pkts
);
1824 case CQ_RX_ERROP_RE_JABBER
:
1825 this_cpu_inc(nic
->drv_stats
->rx_jabber_errs
);
1827 case CQ_RX_ERROP_RE_FCS
:
1828 this_cpu_inc(nic
->drv_stats
->rx_fcs_errs
);
1830 case CQ_RX_ERROP_RE_RX_CTL
:
1831 this_cpu_inc(nic
->drv_stats
->rx_bgx_errs
);
1833 case CQ_RX_ERROP_PREL2_ERR
:
1834 this_cpu_inc(nic
->drv_stats
->rx_prel2_errs
);
1836 case CQ_RX_ERROP_L2_MAL
:
1837 this_cpu_inc(nic
->drv_stats
->rx_l2_hdr_malformed
);
1839 case CQ_RX_ERROP_L2_OVERSIZE
:
1840 this_cpu_inc(nic
->drv_stats
->rx_oversize
);
1842 case CQ_RX_ERROP_L2_UNDERSIZE
:
1843 this_cpu_inc(nic
->drv_stats
->rx_undersize
);
1845 case CQ_RX_ERROP_L2_LENMISM
:
1846 this_cpu_inc(nic
->drv_stats
->rx_l2_len_mismatch
);
1848 case CQ_RX_ERROP_L2_PCLP
:
1849 this_cpu_inc(nic
->drv_stats
->rx_l2_pclp
);
1851 case CQ_RX_ERROP_IP_NOT
:
1852 this_cpu_inc(nic
->drv_stats
->rx_ip_ver_errs
);
1854 case CQ_RX_ERROP_IP_CSUM_ERR
:
1855 this_cpu_inc(nic
->drv_stats
->rx_ip_csum_errs
);
1857 case CQ_RX_ERROP_IP_MAL
:
1858 this_cpu_inc(nic
->drv_stats
->rx_ip_hdr_malformed
);
1860 case CQ_RX_ERROP_IP_MALD
:
1861 this_cpu_inc(nic
->drv_stats
->rx_ip_payload_malformed
);
1863 case CQ_RX_ERROP_IP_HOP
:
1864 this_cpu_inc(nic
->drv_stats
->rx_ip_ttl_errs
);
1866 case CQ_RX_ERROP_L3_PCLP
:
1867 this_cpu_inc(nic
->drv_stats
->rx_l3_pclp
);
1869 case CQ_RX_ERROP_L4_MAL
:
1870 this_cpu_inc(nic
->drv_stats
->rx_l4_malformed
);
1872 case CQ_RX_ERROP_L4_CHK
:
1873 this_cpu_inc(nic
->drv_stats
->rx_l4_csum_errs
);
1875 case CQ_RX_ERROP_UDP_LEN
:
1876 this_cpu_inc(nic
->drv_stats
->rx_udp_len_errs
);
1878 case CQ_RX_ERROP_L4_PORT
:
1879 this_cpu_inc(nic
->drv_stats
->rx_l4_port_errs
);
1881 case CQ_RX_ERROP_TCP_FLAG
:
1882 this_cpu_inc(nic
->drv_stats
->rx_tcp_flag_errs
);
1884 case CQ_RX_ERROP_TCP_OFFSET
:
1885 this_cpu_inc(nic
->drv_stats
->rx_tcp_offset_errs
);
1887 case CQ_RX_ERROP_L4_PCLP
:
1888 this_cpu_inc(nic
->drv_stats
->rx_l4_pclp
);
1890 case CQ_RX_ERROP_RBDR_TRUNC
:
1891 this_cpu_inc(nic
->drv_stats
->rx_truncated_pkts
);
1898 /* Check for errors in the send cmp.queue entry */
1899 int nicvf_check_cqe_tx_errs(struct nicvf
*nic
, struct cqe_send_t
*cqe_tx
)
1901 switch (cqe_tx
->send_status
) {
1902 case CQ_TX_ERROP_DESC_FAULT
:
1903 this_cpu_inc(nic
->drv_stats
->tx_desc_fault
);
1905 case CQ_TX_ERROP_HDR_CONS_ERR
:
1906 this_cpu_inc(nic
->drv_stats
->tx_hdr_cons_err
);
1908 case CQ_TX_ERROP_SUBDC_ERR
:
1909 this_cpu_inc(nic
->drv_stats
->tx_subdesc_err
);
1911 case CQ_TX_ERROP_MAX_SIZE_VIOL
:
1912 this_cpu_inc(nic
->drv_stats
->tx_max_size_exceeded
);
1914 case CQ_TX_ERROP_IMM_SIZE_OFLOW
:
1915 this_cpu_inc(nic
->drv_stats
->tx_imm_size_oflow
);
1917 case CQ_TX_ERROP_DATA_SEQUENCE_ERR
:
1918 this_cpu_inc(nic
->drv_stats
->tx_data_seq_err
);
1920 case CQ_TX_ERROP_MEM_SEQUENCE_ERR
:
1921 this_cpu_inc(nic
->drv_stats
->tx_mem_seq_err
);
1923 case CQ_TX_ERROP_LOCK_VIOL
:
1924 this_cpu_inc(nic
->drv_stats
->tx_lock_viol
);
1926 case CQ_TX_ERROP_DATA_FAULT
:
1927 this_cpu_inc(nic
->drv_stats
->tx_data_fault
);
1929 case CQ_TX_ERROP_TSTMP_CONFLICT
:
1930 this_cpu_inc(nic
->drv_stats
->tx_tstmp_conflict
);
1932 case CQ_TX_ERROP_TSTMP_TIMEOUT
:
1933 this_cpu_inc(nic
->drv_stats
->tx_tstmp_timeout
);
1935 case CQ_TX_ERROP_MEM_FAULT
:
1936 this_cpu_inc(nic
->drv_stats
->tx_mem_fault
);
1938 case CQ_TX_ERROP_CK_OVERLAP
:
1939 this_cpu_inc(nic
->drv_stats
->tx_csum_overlap
);
1941 case CQ_TX_ERROP_CK_OFLOW
:
1942 this_cpu_inc(nic
->drv_stats
->tx_csum_overflow
);