/*
 * This file is part of the Chelsio T4 Ethernet driver for Linux.
 *
 * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/dma-mapping.h>
#include <linux/jiffies.h>
#include <linux/prefetch.h>
#include <linux/export.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <net/busy_poll.h>
#ifdef CONFIG_CHELSIO_T4_FCOE
#include <scsi/fc/fc_fcoe.h>
#endif /* CONFIG_CHELSIO_T4_FCOE */
#include "cxgb4.h"
#include "t4_regs.h"
#include "t4_values.h"
#include "t4_msg.h"
#include "t4fw_api.h"
#include "cxgb4_ptp.h"
/*
 * Rx buffer size.  We use largish buffers if possible but settle for single
 * pages under memory shortage.
 */
#if PAGE_SHIFT >= 16
# define FL_PG_ORDER 0
#else
# define FL_PG_ORDER (16 - PAGE_SHIFT)
#endif
/* RX_PULL_LEN should be <= RX_COPY_THRES */
#define RX_COPY_THRES    256
#define RX_PULL_LEN      128

/*
 * Main body length for sk_buffs used for Rx Ethernet packets with fragments.
 * Should be >= RX_PULL_LEN but possibly bigger to give pskb_may_pull some room.
 */
#define RX_PKT_SKB_LEN   512
/*
 * Max number of Tx descriptors we clean up at a time.  Should be modest as
 * freeing skbs isn't cheap and it happens while holding locks.  As long as we
 * free packets faster than they arrive, we eventually catch up and keep the
 * amortized cost reasonable.  Must be >= 2 * TXQ_STOP_THRES.
 */
#define MAX_TX_RECLAIM 16
/*
 * Max number of Rx buffers we replenish at a time.  Again keep this modest,
 * allocating buffers isn't cheap either.
 */
#define MAX_RX_REFILL 16U

/*
 * Period of the Rx queue check timer.  This timer is infrequent as it has
 * something to do only when the system experiences severe memory shortage.
 */
#define RX_QCHECK_PERIOD (HZ / 2)

/*
 * Period of the Tx queue check timer.
 */
#define TX_QCHECK_PERIOD (HZ / 2)

/*
 * Max number of Tx descriptors to be reclaimed by the Tx timer.
 */
#define MAX_TIMER_TX_RECLAIM 100

/*
 * Timer index used when backing off due to memory shortage.
 */
#define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
/*
 * Suspend an Ethernet Tx queue with fewer available descriptors than this.
 * This is the same as calc_tx_descs() for a TSO packet with
 * nr_frags == MAX_SKB_FRAGS.
 */
#define ETHTXQ_STOP_THRES \
        (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))
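/* Worked example (editorial illustration; MAX_SKB_FRAGS depends on kernel
 * configuration): with MAX_SKB_FRAGS == 17 the SGL body needs
 * (3 * 17) / 2 + (17 & 1) == 25 + 1 == 26 flits, DIV_ROUND_UP(26, 8) == 4
 * descriptors, and the leading "1 +" covers the WR/CPL headers, so the
 * queue is suspended when fewer than 5 descriptors remain.
 */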
/*
 * Suspension threshold for non-Ethernet Tx queues.  We require enough room
 * for a full sized WR.
 */
#define TXQ_STOP_THRES (SGE_MAX_WR_LEN / sizeof(struct tx_desc))

/*
 * Max Tx descriptor space we allow for an Ethernet packet to be inlined
 * into a WR.
 */
#define MAX_IMM_TX_PKT_LEN 256

/*
 * Max size of a WR sent through a control Tx queue.
 */
#define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN
struct tx_sw_desc {                /* SW state per Tx descriptor */
        struct sk_buff *skb;
        struct ulptx_sgl *sgl;
};

struct rx_sw_desc {                /* SW state per Rx descriptor */
        struct page *page;
        dma_addr_t dma_addr;
};
/*
 * Rx buffer sizes for "useskbs" Free List buffers (one ingress packet per skb
 * buffer).  We currently only support two sizes for 1500- and 9000-byte MTUs.
 * We could easily support more but there doesn't seem to be much need for
 * that ...
 */
#define FL_MTU_SMALL 1500
#define FL_MTU_LARGE 9000
static inline unsigned int fl_mtu_bufsize(struct adapter *adapter,
                                          unsigned int mtu)
{
        struct sge *s = &adapter->sge;

        return ALIGN(s->pktshift + ETH_HLEN + VLAN_HLEN + mtu, s->fl_align);
}

#define FL_MTU_SMALL_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_SMALL)
#define FL_MTU_LARGE_BUFSIZE(adapter) fl_mtu_bufsize(adapter, FL_MTU_LARGE)
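/* Worked example (illustrative values, not mandated by this file): with
 * s->pktshift == 2 and s->fl_align == 64, FL_MTU_SMALL_BUFSIZE(adapter)
 * evaluates to ALIGN(2 + 14 + 4 + 1500, 64) == ALIGN(1520, 64) == 1536 bytes.
 */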
/*
 * Bits 0..3 of rx_sw_desc.dma_addr have special meaning.  The hardware uses
 * these to specify the buffer size as an index into the SGE Free List Buffer
 * Size register array.  We also use bit 4, when the buffer has been unmapped
 * for DMA, but this is of course never sent to the hardware and is only used
 * to prevent double unmappings.  All of the above requires that the Free List
 * Buffers which we allocate have the bottom 5 bits free (0) -- i.e. are
 * 32-byte or a power of 2 greater in alignment.  Since the SGE's minimal
 * Free List Buffer alignment is 32 bytes, this works out for us ...
 */
enum {
        RX_BUF_FLAGS     = 0x1f,   /* bottom five bits are special */
        RX_BUF_SIZE      = 0x0f,   /* bottom four bits are for buf sizes */
        RX_UNMAPPED_BUF  = 0x10,   /* buffer is not mapped */

        /*
         * XXX We shouldn't depend on being able to use these indices.
         * XXX Especially when some other Master PF has initialized the
         * XXX adapter or we use the Firmware Configuration File.  We
         * XXX should really search through the Host Buffer Size register
         * XXX array for the appropriately sized buffer indices.
         */
        RX_SMALL_PG_BUF  = 0x0,   /* small (PAGE_SIZE) page buffer */
        RX_LARGE_PG_BUF  = 0x1,   /* large (FL_PG_ORDER) page buffer */
        RX_SMALL_MTU_BUF = 0x2,   /* small MTU buffer */
        RX_LARGE_MTU_BUF = 0x3,   /* large MTU buffer */
};
static int timer_pkt_quota[] = {1, 1, 2, 3, 4, 5};
#define MIN_NAPI_WORK  1
static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d)
{
        return d->dma_addr & ~(dma_addr_t)RX_BUF_FLAGS;
}

static inline bool is_buf_mapped(const struct rx_sw_desc *d)
{
        return !(d->dma_addr & RX_UNMAPPED_BUF);
}
/**
 *      txq_avail - return the number of available slots in a Tx queue
 *      @q: the Tx queue
 *
 *      Returns the number of descriptors in a Tx queue available to write new
 *      packets.
 */
static inline unsigned int txq_avail(const struct sge_txq *q)
{
        return q->size - 1 - q->in_use;
}
/**
 *      fl_cap - return the capacity of a free-buffer list
 *      @fl: the FL
 *
 *      Returns the capacity of a free-buffer list.  The capacity is less than
 *      the size because one descriptor needs to be left unpopulated, otherwise
 *      HW will think the FL is empty.
 */
static inline unsigned int fl_cap(const struct sge_fl *fl)
{
        return fl->size - 8;   /* 1 descriptor = 8 buffers */
}
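/* Example: an FL created with fl->size == 1024 therefore has a usable
 * capacity of 1016 buffers; the final HW descriptor (8 buffers' worth)
 * must stay unpopulated so the hardware never mistakes a full list for
 * an empty one.
 */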
/**
 *      fl_starving - return whether a Free List is starving.
 *      @adapter: pointer to the adapter
 *      @fl: the Free List
 *
 *      Tests specified Free List to see whether the number of buffers
 *      available to the hardware has fallen below our "starvation"
 *      threshold.
 */
static inline bool fl_starving(const struct adapter *adapter,
                               const struct sge_fl *fl)
{
        const struct sge *s = &adapter->sge;

        return fl->avail - fl->pend_cred <= s->fl_starve_thres;
}
static int map_skb(struct device *dev, const struct sk_buff *skb,
                   dma_addr_t *addr)
{
        const skb_frag_t *fp, *end;
        const struct skb_shared_info *si;

        *addr = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
        if (dma_mapping_error(dev, *addr))
                goto out_err;

        si = skb_shinfo(skb);
        end = &si->frags[si->nr_frags];

        for (fp = si->frags; fp < end; fp++) {
                *++addr = skb_frag_dma_map(dev, fp, 0, skb_frag_size(fp),
                                           DMA_TO_DEVICE);
                if (dma_mapping_error(dev, *addr))
                        goto unwind;
        }
        return 0;

unwind:
        while (fp-- > si->frags)
                dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE);

        dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE);
out_err:
        return -ENOMEM;
}
#ifdef CONFIG_NEED_DMA_MAP_STATE
static void unmap_skb(struct device *dev, const struct sk_buff *skb,
                      const dma_addr_t *addr)
{
        const skb_frag_t *fp, *end;
        const struct skb_shared_info *si;

        dma_unmap_single(dev, *addr++, skb_headlen(skb), DMA_TO_DEVICE);

        si = skb_shinfo(skb);
        end = &si->frags[si->nr_frags];
        for (fp = si->frags; fp < end; fp++)
                dma_unmap_page(dev, *addr++, skb_frag_size(fp), DMA_TO_DEVICE);
}
/**
 *      deferred_unmap_destructor - unmap a packet when it is freed
 *      @skb: the packet
 *
 *      This is the packet destructor used for Tx packets that need to remain
 *      mapped until they are freed rather than until their Tx descriptors are
 *      freed.
 */
static void deferred_unmap_destructor(struct sk_buff *skb)
{
        unmap_skb(skb->dev->dev.parent, skb, (dma_addr_t *)skb->head);
}
#endif /* CONFIG_NEED_DMA_MAP_STATE */
static void unmap_sgl(struct device *dev, const struct sk_buff *skb,
                      const struct ulptx_sgl *sgl, const struct sge_txq *q)
{
        const struct ulptx_sge_pair *p;
        unsigned int nfrags = skb_shinfo(skb)->nr_frags;

        if (likely(skb_headlen(skb)))
                dma_unmap_single(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
                                 DMA_TO_DEVICE);
        else {
                dma_unmap_page(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
                               DMA_TO_DEVICE);
                nfrags--;
        }

        /*
         * the complexity below is because of the possibility of a wrap-around
         * in the middle of an SGL
         */
        for (p = sgl->sge; nfrags >= 2; nfrags -= 2) {
                if (likely((u8 *)(p + 1) <= (u8 *)q->stat)) {
unmap:                  dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
                                       ntohl(p->len[0]), DMA_TO_DEVICE);
                        dma_unmap_page(dev, be64_to_cpu(p->addr[1]),
                                       ntohl(p->len[1]), DMA_TO_DEVICE);
                        p++;
                } else if ((u8 *)p == (u8 *)q->stat) {
                        p = (const struct ulptx_sge_pair *)q->desc;
                        goto unmap;
                } else if ((u8 *)p + 8 == (u8 *)q->stat) {
                        const __be64 *addr = (const __be64 *)q->desc;

                        dma_unmap_page(dev, be64_to_cpu(addr[0]),
                                       ntohl(p->len[0]), DMA_TO_DEVICE);
                        dma_unmap_page(dev, be64_to_cpu(addr[1]),
                                       ntohl(p->len[1]), DMA_TO_DEVICE);
                        p = (const struct ulptx_sge_pair *)&addr[2];
                } else {
                        const __be64 *addr = (const __be64 *)q->desc;

                        dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
                                       ntohl(p->len[0]), DMA_TO_DEVICE);
                        dma_unmap_page(dev, be64_to_cpu(addr[0]),
                                       ntohl(p->len[1]), DMA_TO_DEVICE);
                        p = (const struct ulptx_sge_pair *)&addr[1];
                }
        }
        if (nfrags) {
                __be64 addr;

                if ((u8 *)p == (u8 *)q->stat)
                        p = (const struct ulptx_sge_pair *)q->desc;
                addr = (u8 *)p + 16 <= (u8 *)q->stat ? p->addr[0] :
                                                       *(const __be64 *)q->desc;
                dma_unmap_page(dev, be64_to_cpu(addr), ntohl(p->len[0]),
                               DMA_TO_DEVICE);
        }
}
/**
 *      free_tx_desc - reclaims Tx descriptors and their buffers
 *      @adap: the adapter
 *      @q: the Tx queue to reclaim descriptors from
 *      @n: the number of descriptors to reclaim
 *      @unmap: whether the buffers should be unmapped for DMA
 *
 *      Reclaims Tx descriptors from an SGE Tx queue and frees the associated
 *      Tx buffers.  Called with the Tx queue lock held.
 */
void free_tx_desc(struct adapter *adap, struct sge_txq *q,
                  unsigned int n, bool unmap)
{
        struct tx_sw_desc *d;
        unsigned int cidx = q->cidx;
        struct device *dev = adap->pdev_dev;

        d = &q->sdesc[cidx];
        while (n--) {
                if (d->skb) {                       /* an SGL is present */
                        if (unmap)
                                unmap_sgl(dev, d->skb, d->sgl, q);
                        dev_consume_skb_any(d->skb);
                        d->skb = NULL;
                }
                ++d;
                if (++cidx == q->size) {
                        cidx = 0;
                        d = q->sdesc;
                }
        }
        q->cidx = cidx;
}
/*
 * Return the number of reclaimable descriptors in a Tx queue.
 */
static inline int reclaimable(const struct sge_txq *q)
{
        int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));

        hw_cidx -= q->cidx;
        return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx;
}
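/* Example of the wrap-around math: with q->size == 1024, q->cidx == 1000
 * and a hardware CIDX of 8, hw_cidx - q->cidx == -992, so the function
 * returns -992 + 1024 == 32 reclaimable descriptors.
 */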
/**
 *      reclaim_completed_tx - reclaims completed Tx descriptors
 *      @adap: the adapter
 *      @q: the Tx queue to reclaim completed descriptors from
 *      @unmap: whether the buffers should be unmapped for DMA
 *
 *      Reclaims Tx descriptors that the SGE has indicated it has processed,
 *      and frees the associated buffers if possible.  Called with the Tx
 *      queue locked.
 */
static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
                                        bool unmap)
{
        int avail = reclaimable(q);

        if (avail) {
                /*
                 * Limit the amount of clean up work we do at a time to keep
                 * the Tx lock hold time O(1).
                 */
                if (avail > MAX_TX_RECLAIM)
                        avail = MAX_TX_RECLAIM;

                free_tx_desc(adap, q, avail, unmap);
                q->in_use -= avail;
        }
}
static inline int get_buf_size(struct adapter *adapter,
                               const struct rx_sw_desc *d)
{
        struct sge *s = &adapter->sge;
        unsigned int rx_buf_size_idx = d->dma_addr & RX_BUF_SIZE;
        int buf_size;

        switch (rx_buf_size_idx) {
        case RX_SMALL_PG_BUF:
                buf_size = PAGE_SIZE;
                break;

        case RX_LARGE_PG_BUF:
                buf_size = PAGE_SIZE << s->fl_pg_order;
                break;

        case RX_SMALL_MTU_BUF:
                buf_size = FL_MTU_SMALL_BUFSIZE(adapter);
                break;

        case RX_LARGE_MTU_BUF:
                buf_size = FL_MTU_LARGE_BUFSIZE(adapter);
                break;

        default:
                BUG_ON(1);
        }

        return buf_size;
}
/**
 *      free_rx_bufs - free the Rx buffers on an SGE free list
 *      @adap: the adapter
 *      @q: the SGE free list to free buffers from
 *      @n: how many buffers to free
 *
 *      Release the next @n buffers on an SGE free-buffer Rx queue.  The
 *      buffers must be made inaccessible to HW before calling this function.
 */
static void free_rx_bufs(struct adapter *adap, struct sge_fl *q, int n)
{
        while (n--) {
                struct rx_sw_desc *d = &q->sdesc[q->cidx];

                if (is_buf_mapped(d))
                        dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
                                       get_buf_size(adap, d),
                                       PCI_DMA_FROMDEVICE);
                put_page(d->page);
                d->page = NULL;
                if (++q->cidx == q->size)
                        q->cidx = 0;
                q->avail--;
        }
}
/**
 *      unmap_rx_buf - unmap the current Rx buffer on an SGE free list
 *      @adap: the adapter
 *      @q: the SGE free list
 *
 *      Unmap the current buffer on an SGE free-buffer Rx queue.  The
 *      buffer must be made inaccessible to HW before calling this function.
 *
 *      This is similar to free_rx_bufs() above but does not free the buffer.
 *      Do note that the FL still loses any further access to the buffer.
 */
static void unmap_rx_buf(struct adapter *adap, struct sge_fl *q)
{
        struct rx_sw_desc *d = &q->sdesc[q->cidx];

        if (is_buf_mapped(d))
                dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
                               get_buf_size(adap, d), PCI_DMA_FROMDEVICE);
        d->page = NULL;
        if (++q->cidx == q->size)
                q->cidx = 0;
        q->avail--;
}
static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
{
        if (q->pend_cred >= 8) {
                u32 val = adap->params.arch.sge_fl_db;

                if (is_t4(adap->params.chip))
                        val |= PIDX_V(q->pend_cred / 8);
                else
                        val |= PIDX_T5_V(q->pend_cred / 8);

                /* Make sure all memory writes to the Free List queue are
                 * committed before we tell the hardware about them.
                 */
                wmb();

                /* If we don't have access to the new User Doorbell (T5+), use
                 * the old doorbell mechanism; otherwise use the new BAR2
                 * mechanism.
                 */
                if (unlikely(q->bar2_addr == NULL)) {
                        t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
                                     val | QID_V(q->cntxt_id));
                } else {
                        writel(val | QID_V(q->bar2_qid),
                               q->bar2_addr + SGE_UDB_KDOORBELL);

                        /* This Write memory Barrier will force the write to
                         * the User Doorbell area to be flushed.
                         */
                        wmb();
                }
                q->pend_cred &= 7;
        }
}
static inline void set_rx_sw_desc(struct rx_sw_desc *sd, struct page *pg,
                                  dma_addr_t mapping)
{
        sd->page = pg;
        sd->dma_addr = mapping;      /* includes size low bits */
}
/**
 *      refill_fl - refill an SGE Rx buffer ring
 *      @adap: the adapter
 *      @q: the ring to refill
 *      @n: the number of new buffers to allocate
 *      @gfp: the gfp flags for the allocations
 *
 *      (Re)populate an SGE free-buffer queue with up to @n new packet buffers,
 *      allocated with the supplied gfp flags.  The caller must assure that
 *      @n does not exceed the queue's capacity.  If afterwards the queue is
 *      found critically low mark it as starving in the bitmap of starving FLs.
 *
 *      Returns the number of buffers allocated.
 */
static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
                              gfp_t gfp)
{
        struct sge *s = &adap->sge;
        struct page *pg;
        dma_addr_t mapping;
        unsigned int cred = q->avail;
        __be64 *d = &q->desc[q->pidx];
        struct rx_sw_desc *sd = &q->sdesc[q->pidx];
        int node;

#ifdef CONFIG_DEBUG_FS
        if (test_bit(q->cntxt_id - adap->sge.egr_start, adap->sge.blocked_fl))
                goto out;
#endif

        gfp |= __GFP_NOWARN;
        node = dev_to_node(adap->pdev_dev);

        if (s->fl_pg_order == 0)
                goto alloc_small_pages;

        /*
         * Prefer large buffers
         */
        while (n) {
                pg = alloc_pages_node(node, gfp | __GFP_COMP, s->fl_pg_order);
                if (unlikely(!pg)) {
                        q->large_alloc_failed++;
                        break;       /* fall back to single pages */
                }

                mapping = dma_map_page(adap->pdev_dev, pg, 0,
                                       PAGE_SIZE << s->fl_pg_order,
                                       PCI_DMA_FROMDEVICE);
                if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
                        __free_pages(pg, s->fl_pg_order);
                        q->mapping_err++;
                        goto out;   /* do not try small pages for this error */
                }
                mapping |= RX_LARGE_PG_BUF;
                *d++ = cpu_to_be64(mapping);

                set_rx_sw_desc(sd, pg, mapping);
                sd++;

                q->avail++;
                if (++q->pidx == q->size) {
                        q->pidx = 0;
                        sd = q->sdesc;
                        d = q->desc;
                }
                n--;
        }

alloc_small_pages:
        while (n--) {
                pg = alloc_pages_node(node, gfp, 0);
                if (unlikely(!pg)) {
                        q->alloc_failed++;
                        break;
                }

                mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE,
                                       PCI_DMA_FROMDEVICE);
                if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
                        __free_pages(pg, 0);
                        q->mapping_err++;
                        goto out;
                }
                *d++ = cpu_to_be64(mapping);

                set_rx_sw_desc(sd, pg, mapping);
                sd++;

                q->avail++;
                if (++q->pidx == q->size) {
                        q->pidx = 0;
                        sd = q->sdesc;
                        d = q->desc;
                }
        }

out:    cred = q->avail - cred;
        q->pend_cred += cred;
        ring_fl_db(adap, q);

        if (unlikely(fl_starving(adap, q))) {
                smp_wmb();
                q->low++;
                set_bit(q->cntxt_id - adap->sge.egr_start,
                        adap->sge.starving_fl);
        }

        return cred;
}
static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
{
        refill_fl(adap, fl, min(MAX_RX_REFILL, fl_cap(fl) - fl->avail),
                  GFP_ATOMIC);
}
/**
 *      alloc_ring - allocate resources for an SGE descriptor ring
 *      @dev: the PCI device's core device
 *      @nelem: the number of descriptors
 *      @elem_size: the size of each descriptor
 *      @sw_size: the size of the SW state associated with each ring element
 *      @phys: the physical address of the allocated ring
 *      @metadata: address of the array holding the SW state for the ring
 *      @stat_size: extra space in HW ring for status information
 *      @node: preferred node for memory allocations
 *
 *      Allocates resources for an SGE descriptor ring, such as Tx queues,
 *      free buffer lists, or response queues.  Each SGE ring requires
 *      space for its HW descriptors plus, optionally, space for the SW state
 *      associated with each HW entry (the metadata).  The function returns
 *      three values: the virtual address for the HW ring (the return value
 *      of the function), the bus address of the HW ring, and the address
 *      of the SW ring.
 */
static void *alloc_ring(struct device *dev, size_t nelem, size_t elem_size,
                        size_t sw_size, dma_addr_t *phys, void *metadata,
                        size_t stat_size, int node)
{
        size_t len = nelem * elem_size + stat_size;
        void *s = NULL;
        void *p = dma_alloc_coherent(dev, len, phys, GFP_KERNEL);

        if (!p)
                return NULL;
        if (sw_size) {
                s = kzalloc_node(nelem * sw_size, GFP_KERNEL, node);

                if (!s) {
                        dma_free_coherent(dev, len, p, *phys);
                        return NULL;
                }
        }
        if (metadata)
                *(void **)metadata = s;
        memset(p, 0, len);
        return p;
}
/**
 *      sgl_len - calculates the size of an SGL of the given capacity
 *      @n: the number of SGL entries
 *
 *      Calculates the number of flits needed for a scatter/gather list that
 *      can hold the given number of entries.
 */
static inline unsigned int sgl_len(unsigned int n)
{
        /* A Direct Scatter Gather List uses 32-bit lengths and 64-bit PCI DMA
         * addresses.  The DSGL Work Request starts off with a 32-bit DSGL
         * ULPTX header, then Length0, then Address0, then, for 1 <= i <= N,
         * repeated sequences of { Length[i], Length[i+1], Address[i],
         * Address[i+1] } (this ensures that all addresses are on 64-bit
         * boundaries).  If N is even, then Length[N+1] should be set to 0 and
         * Address[N+1] is omitted.
         *
         * The following calculation incorporates all of the above.  It's
         * somewhat hard to follow but, briefly: the "+2" accounts for the
         * first two flits which include the DSGL header, Length0 and
         * Address0; the "(3*(n-1))/2" covers the main body of list entries (3
         * flits for every pair of the remaining N) +1 if (n-1) is odd; and
         * finally the "+((n-1)&1)" adds the one remaining flit needed if
         * (n-1) is odd.
         */
        n--;
        return (3 * n) / 2 + (n & 1) + 2;
}
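/* Worked example: sgl_len(3) first drops n to 2, giving
 * (3 * 2) / 2 + (2 & 1) + 2 == 3 + 0 + 2 == 5 flits: two flits for the
 * DSGL header, Length0 and Address0, plus three flits holding the
 * remaining pair of entries.
 */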
/**
 *      flits_to_desc - returns the num of Tx descriptors for the given flits
 *      @n: the number of flits
 *
 *      Returns the number of Tx descriptors needed for the supplied number
 *      of flits.
 */
static inline unsigned int flits_to_desc(unsigned int n)
{
        BUG_ON(n > SGE_MAX_WR_LEN / 8);
        return DIV_ROUND_UP(n, 8);
}
/**
 *      is_eth_imm - can an Ethernet packet be sent as immediate data?
 *      @skb: the packet
 *
 *      Returns whether an Ethernet packet is small enough to fit as
 *      immediate data.  Return value corresponds to headroom required.
 */
static inline int is_eth_imm(const struct sk_buff *skb)
{
        int hdrlen = skb_shinfo(skb)->gso_size ?
                        sizeof(struct cpl_tx_pkt_lso_core) : 0;

        hdrlen += sizeof(struct cpl_tx_pkt);
        if (skb->len <= MAX_IMM_TX_PKT_LEN - hdrlen)
                return hdrlen;
        return 0;
}
/**
 *      calc_tx_flits - calculate the number of flits for a packet Tx WR
 *      @skb: the packet
 *
 *      Returns the number of flits needed for a Tx WR for the given Ethernet
 *      packet, including the needed WR and CPL headers.
 */
static inline unsigned int calc_tx_flits(const struct sk_buff *skb)
{
        unsigned int flits;
        int hdrlen = is_eth_imm(skb);

        /* If the skb is small enough, we can pump it out as a work request
         * with only immediate data.  In that case we just have to have the
         * TX Packet header plus the skb data in the Work Request.
         */
        if (hdrlen)
                return DIV_ROUND_UP(skb->len + hdrlen, sizeof(__be64));

        /* Otherwise, we're going to have to construct a Scatter gather list
         * of the skb body and fragments.  We also include the flits necessary
         * for the TX Packet Work Request and CPL.  We always have a firmware
         * Write Header (incorporated as part of the cpl_tx_pkt_lso and
         * cpl_tx_pkt structures), followed by either a TX Packet Write CPL
         * message or, if we're doing a Large Send Offload, an LSO CPL message
         * with an embedded TX Packet Write CPL message.
         */
        flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
        if (skb_shinfo(skb)->gso_size)
                flits += (sizeof(struct fw_eth_tx_pkt_wr) +
                          sizeof(struct cpl_tx_pkt_lso_core) +
                          sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
        else
                flits += (sizeof(struct fw_eth_tx_pkt_wr) +
                          sizeof(struct cpl_tx_pkt_core)) / sizeof(__be64);
        return flits;
}
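/* Illustrative sketch: for a non-GSO skb with linear data plus two page
 * fragments, sgl_len(2 + 1) == 5 flits for the DSGL; assuming the WR and
 * CPL headers total 32 bytes (4 flits, a size assumption), calc_tx_flits()
 * returns 9 and flits_to_desc(9) == 2 Tx descriptors.
 */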
/**
 *      calc_tx_descs - calculate the number of Tx descriptors for a packet
 *      @skb: the packet
 *
 *      Returns the number of Tx descriptors needed for the given Ethernet
 *      packet, including the needed WR and CPL headers.
 */
static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
{
        return flits_to_desc(calc_tx_flits(skb));
}
/**
 *      write_sgl - populate a scatter/gather list for a packet
 *      @skb: the packet
 *      @q: the Tx queue we are writing into
 *      @sgl: starting location for writing the SGL
 *      @end: points right after the end of the SGL
 *      @start: start offset into skb main-body data to include in the SGL
 *      @addr: the list of bus addresses for the SGL elements
 *
 *      Generates a gather list for the buffers that make up a packet.
 *      The caller must provide adequate space for the SGL that will be written.
 *      The SGL includes all of the packet's page fragments and the data in its
 *      main body except for the first @start bytes.  @sgl must be 16-byte
 *      aligned and within a Tx descriptor with available space.  @end points
 *      right after the end of the SGL but does not account for any potential
 *      wrap around, i.e., @end > @sgl.
 */
static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
                      struct ulptx_sgl *sgl, u64 *end, unsigned int start,
                      const dma_addr_t *addr)
{
        unsigned int i, len;
        struct ulptx_sge_pair *to;
        const struct skb_shared_info *si = skb_shinfo(skb);
        unsigned int nfrags = si->nr_frags;
        struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1];

        len = skb_headlen(skb) - start;
        if (likely(len)) {
                sgl->len0 = htonl(len);
                sgl->addr0 = cpu_to_be64(addr[0] + start);
                nfrags++;
        } else {
                sgl->len0 = htonl(skb_frag_size(&si->frags[0]));
                sgl->addr0 = cpu_to_be64(addr[1]);
        }

        sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) |
                              ULPTX_NSGE_V(nfrags));
        if (likely(--nfrags == 0))
                return;
        /*
         * Most of the complexity below deals with the possibility we hit the
         * end of the queue in the middle of writing the SGL.  For this case
         * only we create the SGL in a temporary buffer and then copy it.
         */
        to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge;

        for (i = (nfrags != si->nr_frags); nfrags >= 2; nfrags -= 2, to++) {
                to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i]));
                to->len[1] = cpu_to_be32(skb_frag_size(&si->frags[++i]));
                to->addr[0] = cpu_to_be64(addr[i]);
                to->addr[1] = cpu_to_be64(addr[++i]);
        }
        if (nfrags) {
                to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i]));
                to->len[1] = cpu_to_be32(0);
                to->addr[0] = cpu_to_be64(addr[i + 1]);
        }
        if (unlikely((u8 *)end > (u8 *)q->stat)) {
                unsigned int part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1;

                if (likely(part0))
                        memcpy(sgl->sge, buf, part0);
                part1 = (u8 *)end - (u8 *)q->stat;
                memcpy(q->desc, (u8 *)buf + part0, part1);
                end = (void *)q->desc + part1;
        }
        if ((uintptr_t)end & 8)           /* 0-pad to multiple of 16 */
                *end = 0;
}
/* This function copies 64 byte coalesced work request to
 * memory mapped BAR2 space. For coalesced WR SGE fetches
 * data from the FIFO instead of from Host.
 */
static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
{
        int count = 8;

        while (count) {
                writeq(*src, dst);
                src++;
                dst++;
                count--;
        }
}
/**
 *      ring_tx_db - check and potentially ring a Tx queue's doorbell
 *      @adap: the adapter
 *      @q: the Tx queue
 *      @n: number of new descriptors to give to HW
 *
 *      Ring the doorbell for a Tx queue.
 */
static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
{
        /* Make sure that all writes to the TX Descriptors are committed
         * before we tell the hardware about them.
         */
        wmb();

        /* If we don't have access to the new User Doorbell (T5+), use the old
         * doorbell mechanism; otherwise use the new BAR2 mechanism.
         */
        if (unlikely(q->bar2_addr == NULL)) {
                u32 val = PIDX_V(n);
                unsigned long flags;

                /* For T4 we need to participate in the Doorbell Recovery
                 * mechanism.
                 */
                spin_lock_irqsave(&q->db_lock, flags);
                if (!q->db_disabled)
                        t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
                                     QID_V(q->cntxt_id) | val);
                else
                        q->db_pidx_inc += n;
                q->db_pidx = q->pidx;
                spin_unlock_irqrestore(&q->db_lock, flags);
        } else {
                u32 val = PIDX_T5_V(n);

                /* T4 and later chips share the same PIDX field offset within
                 * the doorbell, but T5 and later shrank the field in order to
                 * gain a bit for Doorbell Priority.  The field was absurdly
                 * large in the first place (14 bits) so we just use the T5
                 * and later limits and warn if a Queue ID is too large.
                 */
                WARN_ON(val & DBPRIO_F);

                /* If we're only writing a single TX Descriptor and we can use
                 * Inferred QID registers, we can use the Write Combining
                 * Gather Buffer; otherwise we use the simple doorbell.
                 */
                if (n == 1 && q->bar2_qid == 0) {
                        int index = (q->pidx
                                     ? (q->pidx - 1)
                                     : (q->size - 1));
                        u64 *wr = (u64 *)&q->desc[index];

                        cxgb_pio_copy((u64 __iomem *)
                                      (q->bar2_addr + SGE_UDB_WCDOORBELL),
                                      wr);
                } else {
                        writel(val | QID_V(q->bar2_qid),
                               q->bar2_addr + SGE_UDB_KDOORBELL);
                }

                /* This Write Memory Barrier will force the write to the User
                 * Doorbell area to be flushed.  This is needed to prevent
                 * writes on different CPUs for the same queue from hitting
                 * the adapter out of order.  This is required when some Work
                 * Requests take the Write Combine Gather Buffer path (user
                 * doorbell area offset [SGE_UDB_WCDOORBELL..+63]) and some
                 * take the traditional path where we simply increment the
                 * PIDX (User Doorbell area SGE_UDB_KDOORBELL) and have the
                 * hardware DMA read the actual Work Request.
                 */
                wmb();
        }
}
/**
 *      inline_tx_skb - inline a packet's data into Tx descriptors
 *      @skb: the packet
 *      @q: the Tx queue where the packet will be inlined
 *      @pos: starting position in the Tx queue where to inline the packet
 *
 *      Inline a packet's contents directly into Tx descriptors, starting at
 *      the given position within the Tx DMA ring.
 *      Most of the complexity of this operation is dealing with wrap arounds
 *      in the middle of the packet we want to inline.
 */
static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
                          void *pos)
{
        u64 *p;
        int left = (void *)q->stat - pos;

        if (likely(skb->len <= left)) {
                if (likely(!skb->data_len))
                        skb_copy_from_linear_data(skb, pos, skb->len);
                else
                        skb_copy_bits(skb, 0, pos, skb->len);
                pos += skb->len;
        } else {
                skb_copy_bits(skb, 0, pos, left);
                skb_copy_bits(skb, left, q->desc, skb->len - left);
                pos = (void *)q->desc + (skb->len - left);
        }

        /* 0-pad to multiple of 16 */
        p = PTR_ALIGN(pos, 8);
        if ((uintptr_t)p & 8)
                *p = 0;
}
static void *inline_tx_skb_header(const struct sk_buff *skb,
                                  const struct sge_txq *q, void *pos,
                                  int length)
{
        u64 *p;
        int left = (void *)q->stat - pos;

        if (likely(length <= left)) {
                memcpy(pos, skb->data, length);
                pos += length;
        } else {
                memcpy(pos, skb->data, left);
                memcpy(q->desc, skb->data + left, length - left);
                pos = (void *)q->desc + (length - left);
        }
        /* 0-pad to multiple of 16 */
        p = PTR_ALIGN(pos, 8);
        if ((uintptr_t)p & 8) {
                *p = 0;
                return p + 1;
        }
        return p;
}
/*
 * Figure out what HW csum a packet wants and return the appropriate control
 * bits.
 */
static u64 hwcsum(enum chip_type chip, const struct sk_buff *skb)
{
        int csum_type;
        const struct iphdr *iph = ip_hdr(skb);

        if (iph->version == 4) {
                if (iph->protocol == IPPROTO_TCP)
                        csum_type = TX_CSUM_TCPIP;
                else if (iph->protocol == IPPROTO_UDP)
                        csum_type = TX_CSUM_UDPIP;
                else {
nocsum:                 /*
                         * unknown protocol, disable HW csum
                         * and hope a bad packet is detected
                         */
                        return TXPKT_L4CSUM_DIS_F;
                }
        } else {
                /*
                 * this doesn't work with extension headers
                 */
                const struct ipv6hdr *ip6h = (const struct ipv6hdr *)iph;

                if (ip6h->nexthdr == IPPROTO_TCP)
                        csum_type = TX_CSUM_TCPIP6;
                else if (ip6h->nexthdr == IPPROTO_UDP)
                        csum_type = TX_CSUM_UDPIP6;
                else
                        goto nocsum;
        }

        if (likely(csum_type >= TX_CSUM_TCPIP)) {
                u64 hdr_len = TXPKT_IPHDR_LEN_V(skb_network_header_len(skb));
                int eth_hdr_len = skb_network_offset(skb) - ETH_HLEN;

                if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5)
                        hdr_len |= TXPKT_ETHHDR_LEN_V(eth_hdr_len);
                else
                        hdr_len |= T6_TXPKT_ETHHDR_LEN_V(eth_hdr_len);
                return TXPKT_CSUM_TYPE_V(csum_type) | hdr_len;
        } else {
                int start = skb_transport_offset(skb);

                return TXPKT_CSUM_TYPE_V(csum_type) |
                        TXPKT_CSUM_START_V(start) |
                        TXPKT_CSUM_LOC_V(start + skb->csum_offset);
        }
}
static void eth_txq_stop(struct sge_eth_txq *q)
{
        netif_tx_stop_queue(q->txq);
        q->q.stops++;
}
static inline void txq_advance(struct sge_txq *q, unsigned int n)
{
        q->in_use += n;
        q->pidx += n;
        if (q->pidx >= q->size)
                q->pidx -= q->size;
}
#ifdef CONFIG_CHELSIO_T4_FCOE
static inline int
cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap,
                  const struct port_info *pi, u64 *cntrl)
{
        const struct cxgb_fcoe *fcoe = &pi->fcoe;

        if (!(fcoe->flags & CXGB_FCOE_ENABLED))
                return 0;

        if (skb->protocol != htons(ETH_P_FCOE))
                return 0;

        skb_reset_mac_header(skb);
        skb->mac_len = sizeof(struct ethhdr);

        skb_set_network_header(skb, skb->mac_len);
        skb_set_transport_header(skb, skb->mac_len + sizeof(struct fcoe_hdr));

        if (!cxgb_fcoe_sof_eof_supported(adap, skb))
                return -ENOTSUPP;

        /* FC CRC offload */
        *cntrl = TXPKT_CSUM_TYPE_V(TX_CSUM_FCOE) |
                     TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F |
                     TXPKT_CSUM_START_V(CXGB_FCOE_TXPKT_CSUM_START) |
                     TXPKT_CSUM_END_V(CXGB_FCOE_TXPKT_CSUM_END) |
                     TXPKT_CSUM_LOC_V(CXGB_FCOE_TXPKT_CSUM_END);
        return 0;
}
#endif /* CONFIG_CHELSIO_T4_FCOE */
/**
 *      t4_eth_xmit - add a packet to an Ethernet Tx queue
 *      @skb: the packet
 *      @dev: the egress net device
 *
 *      Add a packet to an SGE Ethernet Tx queue.  Runs with softirqs disabled.
 */
netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
{
        u32 wr_mid, ctrl0, op;
        u64 cntrl, *end;
        int qidx, credits;
        unsigned int flits, ndesc;
        struct adapter *adap;
        struct sge_eth_txq *q;
        const struct port_info *pi;
        struct fw_eth_tx_pkt_wr *wr;
        struct cpl_tx_pkt_core *cpl;
        const struct skb_shared_info *ssi;
        dma_addr_t addr[MAX_SKB_FRAGS + 1];
        bool immediate = false;
        int len, max_pkt_len;
        bool ptp_enabled = is_ptp_enabled(skb, dev);
#ifdef CONFIG_CHELSIO_T4_FCOE
        int err;
#endif /* CONFIG_CHELSIO_T4_FCOE */

        /*
         * The chip min packet length is 10 octets but play safe and reject
         * anything shorter than an Ethernet header.
         */
        if (unlikely(skb->len < ETH_HLEN)) {
out_free:       dev_kfree_skb_any(skb);
                return NETDEV_TX_OK;
        }

        /* Discard the packet if the length is greater than mtu */
        max_pkt_len = ETH_HLEN + dev->mtu;
        if (skb_vlan_tagged(skb))
                max_pkt_len += VLAN_HLEN;
        if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
                goto out_free;

        pi = netdev_priv(dev);
        adap = pi->adapter;
        qidx = skb_get_queue_mapping(skb);
        if (ptp_enabled) {
                spin_lock(&adap->ptp_lock);
                if (!(adap->ptp_tx_skb)) {
                        skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
                        adap->ptp_tx_skb = skb_get(skb);
                } else {
                        spin_unlock(&adap->ptp_lock);
                        goto out_free;
                }
                q = &adap->sge.ptptxq;
        } else {
                q = &adap->sge.ethtxq[qidx + pi->first_qset];
        }
        skb_tx_timestamp(skb);

        reclaim_completed_tx(adap, &q->q, true);
        cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;

#ifdef CONFIG_CHELSIO_T4_FCOE
        err = cxgb_fcoe_offload(skb, adap, pi, &cntrl);
        if (unlikely(err == -ENOTSUPP)) {
                if (ptp_enabled)
                        spin_unlock(&adap->ptp_lock);
                goto out_free;
        }
#endif /* CONFIG_CHELSIO_T4_FCOE */

        flits = calc_tx_flits(skb);
        ndesc = flits_to_desc(flits);
        credits = txq_avail(&q->q) - ndesc;

        if (unlikely(credits < 0)) {
                eth_txq_stop(q);
                dev_err(adap->pdev_dev,
                        "%s: Tx ring %u full while queue awake!\n",
                        dev->name, qidx);
                if (ptp_enabled)
                        spin_unlock(&adap->ptp_lock);
                return NETDEV_TX_BUSY;
        }

        if (is_eth_imm(skb))
                immediate = true;

        if (!immediate &&
            unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) {
                q->mapping_err++;
                if (ptp_enabled)
                        spin_unlock(&adap->ptp_lock);
                goto out_free;
        }

        wr_mid = FW_WR_LEN16_V(DIV_ROUND_UP(flits, 2));
        if (unlikely(credits < ETHTXQ_STOP_THRES)) {
                eth_txq_stop(q);
                wr_mid |= FW_WR_EQUEQ_F | FW_WR_EQUIQ_F;
        }

        wr = (void *)&q->q.desc[q->q.pidx];
        wr->equiq_to_len16 = htonl(wr_mid);
        wr->r3 = cpu_to_be64(0);
        end = (u64 *)wr + flits;

        len = immediate ? skb->len : 0;
        ssi = skb_shinfo(skb);
        if (ssi->gso_size) {
                struct cpl_tx_pkt_lso *lso = (void *)wr;
                bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
                int l3hdr_len = skb_network_header_len(skb);
                int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;

                len += sizeof(*lso);
                wr->op_immdlen = htonl(FW_WR_OP_V(FW_ETH_TX_PKT_WR) |
                                       FW_WR_IMMDLEN_V(len));
                lso->c.lso_ctrl = htonl(LSO_OPCODE_V(CPL_TX_PKT_LSO) |
                                        LSO_FIRST_SLICE_F | LSO_LAST_SLICE_F |
                                        LSO_IPV6_V(v6) |
                                        LSO_ETHHDR_LEN_V(eth_xtra_len / 4) |
                                        LSO_IPHDR_LEN_V(l3hdr_len / 4) |
                                        LSO_TCPHDR_LEN_V(tcp_hdr(skb)->doff));
                lso->c.ipid_ofst = htons(0);
                lso->c.mss = htons(ssi->gso_size);
                lso->c.seqno_offset = htonl(0);
                if (is_t4(adap->params.chip))
                        lso->c.len = htonl(skb->len);
                else
                        lso->c.len = htonl(LSO_T5_XFER_SIZE_V(skb->len));
                cpl = (void *)(lso + 1);

                if (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
                        cntrl = TXPKT_ETHHDR_LEN_V(eth_xtra_len);
                else
                        cntrl = T6_TXPKT_ETHHDR_LEN_V(eth_xtra_len);

                cntrl |= TXPKT_CSUM_TYPE_V(v6 ?
                                           TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) |
                         TXPKT_IPHDR_LEN_V(l3hdr_len);
                q->tso++;
                q->tx_cso += ssi->gso_segs;
        } else {
                len += sizeof(*cpl);
                if (ptp_enabled)
                        op = FW_PTP_TX_PKT_WR;
                else
                        op = FW_ETH_TX_PKT_WR;
                wr->op_immdlen = htonl(FW_WR_OP_V(op) |
                                       FW_WR_IMMDLEN_V(len));
                cpl = (void *)(wr + 1);
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
                        cntrl = hwcsum(adap->params.chip, skb) |
                                TXPKT_IPCSUM_DIS_F;
                        q->tx_cso++;
                }
        }

        if (skb_vlan_tag_present(skb)) {
                q->vlan_ins++;
                cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
#ifdef CONFIG_CHELSIO_T4_FCOE
                if (skb->protocol == htons(ETH_P_FCOE))
                        cntrl |= TXPKT_VLAN_V(
                                 ((skb->priority & 0x7) << VLAN_PRIO_SHIFT));
#endif /* CONFIG_CHELSIO_T4_FCOE */
        }

        ctrl0 = TXPKT_OPCODE_V(CPL_TX_PKT_XT) | TXPKT_INTF_V(pi->tx_chan) |
                TXPKT_PF_V(adap->pf);
        if (ptp_enabled)
                ctrl0 |= TXPKT_TSTAMP_F;
#ifdef CONFIG_CHELSIO_T4_DCB
        if (is_t4(adap->params.chip))
                ctrl0 |= TXPKT_OVLAN_IDX_V(q->dcb_prio);
        else
                ctrl0 |= TXPKT_T5_OVLAN_IDX_V(q->dcb_prio);
#endif
        cpl->ctrl0 = htonl(ctrl0);
        cpl->pack = htons(0);
        cpl->len = htons(skb->len);
        cpl->ctrl1 = cpu_to_be64(cntrl);

        if (immediate) {
                inline_tx_skb(skb, &q->q, cpl + 1);
                dev_consume_skb_any(skb);
        } else {
                int last_desc;

                write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), end, 0,
                          addr);
                skb_orphan(skb);

                last_desc = q->q.pidx + ndesc - 1;
                if (last_desc >= q->q.size)
                        last_desc -= q->q.size;
                q->q.sdesc[last_desc].skb = skb;
                q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)(cpl + 1);
        }

        txq_advance(&q->q, ndesc);

        ring_tx_db(adap, &q->q, ndesc);
        if (ptp_enabled)
                spin_unlock(&adap->ptp_lock);
        return NETDEV_TX_OK;
}
/**
 *      reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
 *      @q: the SGE control Tx queue
 *
 *      This is a variant of reclaim_completed_tx() that is used for Tx queues
 *      that send only immediate data (presently just the control queues) and
 *      thus do not have any sk_buffs to release.
 */
static inline void reclaim_completed_tx_imm(struct sge_txq *q)
{
        int hw_cidx = ntohs(ACCESS_ONCE(q->stat->cidx));
        int reclaim = hw_cidx - q->cidx;

        if (reclaim < 0)
                reclaim += q->size;

        q->in_use -= reclaim;
        q->cidx = hw_cidx;
}
/**
 *      is_imm - check whether a packet can be sent as immediate data
 *      @skb: the packet
 *
 *      Returns true if a packet can be sent as a WR with immediate data.
 */
static inline int is_imm(const struct sk_buff *skb)
{
        return skb->len <= MAX_CTRL_WR_LEN;
}
/**
 *      ctrlq_check_stop - check if a control queue is full and should stop
 *      @q: the queue
 *      @wr: most recent WR written to the queue
 *
 *      Check if a control queue has become full and should be stopped.
 *      We clean up control queue descriptors very lazily, only when we are out.
 *      If the queue is still full after reclaiming any completed descriptors
 *      we suspend it and have the last WR wake it up.
 */
static void ctrlq_check_stop(struct sge_ctrl_txq *q, struct fw_wr_hdr *wr)
{
        reclaim_completed_tx_imm(&q->q);
        if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
                wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F);
                q->q.stops++;
                q->full = 1;
        }
}
/**
 *      ctrl_xmit - send a packet through an SGE control Tx queue
 *      @q: the control queue
 *      @skb: the packet
 *
 *      Send a packet through an SGE control Tx queue.  Packets sent through
 *      a control queue must fit entirely as immediate data.
 */
static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb)
{
        unsigned int ndesc;
        struct fw_wr_hdr *wr;

        if (unlikely(!is_imm(skb))) {
                WARN_ON(1);
                dev_kfree_skb(skb);
                return NET_XMIT_DROP;
        }

        ndesc = DIV_ROUND_UP(skb->len, sizeof(struct tx_desc));
        spin_lock(&q->sendq.lock);

        if (unlikely(q->full)) {
                skb->priority = ndesc;                  /* save for restart */
                __skb_queue_tail(&q->sendq, skb);
                spin_unlock(&q->sendq.lock);
                return NET_XMIT_CN;
        }

        wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
        inline_tx_skb(skb, &q->q, wr);

        txq_advance(&q->q, ndesc);
        if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES))
                ctrlq_check_stop(q, wr);

        ring_tx_db(q->adap, &q->q, ndesc);
        spin_unlock(&q->sendq.lock);

        kfree_skb(skb);
        return NET_XMIT_SUCCESS;
}
/**
 *      restart_ctrlq - restart a suspended control queue
 *      @data: the control queue to restart
 *
 *      Resumes transmission on a suspended Tx control queue.
 */
static void restart_ctrlq(unsigned long data)
{
        struct sk_buff *skb;
        unsigned int written = 0;
        struct sge_ctrl_txq *q = (struct sge_ctrl_txq *)data;

        spin_lock(&q->sendq.lock);
        reclaim_completed_tx_imm(&q->q);
        BUG_ON(txq_avail(&q->q) < TXQ_STOP_THRES);  /* q should be empty */

        while ((skb = __skb_dequeue(&q->sendq)) != NULL) {
                struct fw_wr_hdr *wr;
                unsigned int ndesc = skb->priority;     /* previously saved */

                written += ndesc;
                /* Write descriptors and free skbs outside the lock to limit
                 * wait times.  q->full is still set so new skbs will be queued.
                 */
                wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
                txq_advance(&q->q, ndesc);
                spin_unlock(&q->sendq.lock);

                inline_tx_skb(skb, &q->q, wr);
                kfree_skb(skb);

                if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
                        unsigned long old = q->q.stops;

                        ctrlq_check_stop(q, wr);
                        if (q->q.stops != old) {          /* suspended anew */
                                spin_lock(&q->sendq.lock);
                                goto ringdb;
                        }
                }
                if (written > 16) {
                        ring_tx_db(q->adap, &q->q, written);
                        written = 0;
                }
                spin_lock(&q->sendq.lock);
        }
        q->full = 0;
ringdb: if (written)
                ring_tx_db(q->adap, &q->q, written);
        spin_unlock(&q->sendq.lock);
}
/**
 *      t4_mgmt_tx - send a management message
 *      @adap: the adapter
 *      @skb: the packet containing the management message
 *
 *      Send a management message through control queue 0.
 */
int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
{
        int ret;

        local_bh_disable();
        ret = ctrl_xmit(&adap->sge.ctrlq[0], skb);
        local_bh_enable();
        return ret;
}
/**
 *      is_ofld_imm - check whether a packet can be sent as immediate data
 *      @skb: the packet
 *
 *      Returns true if a packet can be sent as an offload WR with immediate
 *      data.  We currently use the same limit as for Ethernet packets.
 */
static inline int is_ofld_imm(const struct sk_buff *skb)
{
        return skb->len <= MAX_IMM_TX_PKT_LEN;
}
/**
 *      calc_tx_flits_ofld - calculate # of flits for an offload packet
 *      @skb: the packet
 *
 *      Returns the number of flits needed for the given offload packet.
 *      These packets are already fully constructed and no additional headers
 *      will be added.
 */
static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
{
        unsigned int flits, cnt;

        if (is_ofld_imm(skb))
                return DIV_ROUND_UP(skb->len, 8);

        flits = skb_transport_offset(skb) / 8U;   /* headers */
        cnt = skb_shinfo(skb)->nr_frags;
        if (skb_tail_pointer(skb) != skb_transport_header(skb))
                cnt++;
        return flits + sgl_len(cnt);
}
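/* Example: an offload WR with a 48-byte header area in the linear data,
 * a tail pointer beyond the transport header, and two page fragments
 * gives flits = 48 / 8 == 6 plus sgl_len(2 + 1) == 5, i.e. 11 flits in
 * total.
 */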
/**
 *      txq_stop_maperr - stop a Tx queue due to I/O MMU exhaustion
 *      @q: the queue to stop
 *
 *      Mark a Tx queue stopped due to I/O MMU exhaustion and resulting
 *      inability to map packets.  A periodic timer attempts to restart
 *      queues so marked.
 */
static void txq_stop_maperr(struct sge_uld_txq *q)
{
        q->mapping_err++;
        q->q.stops++;
        set_bit(q->q.cntxt_id - q->adap->sge.egr_start,
                q->adap->sge.txq_maperr);
}
/**
 *      ofldtxq_stop - stop an offload Tx queue that has become full
 *      @q: the queue to stop
 *      @skb: the packet causing the queue to become full
 *
 *      Stops an offload Tx queue that has become full and modifies the packet
 *      being written to request a wakeup.
 */
static void ofldtxq_stop(struct sge_uld_txq *q, struct sk_buff *skb)
{
        struct fw_wr_hdr *wr = (struct fw_wr_hdr *)skb->data;

        wr->lo |= htonl(FW_WR_EQUEQ_F | FW_WR_EQUIQ_F);
        q->q.stops++;
        q->full = 1;
}
/**
 *      service_ofldq - service/restart a suspended offload queue
 *      @q: the offload queue
 *
 *      Services an offload Tx queue by moving packets from its Pending Send
 *      Queue to the Hardware TX ring.  The function starts and ends with the
 *      Send Queue locked, but drops the lock while putting the skb at the
 *      head of the Send Queue onto the Hardware TX Ring.  Dropping the lock
 *      allows more skbs to be added to the Send Queue by other threads.
 *      The packet being processed at the head of the Pending Send Queue is
 *      left on the queue in case we experience DMA Mapping errors, etc.
 *      and need to give up and restart later.
 *
 *      service_ofldq() can be thought of as a task which opportunistically
 *      uses other threads' execution contexts.  We use the Offload Queue
 *      boolean "service_ofldq_running" to make sure that only one instance
 *      is ever running at a time ...
 */
static void service_ofldq(struct sge_uld_txq *q)
{
        u64 *pos, *before, *end;
        int credits;
        struct sk_buff *skb;
        struct sge_txq *txq;
        unsigned int left;
        unsigned int written = 0;
        unsigned int flits, ndesc;

        /* If another thread is currently in service_ofldq() processing the
         * Pending Send Queue then there's nothing to do. Otherwise, flag
         * that we're doing the work and continue.  Examining/modifying
         * the Offload Queue boolean "service_ofldq_running" must be done
         * while holding the Pending Send Queue Lock.
         */
        if (q->service_ofldq_running)
                return;
        q->service_ofldq_running = true;

        while ((skb = skb_peek(&q->sendq)) != NULL && !q->full) {
                /* We drop the lock while we're working with the skb at the
                 * head of the Pending Send Queue.  This allows more skbs to
                 * be added to the Pending Send Queue while we're working on
                 * this one.  We don't need to lock to guard the TX Ring
                 * updates because only one thread of execution is ever
                 * allowed into service_ofldq() at a time.
                 */
                spin_unlock(&q->sendq.lock);

                reclaim_completed_tx(q->adap, &q->q, false);

                flits = skb->priority;                /* previously saved */
                ndesc = flits_to_desc(flits);
                credits = txq_avail(&q->q) - ndesc;
                BUG_ON(credits < 0);
                if (unlikely(credits < TXQ_STOP_THRES))
                        ofldtxq_stop(q, skb);

                pos = (u64 *)&q->q.desc[q->q.pidx];
                if (is_ofld_imm(skb))
                        inline_tx_skb(skb, &q->q, pos);
                else if (map_skb(q->adap->pdev_dev, skb,
                                 (dma_addr_t *)skb->head)) {
                        txq_stop_maperr(q);
                        spin_lock(&q->sendq.lock);
                        break;
                } else {
                        int last_desc, hdr_len = skb_transport_offset(skb);

                        /* The WR headers may not fit within one descriptor.
                         * So we need to deal with wrap-around here.
                         */
                        before = (u64 *)pos;
                        end = (u64 *)pos + flits;
                        txq = &q->q;
                        pos = (void *)inline_tx_skb_header(skb, &q->q,
                                                           (void *)pos,
                                                           hdr_len);
                        if (before > (u64 *)pos) {
                                left = (u8 *)end - (u8 *)txq->stat;
                                end = (void *)txq->desc + left;
                        }

                        /* If current position is already at the end of the
                         * ofld queue, reset the current to point to
                         * start of the queue and update the end ptr as well.
                         */
                        if (pos == (u64 *)txq->stat) {
                                left = (u8 *)end - (u8 *)txq->stat;
                                end = (void *)txq->desc + left;
                                pos = (void *)txq->desc;
                        }

                        write_sgl(skb, &q->q, (void *)pos,
                                  end, hdr_len,
                                  (dma_addr_t *)skb->head);
#ifdef CONFIG_NEED_DMA_MAP_STATE
                        skb->dev = q->adap->port[0];
                        skb->destructor = deferred_unmap_destructor;
#endif
                        last_desc = q->q.pidx + ndesc - 1;
                        if (last_desc >= q->q.size)
                                last_desc -= q->q.size;
                        q->q.sdesc[last_desc].skb = skb;
                }

                txq_advance(&q->q, ndesc);
                written += ndesc;
                if (unlikely(written > 32)) {
                        ring_tx_db(q->adap, &q->q, written);
                        written = 0;
                }

                /* Reacquire the Pending Send Queue Lock so we can unlink the
                 * skb we've just successfully transferred to the TX Ring and
                 * loop for the next skb which may be at the head of the
                 * Pending Send Queue.
                 */
                spin_lock(&q->sendq.lock);
                __skb_unlink(skb, &q->sendq);
                if (is_ofld_imm(skb))
                        kfree_skb(skb);
        }
        if (likely(written))
                ring_tx_db(q->adap, &q->q, written);

        /* Indicate that no thread is processing the Pending Send Queue
         * anymore.
         */
        q->service_ofldq_running = false;
}
/**
 *      ofld_xmit - send a packet through an offload queue
 *      @q: the Tx offload queue
 *      @skb: the packet
 *
 *      Send an offload packet through an SGE offload queue.
 */
static int ofld_xmit(struct sge_uld_txq *q, struct sk_buff *skb)
{
        skb->priority = calc_tx_flits_ofld(skb);       /* save for restart */
        spin_lock(&q->sendq.lock);

        /* Queue the new skb onto the Offload Queue's Pending Send Queue.  If
         * that results in this new skb being the only one on the queue, start
         * servicing it.  If there are other skbs already on the list, then
         * either the queue is currently being processed or it's been stopped
         * for some reason and it'll be restarted at a later time.  Restart
         * paths are triggered by events like experiencing a DMA Mapping Error
         * or filling the Hardware TX Ring.
         */
        __skb_queue_tail(&q->sendq, skb);
        if (q->sendq.qlen == 1)
                service_ofldq(q);

        spin_unlock(&q->sendq.lock);
        return NET_XMIT_SUCCESS;
}
/**
 *      restart_ofldq - restart a suspended offload queue
 *      @data: the offload queue to restart
 *
 *      Resumes transmission on a suspended Tx offload queue.
 */
static void restart_ofldq(unsigned long data)
{
        struct sge_uld_txq *q = (struct sge_uld_txq *)data;

        spin_lock(&q->sendq.lock);
        q->full = 0;            /* the queue actually is completely empty now */
        service_ofldq(q);
        spin_unlock(&q->sendq.lock);
}
/**
 *      skb_txq - return the Tx queue an offload packet should use
 *      @skb: the packet
 *
 *      Returns the Tx queue an offload packet should use as indicated by bits
 *      1-15 in the packet's queue_mapping.
 */
static inline unsigned int skb_txq(const struct sk_buff *skb)
{
        return skb->queue_mapping >> 1;
}

/**
 *      is_ctrl_pkt - return whether an offload packet is a control packet
 *      @skb: the packet
 *
 *      Returns whether an offload packet should use an OFLD or a CTRL
 *      Tx queue as indicated by bit 0 in the packet's queue_mapping.
 */
static inline unsigned int is_ctrl_pkt(const struct sk_buff *skb)
{
        return skb->queue_mapping & 1;
}
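/* Encoding example: a queue_mapping of 7 (binary 111) has bit 0 set, so
 * the packet goes to a CTRL queue, and the remaining bits select queue
 * index 7 >> 1 == 3.
 */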
static inline int uld_send(struct adapter *adap, struct sk_buff *skb,
                           unsigned int tx_uld_type)
{
        struct sge_uld_txq_info *txq_info;
        struct sge_uld_txq *txq;
        unsigned int idx = skb_txq(skb);

        if (unlikely(is_ctrl_pkt(skb))) {
                /* Single ctrl queue is a requirement for LE workaround path */
                if (adap->tids.nsftids)
                        idx = 0;
                return ctrl_xmit(&adap->sge.ctrlq[idx], skb);
        }

        txq_info = adap->sge.uld_txq_info[tx_uld_type];
        if (unlikely(!txq_info)) {
                WARN_ON(true);
                return NET_XMIT_DROP;
        }

        txq = &txq_info->uldtxq[idx];
        return ofld_xmit(txq, skb);
}
/**
 *      t4_ofld_send - send an offload packet
 *      @adap: the adapter
 *      @skb: the packet
 *
 *      Sends an offload packet.  We use the packet queue_mapping to select the
 *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 *      should be sent as regular or control, bits 1-15 select the queue.
 */
int t4_ofld_send(struct adapter *adap, struct sk_buff *skb)
{
        int ret;

        local_bh_disable();
        ret = uld_send(adap, skb, CXGB4_TX_OFLD);
        local_bh_enable();
        return ret;
}

/**
 *      cxgb4_ofld_send - send an offload packet
 *      @dev: the net device
 *      @skb: the packet
 *
 *      Sends an offload packet.  This is an exported version of t4_ofld_send(),
 *      intended for ULDs.
 */
int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb)
{
        return t4_ofld_send(netdev2adap(dev), skb);
}
EXPORT_SYMBOL(cxgb4_ofld_send);
/**
 *      t4_crypto_send - send a crypto packet
 *      @adap: the adapter
 *      @skb: the packet
 *
 *      Sends a crypto packet.  We use the packet queue_mapping to select the
 *      appropriate Tx queue as follows: bit 0 indicates whether the packet
 *      should be sent as regular or control, bits 1-15 select the queue.
 */
static int t4_crypto_send(struct adapter *adap, struct sk_buff *skb)
{
        int ret;

        local_bh_disable();
        ret = uld_send(adap, skb, CXGB4_TX_CRYPTO);
        local_bh_enable();
        return ret;
}

/**
 *      cxgb4_crypto_send - send a crypto packet
 *      @dev: the net device
 *      @skb: the packet
 *
 *      Sends a crypto packet.  This is an exported version of t4_crypto_send(),
 *      intended for ULDs.
 */
int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb)
{
        return t4_crypto_send(netdev2adap(dev), skb);
}
EXPORT_SYMBOL(cxgb4_crypto_send);
static inline void copy_frags(struct sk_buff *skb,
                              const struct pkt_gl *gl, unsigned int offset)
{
        int i;

        /* usually there's just one frag */
        __skb_fill_page_desc(skb, 0, gl->frags[0].page,
                             gl->frags[0].offset + offset,
                             gl->frags[0].size - offset);
        skb_shinfo(skb)->nr_frags = gl->nfrags;
        for (i = 1; i < gl->nfrags; i++)
                __skb_fill_page_desc(skb, i, gl->frags[i].page,
                                     gl->frags[i].offset,
                                     gl->frags[i].size);

        /* get a reference to the last page, we don't own it */
        get_page(gl->frags[gl->nfrags - 1].page);
}
/**
 *      cxgb4_pktgl_to_skb - build an sk_buff from a packet gather list
 *      @gl: the gather list
 *      @skb_len: size of sk_buff main body if it carries fragments
 *      @pull_len: amount of data to move to the sk_buff's main body
 *
 *      Builds an sk_buff from the given packet gather list.  Returns the
 *      sk_buff or %NULL if sk_buff allocation failed.
 */
struct sk_buff *cxgb4_pktgl_to_skb(const struct pkt_gl *gl,
                                   unsigned int skb_len, unsigned int pull_len)
{
        struct sk_buff *skb;

        /*
         * Below we rely on RX_COPY_THRES being less than the smallest Rx buffer
         * size, which is expected since buffers are at least PAGE_SIZEd.
         * In this case packets up to RX_COPY_THRES have only one fragment.
         */
        if (gl->tot_len <= RX_COPY_THRES) {
                skb = dev_alloc_skb(gl->tot_len);
                if (unlikely(!skb))
                        goto out;
                __skb_put(skb, gl->tot_len);
                skb_copy_to_linear_data(skb, gl->va, gl->tot_len);
        } else {
                skb = dev_alloc_skb(skb_len);
                if (unlikely(!skb))
                        goto out;
                __skb_put(skb, pull_len);
                skb_copy_to_linear_data(skb, gl->va, pull_len);

                copy_frags(skb, gl, pull_len);
                skb->len = gl->tot_len;
                skb->data_len = skb->len - pull_len;
                skb->truesize += skb->data_len;
        }
out:    return skb;
}
EXPORT_SYMBOL(cxgb4_pktgl_to_skb);
/**
 *      t4_pktgl_free - free a packet gather list
 *      @gl: the gather list
 *
 *      Releases the pages of a packet gather list.  We do not own the last
 *      page on the list and do not free it.
 */
static void t4_pktgl_free(const struct pkt_gl *gl)
{
        int n;
        const struct page_frag *p;

        for (p = gl->frags, n = gl->nfrags - 1; n--; p++)
                put_page(p->page);
}
/*
 * Process an MPS trace packet.  Give it an unused protocol number so it won't
 * be delivered to anyone and send it to the stack for capture.
 */
static noinline int handle_trace_pkt(struct adapter *adap,
                                     const struct pkt_gl *gl)
{
        struct sk_buff *skb;

        skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN);
        if (unlikely(!skb)) {
                t4_pktgl_free(gl);
                return 0;
        }

        if (is_t4(adap->params.chip))
                __skb_pull(skb, sizeof(struct cpl_trace_pkt));
        else
                __skb_pull(skb, sizeof(struct cpl_t5_trace_pkt));

        skb_reset_mac_header(skb);
        skb->protocol = htons(0xffff);
        skb->dev = adap->port[0];
        netif_receive_skb(skb);
        return 0;
}
/**
 *      cxgb4_sgetim_to_hwtstamp - convert sge time stamp to hw time stamp
 *      @adap: the adapter
 *      @hwtstamps: time stamp structure to update
 *      @sgetstamp: 60-bit IQE timestamp
 *
 *      Every ingress queue entry carries a 60-bit timestamp in Core Clock
 *      ticks; convert it to ktime_t and assign it to the skb's hw timestamps.
 */
static void cxgb4_sgetim_to_hwtstamp(struct adapter *adap,
                                     struct skb_shared_hwtstamps *hwtstamps,
                                     u64 sgetstamp)
{
        u64 ns;
        u64 tmp = (sgetstamp * 1000 * 1000 + adap->params.vpd.cclk / 2);

        ns = div_u64(tmp, adap->params.vpd.cclk);

        memset(hwtstamps, 0, sizeof(*hwtstamps));
        hwtstamps->hwtstamp = ns_to_ktime(ns);
}
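/* Illustrative conversion (assuming the VPD stores the core clock in kHz,
 * as the arithmetic above implies): with a 250 MHz core clock,
 * cclk == 250000, so one SGE timestamp tick is 1000000 / 250000 == 4 ns;
 * the "+ cclk / 2" term rounds the division to the nearest nanosecond.
 */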
static void do_gro(struct sge_eth_rxq *rxq, const struct pkt_gl *gl,
                   const struct cpl_rx_pkt *pkt)
{
        struct adapter *adapter = rxq->rspq.adap;
        struct sge *s = &adapter->sge;
        struct port_info *pi;
        int ret;
        struct sk_buff *skb;

        skb = napi_get_frags(&rxq->rspq.napi);
        if (unlikely(!skb)) {
                t4_pktgl_free(gl);
                rxq->stats.rx_drops++;
                return;
        }

        copy_frags(skb, gl, s->pktshift);
        skb->len = gl->tot_len - s->pktshift;
        skb->data_len = skb->len;
        skb->truesize += skb->data_len;
        skb->ip_summed = CHECKSUM_UNNECESSARY;
        skb_record_rx_queue(skb, rxq->rspq.idx);
        pi = netdev_priv(skb->dev);
        if (pi->rxtstamp)
                cxgb4_sgetim_to_hwtstamp(adapter, skb_hwtstamps(skb),
                                         gl->sgetstamp);
        if (rxq->rspq.netdev->features & NETIF_F_RXHASH)
                skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val,
                             PKT_HASH_TYPE_L3);

        if (unlikely(pkt->vlan_ex)) {
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan));
                rxq->stats.vlan_ex++;
        }
        ret = napi_gro_frags(&rxq->rspq.napi);
        if (ret == GRO_HELD)
                rxq->stats.lro_pkts++;
        else if (ret == GRO_MERGED || ret == GRO_MERGED_FREE)
                rxq->stats.lro_merged++;
        rxq->stats.pkts++;
        rxq->stats.rx_cso++;
}
/**
 *      t4_systim_to_hwstamp - read hardware time stamp
 *      @adapter: the adapter
 *      @skb: the packet
 *
 *      Read Time Stamp from MPS packet and insert in skb which
 *      is forwarded to PTP application
 */
static noinline int t4_systim_to_hwstamp(struct adapter *adapter,
                                         struct sk_buff *skb)
{
        struct skb_shared_hwtstamps *hwtstamps;
        struct cpl_rx_mps_pkt *cpl = NULL;
        unsigned char *data;
        int offset;

        cpl = (struct cpl_rx_mps_pkt *)skb->data;
        if (!(CPL_RX_MPS_PKT_TYPE_G(ntohl(cpl->op_to_r1_hi)) &
             X_CPL_RX_MPS_PKT_TYPE_PTP))
                return RX_PTP_PKT_ERR;

        data = skb->data + sizeof(*cpl);
        skb_pull(skb, 2 * sizeof(u64) + sizeof(struct cpl_rx_mps_pkt));
        offset = ETH_HLEN + IPV4_HLEN(skb->data) + UDP_HLEN;
        if (skb->len < offset + OFF_PTP_SEQUENCE_ID + sizeof(short))
                return RX_PTP_PKT_ERR;

        hwtstamps = skb_hwtstamps(skb);
        memset(hwtstamps, 0, sizeof(*hwtstamps));
        hwtstamps->hwtstamp = ns_to_ktime(be64_to_cpu(*((u64 *)data)));

        return RX_PTP_PKT_SUC;
}
/**
 *      t4_rx_hststamp - Recv PTP Event Message
 *      @adapter: the adapter
 *      @rsp: the response queue descriptor holding the RX_PKT message
 *      @rxq: the response queue
 *      @skb: the packet
 *
 *      PTP enabled and MPS packet, read HW timestamp
 */
static int t4_rx_hststamp(struct adapter *adapter, const __be64 *rsp,
                          struct sge_eth_rxq *rxq, struct sk_buff *skb)
{
        int ret;

        if (unlikely((*(u8 *)rsp == CPL_RX_MPS_PKT) &&
                     !is_t4(adapter->params.chip))) {
                ret = t4_systim_to_hwstamp(adapter, skb);
                if (ret == RX_PTP_PKT_ERR) {
                        kfree_skb(skb);
                        rxq->stats.rx_drops++;
                }
                return ret;
        }
        return RX_NON_PTP_PKT;
}

/**
 * t4_tx_hststamp - Loopback PTP Transmit Event Message
 * @adapter: the adapter
 * @skb: the packet
 * @dev: the ingress net device
 *
 * Read the hardware timestamp for the loopback PTP Tx event message.
 */
static int t4_tx_hststamp(struct adapter *adapter, struct sk_buff *skb,
			  struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);

	if (!is_t4(adapter->params.chip) && adapter->ptp_tx_skb) {
		cxgb4_ptp_read_hwstamp(adapter, pi);
		kfree_skb(skb);
		return 0;
	}
	return 1;
}

/**
 * t4_ethrx_handler - process an ingress ethernet packet
 * @q: the response queue that received the packet
 * @rsp: the response queue descriptor holding the RX_PKT message
 * @si: the gather list of packet fragments
 *
 * Process an ingress ethernet packet and deliver it to the stack.
 */
int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
		     const struct pkt_gl *si)
{
	bool csum_ok;
	struct sk_buff *skb;
	const struct cpl_rx_pkt *pkt;
	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
	struct adapter *adapter = q->adap;
	struct sge *s = &q->adap->sge;
	int cpl_trace_pkt = is_t4(q->adap->params.chip) ?
			    CPL_TRACE_PKT : CPL_TRACE_PKT_T5;
	u16 err_vec;
	struct port_info *pi;
	int ret = 0;

	if (unlikely(*(u8 *)rsp == cpl_trace_pkt))
		return handle_trace_pkt(q->adap, si);

	pkt = (const struct cpl_rx_pkt *)rsp;
	/* Compressed error vector is enabled for T6 only */
	if (q->adap->params.tp.rx_pkt_encap)
		err_vec = T6_COMPR_RXERR_VEC_G(be16_to_cpu(pkt->err_vec));
	else
		err_vec = be16_to_cpu(pkt->err_vec);

	csum_ok = pkt->csum_calc && !err_vec &&
		  (q->netdev->features & NETIF_F_RXCSUM);
	if ((pkt->l2info & htonl(RXF_TCP_F)) &&
	    (q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
		do_gro(rxq, si, pkt);
		return 0;
	}

	skb = cxgb4_pktgl_to_skb(si, RX_PKT_SKB_LEN, RX_PULL_LEN);
	if (unlikely(!skb)) {
		t4_pktgl_free(si);
		rxq->stats.rx_drops++;
		return 0;
	}
	pi = netdev_priv(q->netdev);

	/* Handle PTP Event Rx packet */
	if (unlikely(pi->ptp_enable)) {
		ret = t4_rx_hststamp(adapter, rsp, rxq, skb);
		if (ret == RX_PTP_PKT_ERR)
			return 0;
	}
	if (likely(!ret))
		__skb_pull(skb, s->pktshift); /* remove ethernet header pad */

	/* Handle the PTP Event Tx Loopback packet */
	if (unlikely(pi->ptp_enable && !ret &&
		     (pkt->l2info & htonl(RXF_UDP_F)) &&
		     cxgb4_ptp_is_ptp_rx(skb))) {
		if (!t4_tx_hststamp(adapter, skb, q->netdev))
			return 0;
	}

	skb->protocol = eth_type_trans(skb, q->netdev);
	skb_record_rx_queue(skb, q->idx);
	if (skb->dev->features & NETIF_F_RXHASH)
		skb_set_hash(skb, (__force u32)pkt->rsshdr.hash_val,
			     PKT_HASH_TYPE_L3);

	rxq->stats.pkts++;

	if (pi->rxtstamp)
		cxgb4_sgetim_to_hwtstamp(q->adap, skb_hwtstamps(skb),
					 si->sgetstamp);
	if (csum_ok && (pkt->l2info & htonl(RXF_UDP_F | RXF_TCP_F))) {
		if (!pkt->ip_frag) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			rxq->stats.rx_cso++;
		} else if (pkt->l2info & htonl(RXF_IP_F)) {
			__sum16 c = (__force __sum16)pkt->csum;
			skb->csum = csum_unfold(c);
			skb->ip_summed = CHECKSUM_COMPLETE;
			rxq->stats.rx_cso++;
		}
	} else {
		skb_checksum_none_assert(skb);
#ifdef CONFIG_CHELSIO_T4_FCOE
#define CPL_RX_PKT_FLAGS (RXF_PSH_F | RXF_SYN_F | RXF_UDP_F | \
			  RXF_TCP_F | RXF_IP_F | RXF_IP6_F | RXF_LRO_F)

		if (!(pkt->l2info & cpu_to_be32(CPL_RX_PKT_FLAGS))) {
			if ((pkt->l2info & cpu_to_be32(RXF_FCOE_F)) &&
			    (pi->fcoe.flags & CXGB_FCOE_ENABLED)) {
				if (q->adap->params.tp.rx_pkt_encap)
					csum_ok = err_vec &
						  T6_COMPR_RXERR_SUM_F;
				else
					csum_ok = err_vec & RXERR_CSUM_F;
				if (!csum_ok)
					skb->ip_summed = CHECKSUM_UNNECESSARY;
			}
		}

#undef CPL_RX_PKT_FLAGS
#endif /* CONFIG_CHELSIO_T4_FCOE */
	}

	if (unlikely(pkt->vlan_ex)) {
		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(pkt->vlan));
		rxq->stats.vlan_ex++;
	}
	skb_mark_napi_id(skb, &q->napi);
	netif_receive_skb(skb);
	return 0;
}

/**
 * restore_rx_bufs - put back a packet's Rx buffers
 * @si: the packet gather list
 * @q: the SGE free list
 * @frags: number of FL buffers to restore
 *
 * Puts back on an FL the Rx buffers associated with @si.  The buffers
 * have already been unmapped and are left unmapped, we mark them so to
 * prevent further unmapping attempts.
 *
 * This function undoes a series of @unmap_rx_buf calls when we find out
 * that the current packet can't be processed right away after all and we
 * need to come back to it later.  This is a very rare event and there's
 * no effort to make this particularly efficient.
 */
static void restore_rx_bufs(const struct pkt_gl *si, struct sge_fl *q,
			    int frags)
{
	struct rx_sw_desc *d;

	while (frags--) {
		if (q->cidx == 0)
			q->cidx = q->size - 1;
		else
			q->cidx--;
		d = &q->sdesc[q->cidx];
		d->page = si->frags[frags].page;
		d->dma_addr |= RX_UNMAPPED_BUF;
		q->avail++;
	}
}

/**
 * is_new_response - check if a response is newly written
 * @r: the response descriptor
 * @q: the response queue
 *
 * Returns true if a response descriptor contains a yet unprocessed
 * response.
 */
static inline bool is_new_response(const struct rsp_ctrl *r,
				   const struct sge_rspq *q)
{
	return (r->type_gen >> RSPD_GEN_S) == q->gen;
}

/**
 * rspq_next - advance to the next entry in a response queue
 * @q: the response queue
 *
 * Updates the state of a response queue to advance it to the next entry.
 */
static inline void rspq_next(struct sge_rspq *q)
{
	q->cur_desc = (void *)q->cur_desc + q->iqe_len;
	if (unlikely(++q->cidx == q->size)) {
		q->cidx = 0;
		q->gen ^= 1;
		q->cur_desc = q->desc;
	}
}
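
/* Illustrative note on the generation-bit scheme above (not driver code):
 * hardware writes responses into the ring tagged with the current
 * generation bit, and is_new_response() treats an entry as fresh only
 * while its GEN field matches q->gen. Each wrap of the ring flips q->gen
 * in rspq_next(), so stale entries left over from the previous pass, which
 * still carry the old GEN value, are never mistaken for new work.
 */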

/**
 * process_responses - process responses from an SGE response queue
 * @q: the ingress queue to process
 * @budget: how many responses can be processed in this round
 *
 * Process responses from an SGE response queue up to the supplied budget.
 * Responses include received packets as well as control messages from FW
 * or HW.
 *
 * Additionally choose the interrupt holdoff time for the next interrupt
 * on this queue.  If the system is under memory shortage use a fairly
 * long delay to help recovery.
 */
static int process_responses(struct sge_rspq *q, int budget)
{
	int ret, rsp_type;
	int budget_left = budget;
	const struct rsp_ctrl *rc;
	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
	struct adapter *adapter = q->adap;
	struct sge *s = &adapter->sge;

	while (likely(budget_left)) {
		rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
		if (!is_new_response(rc, q)) {
			if (q->flush_handler)
				q->flush_handler(q);
			break;
		}

		dma_rmb();
		rsp_type = RSPD_TYPE_G(rc->type_gen);
		if (likely(rsp_type == RSPD_TYPE_FLBUF_X)) {
			struct page_frag *fp;
			struct pkt_gl si;
			const struct rx_sw_desc *rsd;
			u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags;

			if (len & RSPD_NEWBUF_F) {
				if (likely(q->offset > 0)) {
					free_rx_bufs(q->adap, &rxq->fl, 1);
					q->offset = 0;
				}
				len = RSPD_LEN_G(len);
			}
			si.tot_len = len;

			/* gather packet fragments */
			for (frags = 0, fp = si.frags; ; frags++, fp++) {
				rsd = &rxq->fl.sdesc[rxq->fl.cidx];
				bufsz = get_buf_size(adapter, rsd);
				fp->page = rsd->page;
				fp->offset = q->offset;
				fp->size = min(bufsz, len);
				len -= fp->size;
				if (!len)
					break;
				unmap_rx_buf(q->adap, &rxq->fl);
			}

			si.sgetstamp = SGE_TIMESTAMP_G(
					be64_to_cpu(rc->last_flit));
			/*
			 * Last buffer remains mapped so explicitly make it
			 * coherent for CPU access.
			 */
			dma_sync_single_for_cpu(q->adap->pdev_dev,
						get_buf_addr(rsd),
						fp->size, DMA_FROM_DEVICE);

			si.va = page_address(si.frags[0].page) +
				si.frags[0].offset;
			prefetch(si.va);

			si.nfrags = frags + 1;
			ret = q->handler(q, q->cur_desc, &si);
			if (likely(ret == 0))
				q->offset += ALIGN(fp->size, s->fl_align);
			else
				restore_rx_bufs(&si, &rxq->fl, frags);
		} else if (likely(rsp_type == RSPD_TYPE_CPL_X)) {
			ret = q->handler(q, q->cur_desc, NULL);
		} else {
			ret = q->handler(q, (const __be64 *)rc, CXGB4_MSG_AN);
		}

		if (unlikely(ret)) {
			/* couldn't process descriptor, back off for recovery */
			q->next_intr_params = QINTR_TIMER_IDX_V(NOMEM_TMR_IDX);
			break;
		}

		rspq_next(q);
		budget_left--;
	}

	if (q->offset >= 0 && fl_cap(&rxq->fl) - rxq->fl.avail >= 16)
		__refill_fl(q->adap, &rxq->fl);
	return budget - budget_left;
}
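
/* Note on the refill check above (descriptive): free-list credits are
 * handed to the hardware in units of 8, and __refill_fl() is only invoked
 * once at least 16 buffers' worth of space has opened up, so doorbell
 * writes are amortized rather than issued per packet.
 */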

/**
 * napi_rx_handler - the NAPI handler for Rx processing
 * @napi: the napi instance
 * @budget: how many packets we can process in this round
 *
 * Handler for new data events when using NAPI.  This does not need any
 * locking or protection from interrupts as data interrupts are off at
 * this point and other adapter interrupts do not interfere (the latter
 * is not a concern at all with MSI-X as non-data interrupts then have
 * a separate handler).
 */
static int napi_rx_handler(struct napi_struct *napi, int budget)
{
	unsigned int params;
	struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
	int work_done;
	u32 val;

	work_done = process_responses(q, budget);
	if (likely(work_done < budget)) {
		int timer_index;

		napi_complete_done(napi, work_done);
		timer_index = QINTR_TIMER_IDX_G(q->next_intr_params);

		if (q->adaptive_rx) {
			if (work_done > max(timer_pkt_quota[timer_index],
					    MIN_NAPI_WORK))
				timer_index = (timer_index + 1);
			else
				timer_index = timer_index - 1;

			timer_index = clamp(timer_index, 0, SGE_TIMERREGS - 1);
			q->next_intr_params =
					QINTR_TIMER_IDX_V(timer_index) |
					QINTR_CNT_EN_V(0);
			params = q->next_intr_params;
		} else {
			params = q->next_intr_params;
			q->next_intr_params = q->intr_params;
		}
	} else
		params = QINTR_TIMER_IDX_V(7);

	val = CIDXINC_V(work_done) | SEINTARM_V(params);

	/* If we don't have access to the new User GTS (T5+), use the old
	 * doorbell mechanism; otherwise use the new BAR2 mechanism.
	 */
	if (unlikely(q->bar2_addr == NULL)) {
		t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS_A),
			     val | INGRESSQID_V((u32)q->cntxt_id));
	} else {
		writel(val | INGRESSQID_V(q->bar2_qid),
		       q->bar2_addr + SGE_UDB_GTS);
		wmb();
	}
	return work_done;
}
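
/* Note on the adaptive holdoff logic above (descriptive): when adaptive RX
 * is enabled, the interrupt holdoff timer index is nudged up one step after
 * a round that exceeded the per-timer packet quota and down one step
 * otherwise, clamped to the SGE_TIMERREGS available hardware timers. Busier
 * queues thus drift toward longer holdoffs (fewer interrupts) and idle
 * queues toward shorter ones (lower latency).
 */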

/*
 * The MSI-X interrupt handler for an SGE response queue.
 */
irqreturn_t t4_sge_intr_msix(int irq, void *cookie)
{
	struct sge_rspq *q = cookie;

	napi_schedule(&q->napi);
	return IRQ_HANDLED;
}

/*
 * Process the indirect interrupt entries in the interrupt queue and kick off
 * NAPI for each queue that has generated an entry.
 */
static unsigned int process_intrq(struct adapter *adap)
{
	unsigned int credits;
	const struct rsp_ctrl *rc;
	struct sge_rspq *q = &adap->sge.intrq;
	u32 val;

	spin_lock(&adap->sge.intrq_lock);
	for (credits = 0; ; credits++) {
		rc = (void *)q->cur_desc + (q->iqe_len - sizeof(*rc));
		if (!is_new_response(rc, q))
			break;

		dma_rmb();
		if (RSPD_TYPE_G(rc->type_gen) == RSPD_TYPE_INTR_X) {
			unsigned int qid = ntohl(rc->pldbuflen_qid);

			qid -= adap->sge.ingr_start;
			napi_schedule(&adap->sge.ingr_map[qid]->napi);
		}

		rspq_next(q);
	}

	val = CIDXINC_V(credits) | SEINTARM_V(q->intr_params);

	/* If we don't have access to the new User GTS (T5+), use the old
	 * doorbell mechanism; otherwise use the new BAR2 mechanism.
	 */
	if (unlikely(q->bar2_addr == NULL)) {
		t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
			     val | INGRESSQID_V(q->cntxt_id));
	} else {
		writel(val | INGRESSQID_V(q->bar2_qid),
		       q->bar2_addr + SGE_UDB_GTS);
		wmb();
	}

	spin_unlock(&adap->sge.intrq_lock);
	return credits;
}

/*
 * The MSI interrupt handler, which handles data events from SGE response
 * queues as well as error and other async events, as they all use the same
 * MSI vector.
 */
static irqreturn_t t4_intr_msi(int irq, void *cookie)
{
	struct adapter *adap = cookie;

	if (adap->flags & MASTER_PF)
		t4_slow_intr_handler(adap);
	process_intrq(adap);
	return IRQ_HANDLED;
}

/*
 * Interrupt handler for legacy INTx interrupts.
 * Handles data events from SGE response queues as well as error and other
 * async events as they all use the same interrupt line.
 */
static irqreturn_t t4_intr_intx(int irq, void *cookie)
{
	struct adapter *adap = cookie;

	t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI_A), 0);
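	/* The single '|' below is deliberate: a logical || would
	 * short-circuit and skip process_intrq() whenever the slow interrupt
	 * handler on the Master PF reported work, but both must always run.
	 */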
	if (((adap->flags & MASTER_PF) && t4_slow_intr_handler(adap)) |
	    process_intrq(adap))
		return IRQ_HANDLED;
	return IRQ_NONE;		/* probably shared interrupt */
}

/**
 * t4_intr_handler - select the top-level interrupt handler
 * @adap: the adapter
 *
 * Selects the top-level interrupt handler based on the type of interrupts
 * (MSI-X, MSI, or INTx).
 */
irq_handler_t t4_intr_handler(struct adapter *adap)
{
	if (adap->flags & USING_MSIX)
		return t4_sge_intr_msix;
	if (adap->flags & USING_MSI)
		return t4_intr_msi;
	return t4_intr_intx;
}

static void sge_rx_timer_cb(unsigned long data)
{
	unsigned long m;
	unsigned int i;
	struct adapter *adap = (struct adapter *)data;
	struct sge *s = &adap->sge;

	for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
		for (m = s->starving_fl[i]; m; m &= m - 1) {
			struct sge_eth_rxq *rxq;
			unsigned int id = __ffs(m) + i * BITS_PER_LONG;
			struct sge_fl *fl = s->egr_map[id];

			clear_bit(id, s->starving_fl);
			smp_mb__after_atomic();

			if (fl_starving(adap, fl)) {
				rxq = container_of(fl, struct sge_eth_rxq, fl);
				if (napi_reschedule(&rxq->rspq.napi))
					fl->starving++;
				else
					set_bit(id, s->starving_fl);
			}
		}
	/* The remainder of the SGE RX Timer Callback routine is dedicated to
	 * global Master PF activities like checking for chip ingress stalls,
	 * etc.
	 */
	if (!(adap->flags & MASTER_PF))
		goto done;

	t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD);

done:
	mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
}

static void sge_tx_timer_cb(unsigned long data)
{
	unsigned long m;
	unsigned int i, budget;
	struct adapter *adap = (struct adapter *)data;
	struct sge *s = &adap->sge;

	for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
		for (m = s->txq_maperr[i]; m; m &= m - 1) {
			unsigned long id = __ffs(m) + i * BITS_PER_LONG;
			struct sge_uld_txq *txq = s->egr_map[id];

			clear_bit(id, s->txq_maperr);
			tasklet_schedule(&txq->qresume_tsk);
		}

	if (!is_t4(adap->params.chip)) {
		struct sge_eth_txq *q = &s->ptptxq;
		int avail;

		spin_lock(&adap->ptp_lock);
		avail = reclaimable(&q->q);

		if (avail) {
			free_tx_desc(adap, &q->q, avail, false);
			q->q.in_use -= avail;
		}
		spin_unlock(&adap->ptp_lock);
	}

	budget = MAX_TIMER_TX_RECLAIM;
	i = s->ethtxq_rover;
	do {
		struct sge_eth_txq *q = &s->ethtxq[i];

		if (q->q.in_use &&
		    time_after_eq(jiffies, q->txq->trans_start + HZ / 100) &&
		    __netif_tx_trylock(q->txq)) {
			int avail = reclaimable(&q->q);

			if (avail) {
				if (avail > budget)
					avail = budget;

				free_tx_desc(adap, &q->q, avail, true);
				q->q.in_use -= avail;
				budget -= avail;
			}
			__netif_tx_unlock(q->txq);
		}

		if (++i >= s->ethqsets)
			i = 0;
	} while (budget && i != s->ethtxq_rover);
	s->ethtxq_rover = i;
	mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
}

/**
 * bar2_address - return the BAR2 address for an SGE Queue's Registers
 * @adapter: the adapter
 * @qid: the SGE Queue ID
 * @qtype: the SGE Queue Type (Egress or Ingress)
 * @pbar2_qid: BAR2 Queue ID or 0 for Queue ID inferred SGE Queues
 *
 * Returns the BAR2 address for the SGE Queue Registers associated with
 * @qid.  If BAR2 SGE Registers aren't available, returns NULL.  Also
 * returns the BAR2 Queue ID to be used with writes to the BAR2 SGE
 * Queue Registers.  If the BAR2 Queue ID is 0, then "Inferred Queue ID"
 * Registers are supported (e.g. the Write Combining Doorbell Buffer).
 */
static void __iomem *bar2_address(struct adapter *adapter,
				  unsigned int qid,
				  enum t4_bar2_qtype qtype,
				  unsigned int *pbar2_qid)
{
	u64 bar2_qoffset;
	int ret;

	ret = t4_bar2_sge_qregs(adapter, qid, qtype, 0,
				&bar2_qoffset, pbar2_qid);
	if (ret)
		return NULL;

	return adapter->bar2 + bar2_qoffset;
}
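
/* Usage note for bar2_address() (descriptive only): callers store the
 * returned pointer and *pbar2_qid in their queue state, e.g. iq->bar2_addr
 * and iq->bar2_qid below, and later ring doorbells with
 *
 *	writel(val | INGRESSQID_V(q->bar2_qid), q->bar2_addr + SGE_UDB_GTS);
 *
 * falling back to the mailbox-style SGE_PF_GTS register when the BAR2
 * address is NULL (e.g. on T4 parts without User GTS support).
 */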

/* @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0
 * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map
 */
int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
		     struct net_device *dev, int intr_idx,
		     struct sge_fl *fl, rspq_handler_t hnd,
		     rspq_flush_handler_t flush_hnd, int cong)
{
	int ret, flsz = 0;
	struct fw_iq_cmd c;
	struct sge *s = &adap->sge;
	struct port_info *pi = netdev_priv(dev);
	int relaxed = !(adap->flags & ROOT_NO_RELAXED_ORDERING);

	/* Size needs to be multiple of 16, including status entry. */
	iq->size = roundup(iq->size, 16);

	iq->desc = alloc_ring(adap->pdev_dev, iq->size, iq->iqe_len, 0,
			      &iq->phys_addr, NULL, 0,
			      dev_to_node(adap->pdev_dev));
	if (!iq->desc)
		return -ENOMEM;

	memset(&c, 0, sizeof(c));
	c.op_to_vfn = htonl(FW_CMD_OP_V(FW_IQ_CMD) | FW_CMD_REQUEST_F |
			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
			    FW_IQ_CMD_PFN_V(adap->pf) | FW_IQ_CMD_VFN_V(0));
	c.alloc_to_len16 = htonl(FW_IQ_CMD_ALLOC_F | FW_IQ_CMD_IQSTART_F |
				 FW_LEN16(c));
	c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE_V(FW_IQ_TYPE_FL_INT_CAP) |
		FW_IQ_CMD_IQASYNCH_V(fwevtq) | FW_IQ_CMD_VIID_V(pi->viid) |
		FW_IQ_CMD_IQANDST_V(intr_idx < 0) |
		FW_IQ_CMD_IQANUD_V(UPDATEDELIVERY_INTERRUPT_X) |
		FW_IQ_CMD_IQANDSTINDEX_V(intr_idx >= 0 ? intr_idx :
							 -intr_idx - 1));
	c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH_V(pi->tx_chan) |
		FW_IQ_CMD_IQGTSMODE_F |
		FW_IQ_CMD_IQINTCNTTHRESH_V(iq->pktcnt_idx) |
		FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4));
	c.iqsize = htons(iq->size);
	c.iqaddr = cpu_to_be64(iq->phys_addr);
	if (cong >= 0)
		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F);

	if (fl) {
		enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);

		/* Allocate the ring for the hardware free list (with space
		 * for its status page) along with the associated software
		 * descriptor ring.  The free list size needs to be a multiple
		 * of the Egress Queue Unit and at least 2 Egress Units larger
		 * than the SGE's Egress Congestion Threshold
		 * (fl_starve_thres - 1).
		 */
		if (fl->size < s->fl_starve_thres - 1 + 2 * 8)
			fl->size = s->fl_starve_thres - 1 + 2 * 8;
		fl->size = roundup(fl->size, 8);
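		/* Worked example of the sizing above (illustrative): with a
		 * hypothetical fl_starve_thres of 201, the minimum usable
		 * free-list size is 201 - 1 + 16 = 216 entries, already a
		 * multiple of the 8-entry Egress Queue Unit; a requested
		 * size of 220 would instead be rounded up to 224.
		 */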
		fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
				      sizeof(struct rx_sw_desc), &fl->addr,
				      &fl->sdesc, s->stat_len,
				      dev_to_node(adap->pdev_dev));
		if (!fl->desc)
			goto fl_nomem;

		flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
		c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
					     FW_IQ_CMD_FL0FETCHRO_V(relaxed) |
					     FW_IQ_CMD_FL0DATARO_V(relaxed) |
					     FW_IQ_CMD_FL0PADEN_F);
		if (cong >= 0)
			c.iqns_to_fl0congen |=
				htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) |
				      FW_IQ_CMD_FL0CONGCIF_F |
				      FW_IQ_CMD_FL0CONGEN_F);
		/* In T6, for egress queue type FL there is internal overhead
		 * of 16B for header going into FLM module.  Hence the maximum
		 * allowed burst size is 448 bytes.  For T4/T5, the hardware
		 * doesn't coalesce fetch requests if more than 64 bytes of
		 * Free List pointers are provided, so we use a 128-byte Fetch
		 * Burst Minimum there (T6 implements coalescing so we can use
		 * the smaller 64-byte value there).
		 */
		c.fl0dcaen_to_fl0cidxfthresh =
			htons(FW_IQ_CMD_FL0FBMIN_V(chip <= CHELSIO_T5 ?
						   FETCHBURSTMIN_128B_X :
						   FETCHBURSTMIN_64B_X) |
			      FW_IQ_CMD_FL0FBMAX_V((chip <= CHELSIO_T5) ?
						   FETCHBURSTMAX_512B_X :
						   FETCHBURSTMAX_256B_X));
		c.fl0size = htons(flsz);
		c.fl0addr = cpu_to_be64(fl->addr);
	}

	ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
	if (ret)
		goto err;

	netif_napi_add(dev, &iq->napi, napi_rx_handler, 64);
	iq->cur_desc = iq->desc;
	iq->cidx = 0;
	iq->gen = 1;
	iq->next_intr_params = iq->intr_params;
	iq->cntxt_id = ntohs(c.iqid);
	iq->abs_id = ntohs(c.physiqid);
	iq->bar2_addr = bar2_address(adap,
				     iq->cntxt_id,
				     T4_BAR2_QTYPE_INGRESS,
				     &iq->bar2_qid);
	iq->size--;			/* subtract status entry */
	iq->netdev = dev;
	iq->handler = hnd;
	iq->flush_handler = flush_hnd;

	memset(&iq->lro_mgr, 0, sizeof(struct t4_lro_mgr));
	skb_queue_head_init(&iq->lro_mgr.lroq);

	/* set offset to -1 to distinguish ingress queues without FL */
	iq->offset = fl ? 0 : -1;

	adap->sge.ingr_map[iq->cntxt_id - adap->sge.ingr_start] = iq;

	if (fl) {
		fl->cntxt_id = ntohs(c.fl0id);
		fl->avail = fl->pend_cred = 0;
		fl->pidx = fl->cidx = 0;
		fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0;
		adap->sge.egr_map[fl->cntxt_id - adap->sge.egr_start] = fl;

		/* Note, we must initialize the BAR2 Free List User Doorbell
		 * information before refilling the Free List!
		 */
		fl->bar2_addr = bar2_address(adap,
					     fl->cntxt_id,
					     T4_BAR2_QTYPE_EGRESS,
					     &fl->bar2_qid);
		refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
	}

	/* For T5 and later we attempt to set up the Congestion Manager values
	 * of the new RX Ethernet Queue.  This should really be handled by
	 * firmware because it's more complex than any host driver wants to
	 * get involved with and it's different per chip and this is almost
	 * certainly wrong.  Firmware would be wrong as well, but it would be
	 * a lot easier to fix in one place ...  For now we do something very
	 * simple (and hopefully less wrong).
	 */
	if (!is_t4(adap->params.chip) && cong >= 0) {
		u32 param, val, ch_map = 0;
		int i;
		u16 cng_ch_bits_log = adap->params.arch.cng_ch_bits_log;

		param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
			 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
			 FW_PARAMS_PARAM_YZ_V(iq->cntxt_id));
		if (cong == 0) {
			val = CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_QUEUE_X);
		} else {
			val =
			    CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X);
			for (i = 0; i < 4; i++) {
				if (cong & (1 << i))
					ch_map |= 1 << (i << cng_ch_bits_log);
			}
			val |= CONMCTXT_CNGCHMAP_V(ch_map);
		}
		ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1,
				    &param, &val);
		if (ret)
			dev_warn(adap->pdev_dev, "Failed to set Congestion"
				 " Manager Context for Ingress Queue %d: %d\n",
				 iq->cntxt_id, -ret);
	}

	return 0;

fl_nomem:
	ret = -ENOMEM;
err:
	if (iq->desc) {
		dma_free_coherent(adap->pdev_dev, iq->size * iq->iqe_len,
				  iq->desc, iq->phys_addr);
		iq->desc = NULL;
	}
	if (fl && fl->desc) {
		kfree(fl->sdesc);
		fl->sdesc = NULL;
		dma_free_coherent(adap->pdev_dev, flsz * sizeof(struct tx_desc),
				  fl->desc, fl->addr);
		fl->desc = NULL;
	}
	return ret;
}
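
/* Note on the congestion-manager setup above (descriptive): a non-zero
 * @cong is a channel bitmap, and each set bit i is spread into the CONM
 * context's channel map at bit (i << cng_ch_bits_log). For example, with
 * a hypothetical cng_ch_bits_log of 2, cong = 0x5 (channels 0 and 2)
 * yields ch_map = 0x101.
 */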

static void init_txq(struct adapter *adap, struct sge_txq *q, unsigned int id)
{
	q->cntxt_id = id;
	q->bar2_addr = bar2_address(adap,
				    q->cntxt_id,
				    T4_BAR2_QTYPE_EGRESS,
				    &q->bar2_qid);
	q->in_use = 0;
	q->cidx = q->pidx = 0;
	q->stops = q->restarts = 0;
	q->stat = (void *)&q->desc[q->size];
	spin_lock_init(&q->db_lock);
	adap->sge.egr_map[id - adap->sge.egr_start] = q;
}

int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
			 struct net_device *dev, struct netdev_queue *netdevq,
			 unsigned int iqid)
{
	int ret, nentries;
	struct fw_eq_eth_cmd c;
	struct sge *s = &adap->sge;
	struct port_info *pi = netdev_priv(dev);

	/* Add status entries */
	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);

	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
			&txq->q.phys_addr, &txq->q.sdesc, s->stat_len,
			netdev_queue_numa_node_read(netdevq));
	if (!txq->q.desc)
		return -ENOMEM;

	memset(&c, 0, sizeof(c));
	c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_ETH_CMD) | FW_CMD_REQUEST_F |
			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
			    FW_EQ_ETH_CMD_PFN_V(adap->pf) |
			    FW_EQ_ETH_CMD_VFN_V(0));
	c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC_F |
				 FW_EQ_ETH_CMD_EQSTART_F | FW_LEN16(c));
	c.viid_pkd = htonl(FW_EQ_ETH_CMD_AUTOEQUEQE_F |
			   FW_EQ_ETH_CMD_VIID_V(pi->viid));
	c.fetchszm_to_iqid =
		htonl(FW_EQ_ETH_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
		      FW_EQ_ETH_CMD_PCIECHN_V(pi->tx_chan) |
		      FW_EQ_ETH_CMD_FETCHRO_F | FW_EQ_ETH_CMD_IQID_V(iqid));
	c.dcaen_to_eqsize =
		htonl(FW_EQ_ETH_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
		      FW_EQ_ETH_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
		      FW_EQ_ETH_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
		      FW_EQ_ETH_CMD_EQSIZE_V(nentries));
	c.eqaddr = cpu_to_be64(txq->q.phys_addr);

	ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
	if (ret) {
		kfree(txq->q.sdesc);
		txq->q.sdesc = NULL;
		dma_free_coherent(adap->pdev_dev,
				  nentries * sizeof(struct tx_desc),
				  txq->q.desc, txq->q.phys_addr);
		txq->q.desc = NULL;
		return ret;
	}

	txq->q.q_type = CXGB4_TXQ_ETH;
	init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd)));
	txq->txq = netdevq;
	txq->tso = txq->tx_cso = txq->vlan_ins = 0;
	txq->mapping_err = 0;
	return 0;
}

int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
			  struct net_device *dev, unsigned int iqid,
			  unsigned int cmplqid)
{
	int ret, nentries;
	struct fw_eq_ctrl_cmd c;
	struct sge *s = &adap->sge;
	struct port_info *pi = netdev_priv(dev);

	/* Add status entries */
	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);

	txq->q.desc = alloc_ring(adap->pdev_dev, nentries,
				 sizeof(struct tx_desc), 0, &txq->q.phys_addr,
				 NULL, 0, dev_to_node(adap->pdev_dev));
	if (!txq->q.desc)
		return -ENOMEM;

	c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_CTRL_CMD) | FW_CMD_REQUEST_F |
			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
			    FW_EQ_CTRL_CMD_PFN_V(adap->pf) |
			    FW_EQ_CTRL_CMD_VFN_V(0));
	c.alloc_to_len16 = htonl(FW_EQ_CTRL_CMD_ALLOC_F |
				 FW_EQ_CTRL_CMD_EQSTART_F | FW_LEN16(c));
	c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID_V(cmplqid));
	c.physeqid_pkd = htonl(0);
	c.fetchszm_to_iqid =
		htonl(FW_EQ_CTRL_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
		      FW_EQ_CTRL_CMD_PCIECHN_V(pi->tx_chan) |
		      FW_EQ_CTRL_CMD_FETCHRO_F | FW_EQ_CTRL_CMD_IQID_V(iqid));
	c.dcaen_to_eqsize =
		htonl(FW_EQ_CTRL_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
		      FW_EQ_CTRL_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
		      FW_EQ_CTRL_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
		      FW_EQ_CTRL_CMD_EQSIZE_V(nentries));
	c.eqaddr = cpu_to_be64(txq->q.phys_addr);

	ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
	if (ret) {
		dma_free_coherent(adap->pdev_dev,
				  nentries * sizeof(struct tx_desc),
				  txq->q.desc, txq->q.phys_addr);
		txq->q.desc = NULL;
		return ret;
	}

	txq->q.q_type = CXGB4_TXQ_CTRL;
	init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid)));
	txq->adap = adap;
	skb_queue_head_init(&txq->sendq);
	tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq);
	txq->full = 0;
	return 0;
}

int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
			unsigned int cmplqid)
{
	u32 param, val;

	param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
		 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL) |
		 FW_PARAMS_PARAM_YZ_V(eqid));
	val = cmplqid;
	return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
}

int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
			 struct net_device *dev, unsigned int iqid,
			 unsigned int uld_type)
{
	int ret, nentries;
	struct fw_eq_ofld_cmd c;
	struct sge *s = &adap->sge;
	struct port_info *pi = netdev_priv(dev);
	int cmd = FW_EQ_OFLD_CMD;

	/* Add status entries */
	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);

	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
			sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
			&txq->q.phys_addr, &txq->q.sdesc, s->stat_len,
			NUMA_NO_NODE);
	if (!txq->q.desc)
		return -ENOMEM;

	memset(&c, 0, sizeof(c));
	if (unlikely(uld_type == CXGB4_TX_CRYPTO))
		cmd = FW_EQ_CTRL_CMD;
	c.op_to_vfn = htonl(FW_CMD_OP_V(cmd) | FW_CMD_REQUEST_F |
			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
			    FW_EQ_OFLD_CMD_PFN_V(adap->pf) |
			    FW_EQ_OFLD_CMD_VFN_V(0));
	c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC_F |
				 FW_EQ_OFLD_CMD_EQSTART_F | FW_LEN16(c));
	c.fetchszm_to_iqid =
		htonl(FW_EQ_OFLD_CMD_HOSTFCMODE_V(HOSTFCMODE_STATUS_PAGE_X) |
		      FW_EQ_OFLD_CMD_PCIECHN_V(pi->tx_chan) |
		      FW_EQ_OFLD_CMD_FETCHRO_F | FW_EQ_OFLD_CMD_IQID_V(iqid));
	c.dcaen_to_eqsize =
		htonl(FW_EQ_OFLD_CMD_FBMIN_V(FETCHBURSTMIN_64B_X) |
		      FW_EQ_OFLD_CMD_FBMAX_V(FETCHBURSTMAX_512B_X) |
		      FW_EQ_OFLD_CMD_CIDXFTHRESH_V(CIDXFLUSHTHRESH_32_X) |
		      FW_EQ_OFLD_CMD_EQSIZE_V(nentries));
	c.eqaddr = cpu_to_be64(txq->q.phys_addr);

	ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
	if (ret) {
		kfree(txq->q.sdesc);
		txq->q.sdesc = NULL;
		dma_free_coherent(adap->pdev_dev,
				  nentries * sizeof(struct tx_desc),
				  txq->q.desc, txq->q.phys_addr);
		txq->q.desc = NULL;
		return ret;
	}

	txq->q.q_type = CXGB4_TXQ_ULD;
	init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd)));
	txq->adap = adap;
	skb_queue_head_init(&txq->sendq);
	tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq);
	txq->full = 0;
	txq->mapping_err = 0;
	return 0;
}

void free_txq(struct adapter *adap, struct sge_txq *q)
{
	struct sge *s = &adap->sge;

	dma_free_coherent(adap->pdev_dev,
			  q->size * sizeof(struct tx_desc) + s->stat_len,
			  q->desc, q->phys_addr);
	q->cntxt_id = 0;
	q->sdesc = NULL;
	q->desc = NULL;
}

void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq,
		  struct sge_fl *fl)
{
	struct sge *s = &adap->sge;
	unsigned int fl_id = fl ? fl->cntxt_id : 0xffff;

	adap->sge.ingr_map[rq->cntxt_id - adap->sge.ingr_start] = NULL;
	t4_iq_free(adap, adap->mbox, adap->pf, 0, FW_IQ_TYPE_FL_INT_CAP,
		   rq->cntxt_id, fl_id, 0xffff);
	dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len,
			  rq->desc, rq->phys_addr);
	netif_napi_del(&rq->napi);
	rq->netdev = NULL;
	rq->cntxt_id = rq->abs_id = 0;
	rq->desc = NULL;

	if (fl) {
		free_rx_bufs(adap, fl, fl->avail);
		dma_free_coherent(adap->pdev_dev, fl->size * 8 + s->stat_len,
				  fl->desc, fl->addr);
		kfree(fl->sdesc);
		fl->sdesc = NULL;
		fl->cntxt_id = 0;
		fl->desc = NULL;
	}
}

/**
 * t4_free_ofld_rxqs - free a block of consecutive Rx queues
 * @adap: the adapter
 * @n: number of queues
 * @q: pointer to first queue
 *
 * Release the resources of a consecutive block of offload Rx queues.
 */
void t4_free_ofld_rxqs(struct adapter *adap, int n, struct sge_ofld_rxq *q)
{
	for ( ; n; n--, q++)
		if (q->rspq.desc)
			free_rspq_fl(adap, &q->rspq,
				     q->fl.size ? &q->fl : NULL);
}

/**
 * t4_free_sge_resources - free SGE resources
 * @adap: the adapter
 *
 * Frees resources used by the SGE queue sets.
 */
void t4_free_sge_resources(struct adapter *adap)
{
	int i;
	struct sge_eth_rxq *eq;
	struct sge_eth_txq *etq;

	/* stop all Rx queues in order to start them draining */
	for (i = 0; i < adap->sge.ethqsets; i++) {
		eq = &adap->sge.ethrxq[i];
		if (eq->rspq.desc)
			t4_iq_stop(adap, adap->mbox, adap->pf, 0,
				   FW_IQ_TYPE_FL_INT_CAP,
				   eq->rspq.cntxt_id,
				   eq->fl.size ? eq->fl.cntxt_id : 0xffff,
				   0xffff);
	}

	/* clean up Ethernet Tx/Rx queues */
	for (i = 0; i < adap->sge.ethqsets; i++) {
		eq = &adap->sge.ethrxq[i];
		if (eq->rspq.desc)
			free_rspq_fl(adap, &eq->rspq,
				     eq->fl.size ? &eq->fl : NULL);

		etq = &adap->sge.ethtxq[i];
		if (etq->q.desc) {
			t4_eth_eq_free(adap, adap->mbox, adap->pf, 0,
				       etq->q.cntxt_id);
			__netif_tx_lock_bh(etq->txq);
			free_tx_desc(adap, &etq->q, etq->q.in_use, true);
			__netif_tx_unlock_bh(etq->txq);
			kfree(etq->q.sdesc);
			free_txq(adap, &etq->q);
		}
	}

	/* clean up control Tx queues */
	for (i = 0; i < ARRAY_SIZE(adap->sge.ctrlq); i++) {
		struct sge_ctrl_txq *cq = &adap->sge.ctrlq[i];

		if (cq->q.desc) {
			tasklet_kill(&cq->qresume_tsk);
			t4_ctrl_eq_free(adap, adap->mbox, adap->pf, 0,
					cq->q.cntxt_id);
			__skb_queue_purge(&cq->sendq);
			free_txq(adap, &cq->q);
		}
	}

	if (adap->sge.fw_evtq.desc)
		free_rspq_fl(adap, &adap->sge.fw_evtq, NULL);

	if (adap->sge.intrq.desc)
		free_rspq_fl(adap, &adap->sge.intrq, NULL);

	if (!is_t4(adap->params.chip)) {
		etq = &adap->sge.ptptxq;
		if (etq->q.desc) {
			t4_eth_eq_free(adap, adap->mbox, adap->pf, 0,
				       etq->q.cntxt_id);
			spin_lock_bh(&adap->ptp_lock);
			free_tx_desc(adap, &etq->q, etq->q.in_use, true);
			spin_unlock_bh(&adap->ptp_lock);
			kfree(etq->q.sdesc);
			free_txq(adap, &etq->q);
		}
	}

	/* clear the reverse egress queue map */
	memset(adap->sge.egr_map, 0,
	       adap->sge.egr_sz * sizeof(*adap->sge.egr_map));
}

void t4_sge_start(struct adapter *adap)
{
	adap->sge.ethtxq_rover = 0;
	mod_timer(&adap->sge.rx_timer, jiffies + RX_QCHECK_PERIOD);
	mod_timer(&adap->sge.tx_timer, jiffies + TX_QCHECK_PERIOD);
}

/**
 * t4_sge_stop - disable SGE operation
 * @adap: the adapter
 *
 * Stop tasklets and timers associated with the DMA engine.  Note that
 * this is effective only if measures have been taken to disable any HW
 * events that may restart them.
 */
void t4_sge_stop(struct adapter *adap)
{
	int i;
	struct sge *s = &adap->sge;

	if (in_interrupt())  /* actions below require waiting */
		return;

	if (s->rx_timer.function)
		del_timer_sync(&s->rx_timer);
	if (s->tx_timer.function)
		del_timer_sync(&s->tx_timer);

	if (is_offload(adap)) {
		struct sge_uld_txq_info *txq_info;

		txq_info = adap->sge.uld_txq_info[CXGB4_TX_OFLD];
		if (txq_info) {
			struct sge_uld_txq *txq = txq_info->uldtxq;

			for_each_ofldtxq(&adap->sge, i) {
				if (txq->q.desc)
					tasklet_kill(&txq->qresume_tsk);
			}
		}
	}

	if (is_pci_uld(adap)) {
		struct sge_uld_txq_info *txq_info;

		txq_info = adap->sge.uld_txq_info[CXGB4_TX_CRYPTO];
		if (txq_info) {
			struct sge_uld_txq *txq = txq_info->uldtxq;

			for_each_ofldtxq(&adap->sge, i) {
				if (txq->q.desc)
					tasklet_kill(&txq->qresume_tsk);
			}
		}
	}

	for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++) {
		struct sge_ctrl_txq *cq = &s->ctrlq[i];

		if (cq->q.desc)
			tasklet_kill(&cq->qresume_tsk);
	}
}

/**
 * t4_sge_init_soft - grab core SGE values needed by SGE code
 * @adap: the adapter
 *
 * We need to grab the SGE operating parameters that we need to have
 * in order to do our job and make sure we can live with them.
 */
static int t4_sge_init_soft(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	u32 fl_small_pg, fl_large_pg, fl_small_mtu, fl_large_mtu;
	u32 timer_value_0_and_1, timer_value_2_and_3, timer_value_4_and_5;
	u32 ingress_rx_threshold;

	/*
	 * Verify that CPL messages are going to the Ingress Queue for
	 * process_responses() and that only packet data is going to the
	 * Free Lists.
	 */
	if ((t4_read_reg(adap, SGE_CONTROL_A) & RXPKTCPLMODE_F) !=
	    RXPKTCPLMODE_V(RXPKTCPLMODE_SPLIT_X)) {
		dev_err(adap->pdev_dev, "bad SGE CPL MODE\n");
		return -EINVAL;
	}

	/*
	 * Validate the Host Buffer Register Array indices that we want to
	 * use ...
	 *
	 * XXX Note that we should really read through the Host Buffer Size
	 * XXX register array and find the indices of the Buffer Sizes which
	 * XXX meet our needs!
	 */
#define READ_FL_BUF(x) \
	t4_read_reg(adap, SGE_FL_BUFFER_SIZE0_A+(x)*sizeof(u32))

	fl_small_pg = READ_FL_BUF(RX_SMALL_PG_BUF);
	fl_large_pg = READ_FL_BUF(RX_LARGE_PG_BUF);
	fl_small_mtu = READ_FL_BUF(RX_SMALL_MTU_BUF);
	fl_large_mtu = READ_FL_BUF(RX_LARGE_MTU_BUF);

	/* We only bother using the Large Page logic if the Large Page Buffer
	 * is larger than our Page Size Buffer.
	 */
	if (fl_large_pg <= fl_small_pg)
		fl_large_pg = 0;

#undef READ_FL_BUF

	/* The Page Size Buffer must be exactly equal to our Page Size and the
	 * Large Page Size Buffer should be 0 (per above) or a power of 2.
	 */
	if (fl_small_pg != PAGE_SIZE ||
	    (fl_large_pg & (fl_large_pg - 1)) != 0) {
		dev_err(adap->pdev_dev, "bad SGE FL page buffer sizes [%d, %d]\n",
			fl_small_pg, fl_large_pg);
		return -EINVAL;
	}
	if (fl_large_pg)
		s->fl_pg_order = ilog2(fl_large_pg) - PAGE_SHIFT;

	if (fl_small_mtu < FL_MTU_SMALL_BUFSIZE(adap) ||
	    fl_large_mtu < FL_MTU_LARGE_BUFSIZE(adap)) {
		dev_err(adap->pdev_dev, "bad SGE FL MTU sizes [%d, %d]\n",
			fl_small_mtu, fl_large_mtu);
		return -EINVAL;
	}

	/*
	 * Retrieve our RX interrupt holdoff timer values and counter
	 * threshold values from the SGE parameters.
	 */
	timer_value_0_and_1 = t4_read_reg(adap, SGE_TIMER_VALUE_0_AND_1_A);
	timer_value_2_and_3 = t4_read_reg(adap, SGE_TIMER_VALUE_2_AND_3_A);
	timer_value_4_and_5 = t4_read_reg(adap, SGE_TIMER_VALUE_4_AND_5_A);
	s->timer_val[0] = core_ticks_to_us(adap,
		TIMERVALUE0_G(timer_value_0_and_1));
	s->timer_val[1] = core_ticks_to_us(adap,
		TIMERVALUE1_G(timer_value_0_and_1));
	s->timer_val[2] = core_ticks_to_us(adap,
		TIMERVALUE2_G(timer_value_2_and_3));
	s->timer_val[3] = core_ticks_to_us(adap,
		TIMERVALUE3_G(timer_value_2_and_3));
	s->timer_val[4] = core_ticks_to_us(adap,
		TIMERVALUE4_G(timer_value_4_and_5));
	s->timer_val[5] = core_ticks_to_us(adap,
		TIMERVALUE5_G(timer_value_4_and_5));

	ingress_rx_threshold = t4_read_reg(adap, SGE_INGRESS_RX_THRESHOLD_A);
	s->counter_val[0] = THRESHOLD_0_G(ingress_rx_threshold);
	s->counter_val[1] = THRESHOLD_1_G(ingress_rx_threshold);
	s->counter_val[2] = THRESHOLD_2_G(ingress_rx_threshold);
	s->counter_val[3] = THRESHOLD_3_G(ingress_rx_threshold);

	return 0;
}
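
/* Note on the values captured above (descriptive): the six holdoff timers
 * and four packet-count thresholds are the per-adapter menus that ingress
 * queues later select from by index, e.g. via QINTR_TIMER_IDX_V() in
 * napi_rx_handler(), rather than programming raw tick values per queue.
 */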

/**
 * t4_sge_init - initialize SGE
 * @adap: the adapter
 *
 * Perform low-level SGE code initialization needed every time after a
 * chip reset.
 */
int t4_sge_init(struct adapter *adap)
{
	struct sge *s = &adap->sge;
	u32 sge_control, sge_conm_ctrl;
	int ret, egress_threshold;

	/*
	 * Ingress Padding Boundary and Egress Status Page Size are set up by
	 * t4_fixup_host_params().
	 */
	sge_control = t4_read_reg(adap, SGE_CONTROL_A);
	s->pktshift = PKTSHIFT_G(sge_control);
	s->stat_len = (sge_control & EGRSTATUSPAGESIZE_F) ? 128 : 64;

	s->fl_align = t4_fl_pkt_align(adap);
	ret = t4_sge_init_soft(adap);
	if (ret < 0)
		return ret;

	/*
	 * A FL with <= fl_starve_thres buffers is starving and a periodic
	 * timer will attempt to refill it.  This needs to be larger than the
	 * SGE's Egress Congestion Threshold.  If it isn't, then we can get
	 * stuck waiting for new packets while the SGE is waiting for us to
	 * give it more Free List entries.  (Note that the SGE's Egress
	 * Congestion Threshold is in units of 2 Free List pointers.)  For T4,
	 * there was only a single field to control this.  For T5 there's the
	 * original field which now only applies to Unpacked Mode Free List
	 * buffers and a new field which only applies to Packed Mode Free List
	 * buffers.
	 */
	sge_conm_ctrl = t4_read_reg(adap, SGE_CONM_CTRL_A);
	switch (CHELSIO_CHIP_VERSION(adap->params.chip)) {
	case CHELSIO_T4:
		egress_threshold = EGRTHRESHOLD_G(sge_conm_ctrl);
		break;
	case CHELSIO_T5:
		egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
		break;
	case CHELSIO_T6:
		egress_threshold = T6_EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
		break;
	default:
		dev_err(adap->pdev_dev, "Unsupported Chip version %d\n",
			CHELSIO_CHIP_VERSION(adap->params.chip));
		return -EINVAL;
	}
	s->fl_starve_thres = 2 * egress_threshold + 1;

	t4_idma_monitor_init(adap, &s->idma_monitor);

	/* Set up timers used for recurring callbacks to process RX and TX
	 * administrative tasks.
	 */
	setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
	setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);

	spin_lock_init(&s->intrq_lock);

	return 0;
}
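
/* Worked example of the starvation threshold above (illustrative): the
 * hardware's Egress Congestion Threshold is expressed in units of 2 Free
 * List pointers, so a hypothetical egress_threshold of 100 yields
 * fl_starve_thres = 2 * 100 + 1 = 201 buffers; a free list at or below
 * that level is considered starving and is topped up from the RX timer.
 */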