4 * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include <rte_ethdev.h>
37 #include <rte_common.h>
39 #include "base/fm10k_type.h"
/*
 * Prefetching of received packet data is a build-time option: when
 * RTE_PMD_PACKET_PREFETCH is not defined the macro expands to an empty
 * statement, so call sites need no conditional compilation of their own.
 */
#ifdef RTE_PMD_PACKET_PREFETCH
#define rte_packet_prefetch(p)  rte_prefetch1(p)
#else
#define rte_packet_prefetch(p)  do {} while (0)
#endif
#ifdef RTE_LIBRTE_FM10K_DEBUG_RX
/*
 * Log the raw contents of one RX descriptor as a small ASCII table.
 *
 * @rxd: descriptor to dump; it is only read
 *
 * Every column is 16 characters wide so that the cells line up with the
 * "+----------------|----------------+" separator rows.
 */
static inline void dump_rxd(union fm10k_rx_desc *rxd)
{
	PMD_RX_LOG(DEBUG, "+----------------|----------------+");
	PMD_RX_LOG(DEBUG, "|     GLORT      | PKT HDR & TYPE |");
	PMD_RX_LOG(DEBUG, "|   0x%08x   |   0x%08x   |", rxd->d.glort,
			rxd->d.data);
	PMD_RX_LOG(DEBUG, "+----------------|----------------+");
	PMD_RX_LOG(DEBUG, "|   VLAN & LEN   |     STATUS     |");
	PMD_RX_LOG(DEBUG, "|   0x%08x   |   0x%08x   |", rxd->d.vlan_len,
			rxd->d.staterr);
	PMD_RX_LOG(DEBUG, "+----------------|----------------+");
	PMD_RX_LOG(DEBUG, "|    RESERVED    |    RSS_HASH    |");
	PMD_RX_LOG(DEBUG, "|   0x%08x   |   0x%08x   |", 0, rxd->d.rss);
	PMD_RX_LOG(DEBUG, "+----------------|----------------+");
	PMD_RX_LOG(DEBUG, "|            TIME TAG             |");
	PMD_RX_LOG(DEBUG, "|       0x%016"PRIx64"        |", rxd->q.timestamp);
	PMD_RX_LOG(DEBUG, "+----------------|----------------+");
}
#endif
68 /* @note: When this function is changed, make corresponding change to
69 * fm10k_dev_supported_ptypes_get()
72 rx_desc_to_ol_flags(struct rte_mbuf
*m
, const union fm10k_rx_desc
*d
)
75 ptype_table
[FM10K_RXD_PKTTYPE_MASK
>> FM10K_RXD_PKTTYPE_SHIFT
]
76 __rte_cache_aligned
= {
77 [FM10K_PKTTYPE_OTHER
] = RTE_PTYPE_L2_ETHER
,
78 [FM10K_PKTTYPE_IPV4
] = RTE_PTYPE_L2_ETHER
| RTE_PTYPE_L3_IPV4
,
79 [FM10K_PKTTYPE_IPV4_EX
] = RTE_PTYPE_L2_ETHER
|
80 RTE_PTYPE_L3_IPV4_EXT
,
81 [FM10K_PKTTYPE_IPV6
] = RTE_PTYPE_L2_ETHER
| RTE_PTYPE_L3_IPV6
,
82 [FM10K_PKTTYPE_IPV6_EX
] = RTE_PTYPE_L2_ETHER
|
83 RTE_PTYPE_L3_IPV6_EXT
,
84 [FM10K_PKTTYPE_IPV4
| FM10K_PKTTYPE_TCP
] = RTE_PTYPE_L2_ETHER
|
85 RTE_PTYPE_L3_IPV4
| RTE_PTYPE_L4_TCP
,
86 [FM10K_PKTTYPE_IPV6
| FM10K_PKTTYPE_TCP
] = RTE_PTYPE_L2_ETHER
|
87 RTE_PTYPE_L3_IPV6
| RTE_PTYPE_L4_TCP
,
88 [FM10K_PKTTYPE_IPV4
| FM10K_PKTTYPE_UDP
] = RTE_PTYPE_L2_ETHER
|
89 RTE_PTYPE_L3_IPV4
| RTE_PTYPE_L4_UDP
,
90 [FM10K_PKTTYPE_IPV6
| FM10K_PKTTYPE_UDP
] = RTE_PTYPE_L2_ETHER
|
91 RTE_PTYPE_L3_IPV6
| RTE_PTYPE_L4_UDP
,
94 m
->packet_type
= ptype_table
[(d
->w
.pkt_info
& FM10K_RXD_PKTTYPE_MASK
)
95 >> FM10K_RXD_PKTTYPE_SHIFT
];
97 if (d
->w
.pkt_info
& FM10K_RXD_RSSTYPE_MASK
)
98 m
->ol_flags
|= PKT_RX_RSS_HASH
;
100 if (unlikely((d
->d
.staterr
&
101 (FM10K_RXD_STATUS_IPCS
| FM10K_RXD_STATUS_IPE
)) ==
102 (FM10K_RXD_STATUS_IPCS
| FM10K_RXD_STATUS_IPE
)))
103 m
->ol_flags
|= PKT_RX_IP_CKSUM_BAD
;
105 m
->ol_flags
|= PKT_RX_IP_CKSUM_GOOD
;
107 if (unlikely((d
->d
.staterr
&
108 (FM10K_RXD_STATUS_L4CS
| FM10K_RXD_STATUS_L4E
)) ==
109 (FM10K_RXD_STATUS_L4CS
| FM10K_RXD_STATUS_L4E
)))
110 m
->ol_flags
|= PKT_RX_L4_CKSUM_BAD
;
112 m
->ol_flags
|= PKT_RX_L4_CKSUM_GOOD
;
116 fm10k_recv_pkts(void *rx_queue
, struct rte_mbuf
**rx_pkts
,
119 struct rte_mbuf
*mbuf
;
120 union fm10k_rx_desc desc
;
121 struct fm10k_rx_queue
*q
= rx_queue
;
127 next_dd
= q
->next_dd
;
129 nb_pkts
= RTE_MIN(nb_pkts
, q
->alloc_thresh
);
130 for (count
= 0; count
< nb_pkts
; ++count
) {
131 if (!(q
->hw_ring
[next_dd
].d
.staterr
& FM10K_RXD_STATUS_DD
))
133 mbuf
= q
->sw_ring
[next_dd
];
134 desc
= q
->hw_ring
[next_dd
];
135 #ifdef RTE_LIBRTE_FM10K_DEBUG_RX
138 rte_pktmbuf_pkt_len(mbuf
) = desc
.w
.length
;
139 rte_pktmbuf_data_len(mbuf
) = desc
.w
.length
;
142 #ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
143 rx_desc_to_ol_flags(mbuf
, &desc
);
146 mbuf
->hash
.rss
= desc
.d
.rss
;
148 * Packets in fm10k device always carry at least one VLAN tag.
149 * For those packets coming in without VLAN tag,
150 * the port default VLAN tag will be used.
151 * So, always PKT_RX_VLAN_PKT flag is set and vlan_tci
152 * is valid for each RX packet's mbuf.
154 mbuf
->ol_flags
|= PKT_RX_VLAN_PKT
;
155 mbuf
->vlan_tci
= desc
.w
.vlan
;
157 * mbuf->vlan_tci_outer is an idle field in fm10k driver,
158 * so it can be selected to store sglort value.
161 mbuf
->vlan_tci_outer
= rte_le_to_cpu_16(desc
.w
.sglort
);
163 rx_pkts
[count
] = mbuf
;
164 if (++next_dd
== q
->nb_desc
) {
169 /* Prefetch next mbuf while processing current one. */
170 rte_prefetch0(q
->sw_ring
[next_dd
]);
173 * When next RX descriptor is on a cache-line boundary,
174 * prefetch the next 4 RX descriptors and the next 8 pointers
177 if ((next_dd
& 0x3) == 0) {
178 rte_prefetch0(&q
->hw_ring
[next_dd
]);
179 rte_prefetch0(&q
->sw_ring
[next_dd
]);
183 q
->next_dd
= next_dd
;
185 if ((q
->next_dd
> q
->next_trigger
) || (alloc
== 1)) {
186 ret
= rte_mempool_get_bulk(q
->mp
,
187 (void **)&q
->sw_ring
[q
->next_alloc
],
190 if (unlikely(ret
!= 0)) {
191 uint8_t port
= q
->port_id
;
192 PMD_RX_LOG(ERR
, "Failed to alloc mbuf");
194 * Need to restore next_dd if we cannot allocate new
195 * buffers to replenish the old ones.
197 q
->next_dd
= (q
->next_dd
+ q
->nb_desc
- count
) %
199 rte_eth_devices
[port
].data
->rx_mbuf_alloc_failed
++;
203 for (; q
->next_alloc
<= q
->next_trigger
; ++q
->next_alloc
) {
204 mbuf
= q
->sw_ring
[q
->next_alloc
];
206 /* setup static mbuf fields */
207 fm10k_pktmbuf_reset(mbuf
, q
->port_id
);
209 /* write descriptor */
210 desc
.q
.pkt_addr
= MBUF_DMA_ADDR_DEFAULT(mbuf
);
211 desc
.q
.hdr_addr
= MBUF_DMA_ADDR_DEFAULT(mbuf
);
212 q
->hw_ring
[q
->next_alloc
] = desc
;
214 FM10K_PCI_REG_WRITE(q
->tail_ptr
, q
->next_trigger
);
215 q
->next_trigger
+= q
->alloc_thresh
;
216 if (q
->next_trigger
>= q
->nb_desc
) {
217 q
->next_trigger
= q
->alloc_thresh
- 1;
226 fm10k_recv_scattered_pkts(void *rx_queue
, struct rte_mbuf
**rx_pkts
,
229 struct rte_mbuf
*mbuf
;
230 union fm10k_rx_desc desc
;
231 struct fm10k_rx_queue
*q
= rx_queue
;
233 uint16_t nb_rcv
, nb_seg
;
236 struct rte_mbuf
*first_seg
= q
->pkt_first_seg
;
237 struct rte_mbuf
*last_seg
= q
->pkt_last_seg
;
240 next_dd
= q
->next_dd
;
243 nb_seg
= RTE_MIN(nb_pkts
, q
->alloc_thresh
);
244 for (count
= 0; count
< nb_seg
; count
++) {
245 if (!(q
->hw_ring
[next_dd
].d
.staterr
& FM10K_RXD_STATUS_DD
))
247 mbuf
= q
->sw_ring
[next_dd
];
248 desc
= q
->hw_ring
[next_dd
];
249 #ifdef RTE_LIBRTE_FM10K_DEBUG_RX
253 if (++next_dd
== q
->nb_desc
) {
258 /* Prefetch next mbuf while processing current one. */
259 rte_prefetch0(q
->sw_ring
[next_dd
]);
262 * When next RX descriptor is on a cache-line boundary,
263 * prefetch the next 4 RX descriptors and the next 8 pointers
266 if ((next_dd
& 0x3) == 0) {
267 rte_prefetch0(&q
->hw_ring
[next_dd
]);
268 rte_prefetch0(&q
->sw_ring
[next_dd
]);
271 /* Fill data length */
272 rte_pktmbuf_data_len(mbuf
) = desc
.w
.length
;
275 * If this is the first buffer of the received packet,
276 * set the pointer to the first mbuf of the packet and
277 * initialize its context.
278 * Otherwise, update the total length and the number of segments
279 * of the current scattered packet, and update the pointer to
280 * the last mbuf of the current packet.
284 first_seg
->pkt_len
= desc
.w
.length
;
287 (uint16_t)(first_seg
->pkt_len
+
288 rte_pktmbuf_data_len(mbuf
));
289 first_seg
->nb_segs
++;
290 last_seg
->next
= mbuf
;
294 * If this is not the last buffer of the received packet,
295 * update the pointer to the last mbuf of the current scattered
296 * packet and continue to parse the RX ring.
298 if (!(desc
.d
.staterr
& FM10K_RXD_STATUS_EOP
)) {
303 first_seg
->ol_flags
= 0;
304 #ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE
305 rx_desc_to_ol_flags(first_seg
, &desc
);
307 first_seg
->hash
.rss
= desc
.d
.rss
;
309 * Packets in fm10k device always carry at least one VLAN tag.
310 * For those packets coming in without VLAN tag,
311 * the port default VLAN tag will be used.
312 * So, always PKT_RX_VLAN_PKT flag is set and vlan_tci
313 * is valid for each RX packet's mbuf.
315 first_seg
->ol_flags
|= PKT_RX_VLAN_PKT
;
316 first_seg
->vlan_tci
= desc
.w
.vlan
;
318 * mbuf->vlan_tci_outer is an idle field in fm10k driver,
319 * so it can be selected to store sglort value.
322 first_seg
->vlan_tci_outer
=
323 rte_le_to_cpu_16(desc
.w
.sglort
);
325 /* Prefetch data of first segment, if configured to do so. */
326 rte_packet_prefetch((char *)first_seg
->buf_addr
+
327 first_seg
->data_off
);
330 * Store the mbuf address into the next entry of the array
331 * of returned packets.
333 rx_pkts
[nb_rcv
++] = first_seg
;
336 * Setup receipt context for a new packet.
341 q
->next_dd
= next_dd
;
343 if ((q
->next_dd
> q
->next_trigger
) || (alloc
== 1)) {
344 ret
= rte_mempool_get_bulk(q
->mp
,
345 (void **)&q
->sw_ring
[q
->next_alloc
],
348 if (unlikely(ret
!= 0)) {
349 uint8_t port
= q
->port_id
;
350 PMD_RX_LOG(ERR
, "Failed to alloc mbuf");
352 * Need to restore next_dd if we cannot allocate new
353 * buffers to replenish the old ones.
355 q
->next_dd
= (q
->next_dd
+ q
->nb_desc
- count
) %
357 rte_eth_devices
[port
].data
->rx_mbuf_alloc_failed
++;
361 for (; q
->next_alloc
<= q
->next_trigger
; ++q
->next_alloc
) {
362 mbuf
= q
->sw_ring
[q
->next_alloc
];
364 /* setup static mbuf fields */
365 fm10k_pktmbuf_reset(mbuf
, q
->port_id
);
367 /* write descriptor */
368 desc
.q
.pkt_addr
= MBUF_DMA_ADDR_DEFAULT(mbuf
);
369 desc
.q
.hdr_addr
= MBUF_DMA_ADDR_DEFAULT(mbuf
);
370 q
->hw_ring
[q
->next_alloc
] = desc
;
372 FM10K_PCI_REG_WRITE(q
->tail_ptr
, q
->next_trigger
);
373 q
->next_trigger
+= q
->alloc_thresh
;
374 if (q
->next_trigger
>= q
->nb_desc
) {
375 q
->next_trigger
= q
->alloc_thresh
- 1;
380 q
->pkt_first_seg
= first_seg
;
381 q
->pkt_last_seg
= last_seg
;
387 fm10k_dev_rx_descriptor_done(void *rx_queue
, uint16_t offset
)
389 volatile union fm10k_rx_desc
*rxdp
;
390 struct fm10k_rx_queue
*rxq
= rx_queue
;
394 if (unlikely(offset
>= rxq
->nb_desc
)) {
395 PMD_DRV_LOG(ERR
, "Invalid RX descriptor offset %u", offset
);
399 desc
= rxq
->next_dd
+ offset
;
400 if (desc
>= rxq
->nb_desc
)
401 desc
-= rxq
->nb_desc
;
403 rxdp
= &rxq
->hw_ring
[desc
];
405 ret
= !!(rxdp
->w
.status
&
406 rte_cpu_to_le_16(FM10K_RXD_STATUS_DD
));
412 * Free multiple TX mbuf at a time if they are in the same pool
414 * @txep: software desc ring index that starts to free
415 * @num: number of descs to free
418 static inline void tx_free_bulk_mbuf(struct rte_mbuf
**txep
, int num
)
420 struct rte_mbuf
*m
, *free
[RTE_FM10K_TX_MAX_FREE_BUF_SZ
];
424 if (unlikely(num
== 0))
427 m
= __rte_pktmbuf_prefree_seg(txep
[0]);
428 if (likely(m
!= NULL
)) {
431 for (i
= 1; i
< num
; i
++) {
432 m
= __rte_pktmbuf_prefree_seg(txep
[i
]);
433 if (likely(m
!= NULL
)) {
434 if (likely(m
->pool
== free
[0]->pool
))
437 rte_mempool_put_bulk(free
[0]->pool
,
438 (void *)free
, nb_free
);
445 rte_mempool_put_bulk(free
[0]->pool
, (void **)free
, nb_free
);
447 for (i
= 1; i
< num
; i
++) {
448 m
= __rte_pktmbuf_prefree_seg(txep
[i
]);
450 rte_mempool_put(m
->pool
, m
);
456 static inline void tx_free_descriptors(struct fm10k_tx_queue
*q
)
458 uint16_t next_rs
, count
= 0;
460 next_rs
= fifo_peek(&q
->rs_tracker
);
461 if (!(q
->hw_ring
[next_rs
].flags
& FM10K_TXD_FLAG_DONE
))
464 /* the DONE flag is set on this descriptor so remove the ID
465 * from the RS bit tracker and free the buffers */
466 fifo_remove(&q
->rs_tracker
);
468 /* wrap around? if so, free buffers from last_free up to but NOT
469 * including nb_desc */
470 if (q
->last_free
> next_rs
) {
471 count
= q
->nb_desc
- q
->last_free
;
472 tx_free_bulk_mbuf(&q
->sw_ring
[q
->last_free
], count
);
476 /* adjust free descriptor count before the next loop */
477 q
->nb_free
+= count
+ (next_rs
+ 1 - q
->last_free
);
479 /* free buffers from last_free, up to and including next_rs */
480 if (q
->last_free
<= next_rs
) {
481 count
= next_rs
- q
->last_free
+ 1;
482 tx_free_bulk_mbuf(&q
->sw_ring
[q
->last_free
], count
);
483 q
->last_free
+= count
;
486 if (q
->last_free
== q
->nb_desc
)
490 static inline void tx_xmit_pkt(struct fm10k_tx_queue
*q
, struct rte_mbuf
*mb
)
493 uint8_t flags
, hdrlen
;
495 /* always set the LAST flag on the last descriptor used to
496 * transmit the packet */
497 flags
= FM10K_TXD_FLAG_LAST
;
498 last_id
= q
->next_free
+ mb
->nb_segs
- 1;
499 if (last_id
>= q
->nb_desc
)
500 last_id
= last_id
- q
->nb_desc
;
502 /* but only set the RS flag on the last descriptor if rs_thresh
503 * descriptors will be used since the RS flag was last set */
504 if ((q
->nb_used
+ mb
->nb_segs
) >= q
->rs_thresh
) {
505 flags
|= FM10K_TXD_FLAG_RS
;
506 fifo_insert(&q
->rs_tracker
, last_id
);
509 q
->nb_used
= q
->nb_used
+ mb
->nb_segs
;
512 q
->nb_free
-= mb
->nb_segs
;
514 q
->hw_ring
[q
->next_free
].flags
= 0;
516 q
->hw_ring
[q
->next_free
].flags
|= FM10K_TXD_FLAG_FTAG
;
517 /* set checksum flags on first descriptor of packet. SCTP checksum
518 * offload is not supported, but we do not explicitly check for this
519 * case in favor of greatly simplified processing. */
520 if (mb
->ol_flags
& (PKT_TX_IP_CKSUM
| PKT_TX_L4_MASK
| PKT_TX_TCP_SEG
))
521 q
->hw_ring
[q
->next_free
].flags
|= FM10K_TXD_FLAG_CSUM
;
523 /* set vlan if requested */
524 if (mb
->ol_flags
& PKT_TX_VLAN_PKT
)
525 q
->hw_ring
[q
->next_free
].vlan
= mb
->vlan_tci
;
527 q
->sw_ring
[q
->next_free
] = mb
;
528 q
->hw_ring
[q
->next_free
].buffer_addr
=
529 rte_cpu_to_le_64(MBUF_DMA_ADDR(mb
));
530 q
->hw_ring
[q
->next_free
].buflen
=
531 rte_cpu_to_le_16(rte_pktmbuf_data_len(mb
));
533 if (mb
->ol_flags
& PKT_TX_TCP_SEG
) {
534 hdrlen
= mb
->outer_l2_len
+ mb
->outer_l3_len
+ mb
->l2_len
+
535 mb
->l3_len
+ mb
->l4_len
;
536 if (q
->hw_ring
[q
->next_free
].flags
& FM10K_TXD_FLAG_FTAG
)
537 hdrlen
+= sizeof(struct fm10k_ftag
);
539 if (likely((hdrlen
>= FM10K_TSO_MIN_HEADERLEN
) &&
540 (hdrlen
<= FM10K_TSO_MAX_HEADERLEN
) &&
541 (mb
->tso_segsz
>= FM10K_TSO_MINMSS
))) {
542 q
->hw_ring
[q
->next_free
].mss
= mb
->tso_segsz
;
543 q
->hw_ring
[q
->next_free
].hdrlen
= hdrlen
;
547 if (++q
->next_free
== q
->nb_desc
)
550 /* fill up the rings */
551 for (mb
= mb
->next
; mb
!= NULL
; mb
= mb
->next
) {
552 q
->sw_ring
[q
->next_free
] = mb
;
553 q
->hw_ring
[q
->next_free
].buffer_addr
=
554 rte_cpu_to_le_64(MBUF_DMA_ADDR(mb
));
555 q
->hw_ring
[q
->next_free
].buflen
=
556 rte_cpu_to_le_16(rte_pktmbuf_data_len(mb
));
557 q
->hw_ring
[q
->next_free
].flags
= 0;
558 if (++q
->next_free
== q
->nb_desc
)
562 q
->hw_ring
[last_id
].flags
|= flags
;
566 fm10k_xmit_pkts(void *tx_queue
, struct rte_mbuf
**tx_pkts
,
569 struct fm10k_tx_queue
*q
= tx_queue
;
573 for (count
= 0; count
< nb_pkts
; ++count
) {
576 /* running low on descriptors? try to free some... */
577 if (q
->nb_free
< q
->free_thresh
)
578 tx_free_descriptors(q
);
580 /* make sure there are enough free descriptors to transmit the
581 * entire packet before doing anything */
582 if (q
->nb_free
< mb
->nb_segs
)
585 /* sanity check to make sure the mbuf is valid */
586 if ((mb
->nb_segs
== 0) ||
587 ((mb
->nb_segs
> 1) && (mb
->next
== NULL
)))
590 /* process the packet */
594 /* update the tail pointer if any packets were processed */
595 if (likely(count
> 0))
596 FM10K_PCI_REG_WRITE(q
->tail_ptr
, q
->next_free
);