1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
3 */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <inttypes.h>
14
15 #include <rte_interrupts.h>
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_log.h>
19 #include <rte_debug.h>
20 #include <rte_pci.h>
21 #include <rte_memory.h>
22 #include <rte_memcpy.h>
23 #include <rte_memzone.h>
24 #include <rte_launch.h>
25 #include <rte_eal.h>
26 #include <rte_per_lcore.h>
27 #include <rte_lcore.h>
28 #include <rte_atomic.h>
29 #include <rte_branch_prediction.h>
30 #include <rte_mempool.h>
31 #include <rte_malloc.h>
32 #include <rte_mbuf.h>
33 #include <rte_ether.h>
34 #include <rte_ethdev_driver.h>
35 #include <rte_prefetch.h>
36 #include <rte_udp.h>
37 #include <rte_tcp.h>
38 #include <rte_sctp.h>
39 #include <rte_net.h>
40 #include <rte_string_fns.h>
41
42 #include "e1000_logs.h"
43 #include "base/e1000_api.h"
44 #include "e1000_ethdev.h"
45
46 #ifdef RTE_LIBRTE_IEEE1588
47 #define IGB_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
48 #else
49 #define IGB_TX_IEEE1588_TMST 0
50 #endif
51 /* Bit mask to indicate which bits are required for building the TX context */
52 #define IGB_TX_OFFLOAD_MASK ( \
53 PKT_TX_OUTER_IPV6 | \
54 PKT_TX_OUTER_IPV4 | \
55 PKT_TX_IPV6 | \
56 PKT_TX_IPV4 | \
57 PKT_TX_VLAN_PKT | \
58 PKT_TX_IP_CKSUM | \
59 PKT_TX_L4_MASK | \
60 PKT_TX_TCP_SEG | \
61 IGB_TX_IEEE1588_TMST)
62
63 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
64 (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
65
66 /**
67 * Structure associated with each descriptor of the RX ring of a RX queue.
68 */
69 struct igb_rx_entry {
70 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
71 };
72
73 /**
74 * Structure associated with each descriptor of the TX ring of a TX queue.
75 */
76 struct igb_tx_entry {
77 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
78 uint16_t next_id; /**< Index of next descriptor in ring. */
79 uint16_t last_id; /**< Index of last scattered descriptor. */
80 };
81
82 /**
83 * rx queue flags
84 */
85 enum igb_rxq_flags {
86 IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
87 };
88
89 /**
90 * Structure associated with each RX queue.
91 */
92 struct igb_rx_queue {
93 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
94 volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
95 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
96 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
97 volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
98 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
99 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
100 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
101 uint16_t nb_rx_desc; /**< number of RX descriptors. */
102 uint16_t rx_tail; /**< current value of RDT register. */
103 uint16_t nb_rx_hold; /**< number of held free RX desc. */
104 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
105 uint16_t queue_id; /**< RX queue index. */
106 uint16_t reg_idx; /**< RX queue register index. */
107 uint16_t port_id; /**< Device port identifier. */
108 uint8_t pthresh; /**< Prefetch threshold register. */
109 uint8_t hthresh; /**< Host threshold register. */
110 uint8_t wthresh; /**< Write-back threshold register. */
111 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
112 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
113 uint32_t flags; /**< RX flags. */
114 uint64_t offloads; /**< offloads of DEV_RX_OFFLOAD_* */
115 };
116
117 /**
118 * Hardware context number
119 */
120 enum igb_advctx_num {
121 IGB_CTX_0 = 0, /**< CTX0 */
122 IGB_CTX_1 = 1, /**< CTX1 */
123 IGB_CTX_NUM = 2, /**< CTX_NUM */
124 };
125
126 /** Offload features */
127 union igb_tx_offload {
128 uint64_t data;
129 struct {
130 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
131 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
132 uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier (CPU order). */
133 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
134 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
135
136 /* uint64_t unused:8; */
137 };
138 };
139
140 /*
141  * Compare masks for igb_tx_offload.data; they must be kept in sync
142  * with the igb_tx_offload layout above.
143  */
144 #define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */
145 #define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask. */
146 #define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */
147 #define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */
148 /** Mac + IP + TCP + Mss mask. */
149 #define TX_TSO_CMP_MASK \
150 (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
151
152 /**
153  * Structure used to check whether a new context descriptor needs to be built
154 */
155 struct igb_advctx_info {
156 uint64_t flags; /**< ol_flags related to context build. */
157 /** tx offload: vlan, tso, l2-l3-l4 lengths. */
158 union igb_tx_offload tx_offload;
159 /** compare mask for tx offload. */
160 union igb_tx_offload tx_offload_mask;
161 };
162
163 /**
164 * Structure associated with each TX queue.
165 */
166 struct igb_tx_queue {
167 volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
168 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
169 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
170 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
171 uint32_t txd_type; /**< Device-specific TXD type */
172 uint16_t nb_tx_desc; /**< number of TX descriptors. */
173 uint16_t tx_tail; /**< Current value of TDT register. */
174 uint16_t tx_head;
175 /**< Index of first used TX descriptor. */
176 uint16_t queue_id; /**< TX queue index. */
177 uint16_t reg_idx; /**< TX queue register index. */
178 uint16_t port_id; /**< Device port identifier. */
179 uint8_t pthresh; /**< Prefetch threshold register. */
180 uint8_t hthresh; /**< Host threshold register. */
181 uint8_t wthresh; /**< Write-back threshold register. */
182 uint32_t ctx_curr;
183 /**< Index of the hardware context currently in use. */
184 uint32_t ctx_start;
185 /**< Start context position for transmit queue. */
186 struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
187 /**< Hardware context history.*/
188 uint64_t offloads; /**< offloads of DEV_TX_OFFLOAD_* */
189 };
190
191 #if 1
192 #define RTE_PMD_USE_PREFETCH
193 #endif
194
195 #ifdef RTE_PMD_USE_PREFETCH
196 #define rte_igb_prefetch(p) rte_prefetch0(p)
197 #else
198 #define rte_igb_prefetch(p) do {} while(0)
199 #endif
200
201 #ifdef RTE_PMD_PACKET_PREFETCH
202 #define rte_packet_prefetch(p) rte_prefetch1(p)
203 #else
204 #define rte_packet_prefetch(p) do {} while(0)
205 #endif
206
207 /*
208  * Macros for the VMDq feature of 1 GbE NICs.
209 */
210 #define E1000_VMOLR_SIZE (8)
211 #define IGB_TSO_MAX_HDRLEN (512)
212 #define IGB_TSO_MAX_MSS (9216)
213
214 /*********************************************************************
215 *
216 * TX function
217 *
218 **********************************************************************/
219
220 /*
221  * The hardware has some limitations for TCP segmentation offload, so we
222  * should check whether the parameters are valid.
223 */
224 static inline uint64_t
225 check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
226 {
227 if (!(ol_req & PKT_TX_TCP_SEG))
228 return ol_req;
229 if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
230 ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
231 ol_req &= ~PKT_TX_TCP_SEG;
232 ol_req |= PKT_TX_TCP_CKSUM;
233 }
234 return ol_req;
235 }
236
237 /*
238  * Advanced context descriptors are almost the same between igb and ixgbe.
239  * This is kept as a separate function; there may be an optimization opportunity here.
240  * Rework is required to go with the pre-defined values.
241 */
242
243 static inline void
244 igbe_set_xmit_ctx(struct igb_tx_queue* txq,
245 volatile struct e1000_adv_tx_context_desc *ctx_txd,
246 uint64_t ol_flags, union igb_tx_offload tx_offload)
247 {
248 uint32_t type_tucmd_mlhl;
249 uint32_t mss_l4len_idx;
250 uint32_t ctx_idx, ctx_curr;
251 uint32_t vlan_macip_lens;
252 union igb_tx_offload tx_offload_mask;
253
254 ctx_curr = txq->ctx_curr;
255 ctx_idx = ctx_curr + txq->ctx_start;
256
257 tx_offload_mask.data = 0;
258 type_tucmd_mlhl = 0;
259
260 /* Specify which HW CTX to upload. */
261 mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
262
263 if (ol_flags & PKT_TX_VLAN_PKT)
264 tx_offload_mask.data |= TX_VLAN_CMP_MASK;
265
266 /* check if TCP segmentation is required for this packet */
267 if (ol_flags & PKT_TX_TCP_SEG) {
268 /* implies IP cksum in IPv4 */
269 if (ol_flags & PKT_TX_IP_CKSUM)
270 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
271 E1000_ADVTXD_TUCMD_L4T_TCP |
272 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
273 else
274 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
275 E1000_ADVTXD_TUCMD_L4T_TCP |
276 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
277
278 tx_offload_mask.data |= TX_TSO_CMP_MASK;
279 mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
280 mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
281 } else { /* no TSO, check if hardware checksum is needed */
282 if (ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_L4_MASK))
283 tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
284
285 if (ol_flags & PKT_TX_IP_CKSUM)
286 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
287
288 switch (ol_flags & PKT_TX_L4_MASK) {
289 case PKT_TX_UDP_CKSUM:
290 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
291 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
292 mss_l4len_idx |= sizeof(struct rte_udp_hdr)
293 << E1000_ADVTXD_L4LEN_SHIFT;
294 break;
295 case PKT_TX_TCP_CKSUM:
296 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
297 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
298 mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
299 << E1000_ADVTXD_L4LEN_SHIFT;
300 break;
301 case PKT_TX_SCTP_CKSUM:
302 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
303 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
304 mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
305 << E1000_ADVTXD_L4LEN_SHIFT;
306 break;
307 default:
308 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
309 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
310 break;
311 }
312 }
313
314 txq->ctx_cache[ctx_curr].flags = ol_flags;
315 txq->ctx_cache[ctx_curr].tx_offload.data =
316 tx_offload_mask.data & tx_offload.data;
317 txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
318
319 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
320 vlan_macip_lens = (uint32_t)tx_offload.data;
321 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
322 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
323 ctx_txd->seqnum_seed = 0;
324 }
325
326 /*
327 * Check which hardware context can be used. Use the existing match
328 * or create a new context descriptor.
329 */
330 static inline uint32_t
331 what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
332 union igb_tx_offload tx_offload)
333 {
334 /* If match with the current context */
335 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
336 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
337 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
338 return txq->ctx_curr;
339 }
340
341 /* If match with the second context */
342 txq->ctx_curr ^= 1;
343 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
344 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
345 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
346 return txq->ctx_curr;
347 }
348
349 /* Mismatch with both contexts: a new context descriptor must be built */
350 return IGB_CTX_NUM;
351 }
352
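/*
 * Translate the checksum/TSO request bits of an mbuf's ol_flags into the
 * POPTS (IXSM/TXSM) bits of the data descriptor's olinfo_status field.
 */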
353 static inline uint32_t
354 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
355 {
356 static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
357 static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
358 uint32_t tmp;
359
360 tmp = l4_olinfo[(ol_flags & PKT_TX_L4_MASK) != PKT_TX_L4_NO_CKSUM];
361 tmp |= l3_olinfo[(ol_flags & PKT_TX_IP_CKSUM) != 0];
362 tmp |= l4_olinfo[(ol_flags & PKT_TX_TCP_SEG) != 0];
363 return tmp;
364 }
365
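/*
 * Translate the VLAN insertion and TSO request bits of an mbuf's ol_flags
 * into the DCMD (VLE/TSE) bits of the data descriptor's command word.
 */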
366 static inline uint32_t
367 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
368 {
369 uint32_t cmdtype;
370 static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
371 static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};
372 cmdtype = vlan_cmd[(ol_flags & PKT_TX_VLAN_PKT) != 0];
373 cmdtype |= tso_cmd[(ol_flags & PKT_TX_TCP_SEG) != 0];
374 return cmdtype;
375 }
376
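/*
 * PMD transmit burst function.
 * For each packet, write an advanced context descriptor when a new hardware
 * context is required, then one data descriptor per mbuf segment; the last
 * data descriptor of a packet carries the EOP and RS bits. The TDT register
 * is updated once, after the whole burst has been queued.
 * Returns the number of packets actually queued for transmission.
 */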
377 uint16_t
378 eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
379 uint16_t nb_pkts)
380 {
381 struct igb_tx_queue *txq;
382 struct igb_tx_entry *sw_ring;
383 struct igb_tx_entry *txe, *txn;
384 volatile union e1000_adv_tx_desc *txr;
385 volatile union e1000_adv_tx_desc *txd;
386 struct rte_mbuf *tx_pkt;
387 struct rte_mbuf *m_seg;
388 uint64_t buf_dma_addr;
389 uint32_t olinfo_status;
390 uint32_t cmd_type_len;
391 uint32_t pkt_len;
392 uint16_t slen;
393 uint64_t ol_flags;
394 uint16_t tx_end;
395 uint16_t tx_id;
396 uint16_t tx_last;
397 uint16_t nb_tx;
398 uint64_t tx_ol_req;
399 uint32_t new_ctx = 0;
400 uint32_t ctx = 0;
401 union igb_tx_offload tx_offload = {0};
402
403 txq = tx_queue;
404 sw_ring = txq->sw_ring;
405 txr = txq->tx_ring;
406 tx_id = txq->tx_tail;
407 txe = &sw_ring[tx_id];
408
409 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
410 tx_pkt = *tx_pkts++;
411 pkt_len = tx_pkt->pkt_len;
412
413 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
414
415 /*
416 * The number of descriptors that must be allocated for a
417 * packet is the number of segments of that packet, plus 1
418 * Context Descriptor for the VLAN Tag Identifier, if any.
419 * Determine the last TX descriptor to allocate in the TX ring
420 * for the packet, starting from the current position (tx_id)
421 * in the ring.
422 */
423 tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
424
425 ol_flags = tx_pkt->ol_flags;
426 tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
427
428 /* If a Context Descriptor needs to be built. */
429 if (tx_ol_req) {
430 tx_offload.l2_len = tx_pkt->l2_len;
431 tx_offload.l3_len = tx_pkt->l3_len;
432 tx_offload.l4_len = tx_pkt->l4_len;
433 tx_offload.vlan_tci = tx_pkt->vlan_tci;
434 tx_offload.tso_segsz = tx_pkt->tso_segsz;
435 tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
436
437 ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
438 /* Only allocate a context descriptor if required. */
439 new_ctx = (ctx == IGB_CTX_NUM);
440 ctx = txq->ctx_curr + txq->ctx_start;
441 tx_last = (uint16_t) (tx_last + new_ctx);
442 }
443 if (tx_last >= txq->nb_tx_desc)
444 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
445
446 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
447 " tx_first=%u tx_last=%u",
448 (unsigned) txq->port_id,
449 (unsigned) txq->queue_id,
450 (unsigned) pkt_len,
451 (unsigned) tx_id,
452 (unsigned) tx_last);
453
454 /*
455 * Check if there are enough free descriptors in the TX ring
456 * to transmit the next packet.
457 * This operation is based on the two following rules:
458 *
459 * 1- Only check that the last needed TX descriptor can be
460 * allocated (by construction, if that descriptor is free,
461 * all intermediate ones are also free).
462 *
463 * For this purpose, the index of the last TX descriptor
464 * used for a packet (the "last descriptor" of a packet)
465 * is recorded in the TX entries (the last one included)
466 * that are associated with all TX descriptors allocated
467 * for that packet.
468 *
469 * 2- Avoid allocating the last free TX descriptor of the
470 * ring, in order to never set the TDT register with the
471 * same value stored in parallel by the NIC in the TDH
472 * register, which makes the TX engine of the NIC enter
473 * a deadlock situation.
474 *
475 * By extension, avoid allocating a free descriptor that
476 * belongs to the last set of free descriptors allocated
477 * to the same packet previously transmitted.
478 */
479
480 /*
481 * The "last descriptor" of the previously sent packet, if any,
482 * that used the last descriptor we want to allocate.
483 */
484 tx_end = sw_ring[tx_last].last_id;
485
486 /*
487 * The next descriptor following that "last descriptor" in the
488 * ring.
489 */
490 tx_end = sw_ring[tx_end].next_id;
491
492 /*
493 * The "last descriptor" associated with that next descriptor.
494 */
495 tx_end = sw_ring[tx_end].last_id;
496
497 /*
498 * Check that this descriptor is free.
499 */
500 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
501 if (nb_tx == 0)
502 return 0;
503 goto end_of_tx;
504 }
505
506 /*
507 * Set common flags of all TX Data Descriptors.
508 *
509 * The following bits must be set in all Data Descriptors:
510 * - E1000_ADVTXD_DTYP_DATA
511 * - E1000_ADVTXD_DCMD_DEXT
512 *
513 * The following bits must be set in the first Data Descriptor
514 * and are ignored in the other ones:
515 * - E1000_ADVTXD_DCMD_IFCS
516 * - E1000_ADVTXD_MAC_1588
517 * - E1000_ADVTXD_DCMD_VLE
518 *
519 * The following bits must only be set in the last Data
520 * Descriptor:
521 * - E1000_TXD_CMD_EOP
522 *
523 * The following bits can be set in any Data Descriptor, but
524 * are only set in the last Data Descriptor:
525 * - E1000_TXD_CMD_RS
526 */
527 cmd_type_len = txq->txd_type |
528 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
529 if (tx_ol_req & PKT_TX_TCP_SEG)
530 pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
531 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
532 #if defined(RTE_LIBRTE_IEEE1588)
533 if (ol_flags & PKT_TX_IEEE1588_TMST)
534 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
535 #endif
536 if (tx_ol_req) {
537 /* Setup TX Advanced context descriptor if required */
538 if (new_ctx) {
539 volatile struct e1000_adv_tx_context_desc *
540 ctx_txd;
541
542 ctx_txd = (volatile struct
543 e1000_adv_tx_context_desc *)
544 &txr[tx_id];
545
546 txn = &sw_ring[txe->next_id];
547 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
548
549 if (txe->mbuf != NULL) {
550 rte_pktmbuf_free_seg(txe->mbuf);
551 txe->mbuf = NULL;
552 }
553
554 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
555
556 txe->last_id = tx_last;
557 tx_id = txe->next_id;
558 txe = txn;
559 }
560
561 /* Setup the TX Advanced Data Descriptor */
562 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
563 olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
564 olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
565 }
566
567 m_seg = tx_pkt;
568 do {
569 txn = &sw_ring[txe->next_id];
570 txd = &txr[tx_id];
571
572 if (txe->mbuf != NULL)
573 rte_pktmbuf_free_seg(txe->mbuf);
574 txe->mbuf = m_seg;
575
576 /*
577 * Set up transmit descriptor.
578 */
579 slen = (uint16_t) m_seg->data_len;
580 buf_dma_addr = rte_mbuf_data_iova(m_seg);
581 txd->read.buffer_addr =
582 rte_cpu_to_le_64(buf_dma_addr);
583 txd->read.cmd_type_len =
584 rte_cpu_to_le_32(cmd_type_len | slen);
585 txd->read.olinfo_status =
586 rte_cpu_to_le_32(olinfo_status);
587 txe->last_id = tx_last;
588 tx_id = txe->next_id;
589 txe = txn;
590 m_seg = m_seg->next;
591 } while (m_seg != NULL);
592
593 /*
594 * The last packet data descriptor needs End Of Packet (EOP)
595 * and Report Status (RS).
596 */
597 txd->read.cmd_type_len |=
598 rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
599 }
600 end_of_tx:
601 rte_wmb();
602
603 /*
604 * Set the Transmit Descriptor Tail (TDT).
605 */
606 E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
607 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
608 (unsigned) txq->port_id, (unsigned) txq->queue_id,
609 (unsigned) tx_id, (unsigned) nb_tx);
610 txq->tx_tail = tx_id;
611
612 return nb_tx;
613 }
614
615 /*********************************************************************
616 *
617 * TX prep functions
618 *
619 **********************************************************************/
620 uint16_t
621 eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
622 uint16_t nb_pkts)
623 {
624 int i, ret;
625 struct rte_mbuf *m;
626
627 for (i = 0; i < nb_pkts; i++) {
628 m = tx_pkts[i];
629
630 /* Check some limitations for TSO in hardware */
631 if (m->ol_flags & PKT_TX_TCP_SEG)
632 if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
633 (m->l2_len + m->l3_len + m->l4_len >
634 IGB_TSO_MAX_HDRLEN)) {
635 rte_errno = EINVAL;
636 return i;
637 }
638
639 if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
640 rte_errno = ENOTSUP;
641 return i;
642 }
643
644 #ifdef RTE_LIBRTE_ETHDEV_DEBUG
645 ret = rte_validate_tx_offload(m);
646 if (ret != 0) {
647 rte_errno = -ret;
648 return i;
649 }
650 #endif
651 ret = rte_net_intel_cksum_prepare(m);
652 if (ret != 0) {
653 rte_errno = -ret;
654 return i;
655 }
656 }
657
658 return i;
659 }
660
661 /*********************************************************************
662 *
663 * RX functions
664 *
665 **********************************************************************/
666 #define IGB_PACKET_TYPE_IPV4 0X01
667 #define IGB_PACKET_TYPE_IPV4_TCP 0X11
668 #define IGB_PACKET_TYPE_IPV4_UDP 0X21
669 #define IGB_PACKET_TYPE_IPV4_SCTP 0X41
670 #define IGB_PACKET_TYPE_IPV4_EXT 0X03
671 #define IGB_PACKET_TYPE_IPV4_EXT_SCTP 0X43
672 #define IGB_PACKET_TYPE_IPV6 0X04
673 #define IGB_PACKET_TYPE_IPV6_TCP 0X14
674 #define IGB_PACKET_TYPE_IPV6_UDP 0X24
675 #define IGB_PACKET_TYPE_IPV6_EXT 0X0C
676 #define IGB_PACKET_TYPE_IPV6_EXT_TCP 0X1C
677 #define IGB_PACKET_TYPE_IPV6_EXT_UDP 0X2C
678 #define IGB_PACKET_TYPE_IPV4_IPV6 0X05
679 #define IGB_PACKET_TYPE_IPV4_IPV6_TCP 0X15
680 #define IGB_PACKET_TYPE_IPV4_IPV6_UDP 0X25
681 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
682 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
683 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
684 #define IGB_PACKET_TYPE_MAX 0X80
685 #define IGB_PACKET_TYPE_MASK 0X7F
686 #define IGB_PACKET_TYPE_SHIFT 0X04
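/*
 * Map the packet-type information reported in the RX descriptor to the
 * generic RTE_PTYPE_* value through a lookup table. Packets matched by an
 * EtherType filter (ETQF) are reported as RTE_PTYPE_UNKNOWN.
 */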
687 static inline uint32_t
688 igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
689 {
690 static const uint32_t
691 ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
692 [IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
693 RTE_PTYPE_L3_IPV4,
694 [IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
695 RTE_PTYPE_L3_IPV4_EXT,
696 [IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
697 RTE_PTYPE_L3_IPV6,
698 [IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
699 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
700 RTE_PTYPE_INNER_L3_IPV6,
701 [IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
702 RTE_PTYPE_L3_IPV6_EXT,
703 [IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
704 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
705 RTE_PTYPE_INNER_L3_IPV6_EXT,
706 [IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
707 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
708 [IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
709 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
710 [IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
711 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
712 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
713 [IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
714 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
715 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
716 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
717 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
718 [IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
719 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
720 [IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
721 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
722 [IGB_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
723 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
724 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
725 [IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
726 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
727 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
728 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
729 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
730 [IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
731 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
732 [IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
733 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
734 };
735 if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
736 return RTE_PTYPE_UNKNOWN;
737
738 pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;
739
740 return ptype_table[pkt_info];
741 }
742
743 static inline uint64_t
744 rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
745 {
746 uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ? 0 : PKT_RX_RSS_HASH;
747
748 #if defined(RTE_LIBRTE_IEEE1588)
749 static uint32_t ip_pkt_etqf_map[8] = {
750 0, 0, 0, PKT_RX_IEEE1588_PTP,
751 0, 0, 0, 0,
752 };
753
754 struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
755 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);
756
757 /* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
758 if (hw->mac.type == e1000_i210)
759 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
760 else
761 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
762 #else
763 RTE_SET_USED(rxq);
764 #endif
765
766 return pkt_flags;
767 }
768
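/*
 * Translate the RX descriptor status bits (VLAN present and, when
 * RTE_LIBRTE_IEEE1588 is enabled, timestamp indication) into mbuf ol_flags.
 */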
769 static inline uint64_t
770 rx_desc_status_to_pkt_flags(uint32_t rx_status)
771 {
772 uint64_t pkt_flags;
773
774 /* Check if VLAN present */
775 pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
776 PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED : 0);
777
778 #if defined(RTE_LIBRTE_IEEE1588)
779 if (rx_status & E1000_RXD_STAT_TMST)
780 pkt_flags = pkt_flags | PKT_RX_IEEE1588_TMST;
781 #endif
782 return pkt_flags;
783 }
784
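/*
 * Translate the IP/L4 checksum error bits of the RX descriptor into the
 * corresponding PKT_RX_*_CKSUM_GOOD / PKT_RX_*_CKSUM_BAD mbuf flags.
 */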
785 static inline uint64_t
786 rx_desc_error_to_pkt_flags(uint32_t rx_status)
787 {
788 /*
789 * Bit 30: IPE, IPv4 checksum error
790 * Bit 29: L4I, L4 integrity error
791 */
792
793 static uint64_t error_to_pkt_flags_map[4] = {
794 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD,
795 PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD,
796 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD,
797 PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
798 };
799 return error_to_pkt_flags_map[(rx_status >>
800 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
801 }
802
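/*
 * PMD receive burst function (one descriptor per packet).
 * Each completed descriptor is replenished with a newly allocated mbuf; if
 * the allocation fails, the loop stops and the descriptor is re-examined on
 * the next call. The RDT register is only advanced once the number of held
 * descriptors exceeds rx_free_thresh.
 */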
803 uint16_t
804 eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
805 uint16_t nb_pkts)
806 {
807 struct igb_rx_queue *rxq;
808 volatile union e1000_adv_rx_desc *rx_ring;
809 volatile union e1000_adv_rx_desc *rxdp;
810 struct igb_rx_entry *sw_ring;
811 struct igb_rx_entry *rxe;
812 struct rte_mbuf *rxm;
813 struct rte_mbuf *nmb;
814 union e1000_adv_rx_desc rxd;
815 uint64_t dma_addr;
816 uint32_t staterr;
817 uint32_t hlen_type_rss;
818 uint16_t pkt_len;
819 uint16_t rx_id;
820 uint16_t nb_rx;
821 uint16_t nb_hold;
822 uint64_t pkt_flags;
823
824 nb_rx = 0;
825 nb_hold = 0;
826 rxq = rx_queue;
827 rx_id = rxq->rx_tail;
828 rx_ring = rxq->rx_ring;
829 sw_ring = rxq->sw_ring;
830 while (nb_rx < nb_pkts) {
831 /*
832 * The order of operations here is important as the DD status
833 * bit must not be read after any other descriptor fields.
834 * rx_ring and rxdp are pointing to volatile data so the order
835 * of accesses cannot be reordered by the compiler. If they were
836 * not volatile, they could be reordered which could lead to
837 * using invalid descriptor fields when read from rxd.
838 */
839 rxdp = &rx_ring[rx_id];
840 staterr = rxdp->wb.upper.status_error;
841 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
842 break;
843 rxd = *rxdp;
844
845 /*
846 * End of packet.
847 *
848 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
849 * likely to be invalid and to be dropped by the various
850 * validation checks performed by the network stack.
851 *
852 * Allocate a new mbuf to replenish the RX ring descriptor.
853 * If the allocation fails:
854 * - arrange for that RX descriptor to be the first one
855 * being parsed the next time the receive function is
856 * invoked [on the same queue].
857 *
858 * - Stop parsing the RX ring and return immediately.
859 *
860 * This policy does not drop the packet received in the RX
861 * descriptor for which the allocation of a new mbuf failed.
862 * Thus, it allows that packet to be later retrieved if
863 * mbufs have been freed in the meantime.
864 * As a side effect, holding RX descriptors instead of
865 * systematically giving them back to the NIC may lead to
866 * RX ring exhaustion situations.
867 * However, the NIC can gracefully prevent such situations
868 * from happening by sending specific "back-pressure" flow control
869 * frames to its peer(s).
870 */
871 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
872 "staterr=0x%x pkt_len=%u",
873 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
874 (unsigned) rx_id, (unsigned) staterr,
875 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
876
877 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
878 if (nmb == NULL) {
879 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
880 "queue_id=%u", (unsigned) rxq->port_id,
881 (unsigned) rxq->queue_id);
882 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
883 break;
884 }
885
886 nb_hold++;
887 rxe = &sw_ring[rx_id];
888 rx_id++;
889 if (rx_id == rxq->nb_rx_desc)
890 rx_id = 0;
891
892 /* Prefetch next mbuf while processing current one. */
893 rte_igb_prefetch(sw_ring[rx_id].mbuf);
894
895 /*
896 * When the next RX descriptor is on a cache-line boundary,
897 * prefetch the next 4 RX descriptors and the next 8 pointers
898 * to mbufs.
899 */
900 if ((rx_id & 0x3) == 0) {
901 rte_igb_prefetch(&rx_ring[rx_id]);
902 rte_igb_prefetch(&sw_ring[rx_id]);
903 }
904
905 rxm = rxe->mbuf;
906 rxe->mbuf = nmb;
907 dma_addr =
908 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
909 rxdp->read.hdr_addr = 0;
910 rxdp->read.pkt_addr = dma_addr;
911
912 /*
913 * Initialize the returned mbuf.
914 * 1) setup generic mbuf fields:
915 * - number of segments,
916 * - next segment,
917 * - packet length,
918 * - RX port identifier.
919 * 2) integrate hardware offload data, if any:
920 * - RSS flag & hash,
921 * - IP checksum flag,
922 * - VLAN TCI, if any,
923 * - error flags.
924 */
925 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
926 rxq->crc_len);
927 rxm->data_off = RTE_PKTMBUF_HEADROOM;
928 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
929 rxm->nb_segs = 1;
930 rxm->next = NULL;
931 rxm->pkt_len = pkt_len;
932 rxm->data_len = pkt_len;
933 rxm->port = rxq->port_id;
934
935 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
936 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
937
938 /*
939 * The vlan_tci field is only valid when PKT_RX_VLAN is
940 * set in the pkt_flags field and must be in CPU byte order.
941 */
942 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
943 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
944 rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
945 } else {
946 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
947 }
948 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
949 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
950 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
951 rxm->ol_flags = pkt_flags;
952 rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
953 lo_dword.hs_rss.pkt_info);
954
955 /*
956 * Store the mbuf address into the next entry of the array
957 * of returned packets.
958 */
959 rx_pkts[nb_rx++] = rxm;
960 }
961 rxq->rx_tail = rx_id;
962
963 /*
964 * If the number of free RX descriptors is greater than the RX free
965 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
966 * register.
967 * Update the RDT with the value of the last processed RX descriptor
968 * minus 1, to guarantee that the RDT register is never equal to the
969 * RDH register, which creates a "full" ring situation from the
970 * hardware point of view...
971 */
972 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
973 if (nb_hold > rxq->rx_free_thresh) {
974 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
975 "nb_hold=%u nb_rx=%u",
976 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
977 (unsigned) rx_id, (unsigned) nb_hold,
978 (unsigned) nb_rx);
979 rx_id = (uint16_t) ((rx_id == 0) ?
980 (rxq->nb_rx_desc - 1) : (rx_id - 1));
981 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
982 nb_hold = 0;
983 }
984 rxq->nb_rx_hold = nb_hold;
985 return nb_rx;
986 }
987
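/*
 * PMD receive burst function for scattered packets.
 * Works like eth_igb_recv_pkts() but chains one mbuf per descriptor into a
 * multi-segment packet until the EOP bit is seen, keeping the partially
 * assembled packet (pkt_first_seg/pkt_last_seg) in the queue between calls,
 * and adjusts lengths for the CRC when it is not stripped by hardware.
 */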
988 uint16_t
989 eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
990 uint16_t nb_pkts)
991 {
992 struct igb_rx_queue *rxq;
993 volatile union e1000_adv_rx_desc *rx_ring;
994 volatile union e1000_adv_rx_desc *rxdp;
995 struct igb_rx_entry *sw_ring;
996 struct igb_rx_entry *rxe;
997 struct rte_mbuf *first_seg;
998 struct rte_mbuf *last_seg;
999 struct rte_mbuf *rxm;
1000 struct rte_mbuf *nmb;
1001 union e1000_adv_rx_desc rxd;
1002 uint64_t dma; /* Physical address of mbuf data buffer */
1003 uint32_t staterr;
1004 uint32_t hlen_type_rss;
1005 uint16_t rx_id;
1006 uint16_t nb_rx;
1007 uint16_t nb_hold;
1008 uint16_t data_len;
1009 uint64_t pkt_flags;
1010
1011 nb_rx = 0;
1012 nb_hold = 0;
1013 rxq = rx_queue;
1014 rx_id = rxq->rx_tail;
1015 rx_ring = rxq->rx_ring;
1016 sw_ring = rxq->sw_ring;
1017
1018 /*
1019 * Retrieve RX context of current packet, if any.
1020 */
1021 first_seg = rxq->pkt_first_seg;
1022 last_seg = rxq->pkt_last_seg;
1023
1024 while (nb_rx < nb_pkts) {
1025 next_desc:
1026 /*
1027 * The order of operations here is important as the DD status
1028 * bit must not be read after any other descriptor fields.
1029 * rx_ring and rxdp are pointing to volatile data so the order
1030 * of accesses cannot be reordered by the compiler. If they were
1031 * not volatile, they could be reordered which could lead to
1032 * using invalid descriptor fields when read from rxd.
1033 */
1034 rxdp = &rx_ring[rx_id];
1035 staterr = rxdp->wb.upper.status_error;
1036 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
1037 break;
1038 rxd = *rxdp;
1039
1040 /*
1041 * Descriptor done.
1042 *
1043 * Allocate a new mbuf to replenish the RX ring descriptor.
1044 * If the allocation fails:
1045 * - arrange for that RX descriptor to be the first one
1046 * being parsed the next time the receive function is
1047 * invoked [on the same queue].
1048 *
1049 * - Stop parsing the RX ring and return immediately.
1050 *
1051 * This policy does not drop the packet received in the RX
1052 * descriptor for which the allocation of a new mbuf failed.
1053 * Thus, it allows that packet to be later retrieved if
1054 * mbufs have been freed in the meantime.
1055 * As a side effect, holding RX descriptors instead of
1056 * systematically giving them back to the NIC may lead to
1057 * RX ring exhaustion situations.
1058 * However, the NIC can gracefully prevent such situations
1059 * from happening by sending specific "back-pressure" flow control
1060 * frames to its peer(s).
1061 */
1062 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1063 "staterr=0x%x data_len=%u",
1064 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1065 (unsigned) rx_id, (unsigned) staterr,
1066 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1067
1068 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1069 if (nmb == NULL) {
1070 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1071 "queue_id=%u", (unsigned) rxq->port_id,
1072 (unsigned) rxq->queue_id);
1073 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1074 break;
1075 }
1076
1077 nb_hold++;
1078 rxe = &sw_ring[rx_id];
1079 rx_id++;
1080 if (rx_id == rxq->nb_rx_desc)
1081 rx_id = 0;
1082
1083 /* Prefetch next mbuf while processing current one. */
1084 rte_igb_prefetch(sw_ring[rx_id].mbuf);
1085
1086 /*
1087 * When the next RX descriptor is on a cache-line boundary,
1088 * prefetch the next 4 RX descriptors and the next 8 pointers
1089 * to mbufs.
1090 */
1091 if ((rx_id & 0x3) == 0) {
1092 rte_igb_prefetch(&rx_ring[rx_id]);
1093 rte_igb_prefetch(&sw_ring[rx_id]);
1094 }
1095
1096 /*
1097 * Update RX descriptor with the physical address of the new
1098 * data buffer of the new allocated mbuf.
1099 */
1100 rxm = rxe->mbuf;
1101 rxe->mbuf = nmb;
1102 dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1103 rxdp->read.pkt_addr = dma;
1104 rxdp->read.hdr_addr = 0;
1105
1106 /*
1107 * Set data length & data buffer address of mbuf.
1108 */
1109 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1110 rxm->data_len = data_len;
1111 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1112
1113 /*
1114 * If this is the first buffer of the received packet,
1115 * set the pointer to the first mbuf of the packet and
1116 * initialize its context.
1117 * Otherwise, update the total length and the number of segments
1118 * of the current scattered packet, and update the pointer to
1119 * the last mbuf of the current packet.
1120 */
1121 if (first_seg == NULL) {
1122 first_seg = rxm;
1123 first_seg->pkt_len = data_len;
1124 first_seg->nb_segs = 1;
1125 } else {
1126 first_seg->pkt_len += data_len;
1127 first_seg->nb_segs++;
1128 last_seg->next = rxm;
1129 }
1130
1131 /*
1132 * If this is not the last buffer of the received packet,
1133 * update the pointer to the last mbuf of the current scattered
1134 * packet and continue to parse the RX ring.
1135 */
1136 if (! (staterr & E1000_RXD_STAT_EOP)) {
1137 last_seg = rxm;
1138 goto next_desc;
1139 }
1140
1141 /*
1142 * This is the last buffer of the received packet.
1143 * If the CRC is not stripped by the hardware:
1144 * - Subtract the CRC length from the total packet length.
1145 * - If the last buffer only contains the whole CRC or a part
1146 * of it, free the mbuf associated to the last buffer.
1147 * If part of the CRC is also contained in the previous
1148 * mbuf, subtract the length of that CRC part from the
1149 * data length of the previous mbuf.
1150 */
1151 rxm->next = NULL;
1152 if (unlikely(rxq->crc_len > 0)) {
1153 first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
1154 if (data_len <= RTE_ETHER_CRC_LEN) {
1155 rte_pktmbuf_free_seg(rxm);
1156 first_seg->nb_segs--;
1157 last_seg->data_len = (uint16_t)
1158 (last_seg->data_len -
1159 (RTE_ETHER_CRC_LEN - data_len));
1160 last_seg->next = NULL;
1161 } else
1162 rxm->data_len = (uint16_t)
1163 (data_len - RTE_ETHER_CRC_LEN);
1164 }
1165
1166 /*
1167 * Initialize the first mbuf of the returned packet:
1168 * - RX port identifier,
1169 * - hardware offload data, if any:
1170 * - RSS flag & hash,
1171 * - IP checksum flag,
1172 * - VLAN TCI, if any,
1173 * - error flags.
1174 */
1175 first_seg->port = rxq->port_id;
1176 first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1177
1178 /*
1179 * The vlan_tci field is only valid when PKT_RX_VLAN is
1180 * set in the pkt_flags field and must be in CPU byte order.
1181 */
1182 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
1183 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
1184 first_seg->vlan_tci =
1185 rte_be_to_cpu_16(rxd.wb.upper.vlan);
1186 } else {
1187 first_seg->vlan_tci =
1188 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1189 }
1190 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1191 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
1192 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1193 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1194 first_seg->ol_flags = pkt_flags;
1195 first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
1196 lower.lo_dword.hs_rss.pkt_info);
1197
1198 /* Prefetch data of first segment, if configured to do so. */
1199 rte_packet_prefetch((char *)first_seg->buf_addr +
1200 first_seg->data_off);
1201
1202 /*
1203 * Store the mbuf address into the next entry of the array
1204 * of returned packets.
1205 */
1206 rx_pkts[nb_rx++] = first_seg;
1207
1208 /*
1209 * Set up the receive context for a new packet.
1210 */
1211 first_seg = NULL;
1212 }
1213
1214 /*
1215 * Record index of the next RX descriptor to probe.
1216 */
1217 rxq->rx_tail = rx_id;
1218
1219 /*
1220 * Save receive context.
1221 */
1222 rxq->pkt_first_seg = first_seg;
1223 rxq->pkt_last_seg = last_seg;
1224
1225 /*
1226 * If the number of free RX descriptors is greater than the RX free
1227 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1228 * register.
1229 * Update the RDT with the value of the last processed RX descriptor
1230 * minus 1, to guarantee that the RDT register is never equal to the
1231 * RDH register, which creates a "full" ring situation from the
1232 * hardware point of view...
1233 */
1234 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1235 if (nb_hold > rxq->rx_free_thresh) {
1236 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1237 "nb_hold=%u nb_rx=%u",
1238 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1239 (unsigned) rx_id, (unsigned) nb_hold,
1240 (unsigned) nb_rx);
1241 rx_id = (uint16_t) ((rx_id == 0) ?
1242 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1243 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1244 nb_hold = 0;
1245 }
1246 rxq->nb_rx_hold = nb_hold;
1247 return nb_rx;
1248 }
1249
1250 /*
1251 * Maximum number of Ring Descriptors.
1252 *
1253 * Since RDLEN/TDLEN should be a multiple of 128 bytes, the number of ring
1254 * descriptors should meet the following condition:
1255 * (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
1256 */
1257
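/* Free any mbufs still attached to the entries of the TX software ring. */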
1258 static void
1259 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1260 {
1261 unsigned i;
1262
1263 if (txq->sw_ring != NULL) {
1264 for (i = 0; i < txq->nb_tx_desc; i++) {
1265 if (txq->sw_ring[i].mbuf != NULL) {
1266 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1267 txq->sw_ring[i].mbuf = NULL;
1268 }
1269 }
1270 }
1271 }
1272
1273 static void
1274 igb_tx_queue_release(struct igb_tx_queue *txq)
1275 {
1276 if (txq != NULL) {
1277 igb_tx_queue_release_mbufs(txq);
1278 rte_free(txq->sw_ring);
1279 rte_free(txq);
1280 }
1281 }
1282
1283 void
1284 eth_igb_tx_queue_release(void *txq)
1285 {
1286 igb_tx_queue_release(txq);
1287 }
1288
1289 static int
1290 igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
1291 {
1292 struct igb_tx_entry *sw_ring;
1293 volatile union e1000_adv_tx_desc *txr;
1294 uint16_t tx_first; /* First segment analyzed. */
1295 uint16_t tx_id; /* Current segment being processed. */
1296 uint16_t tx_last; /* Last segment in the current packet. */
1297 uint16_t tx_next; /* First segment of the next packet. */
1298 int count;
1299
1300 if (txq != NULL) {
1301 count = 0;
1302 sw_ring = txq->sw_ring;
1303 txr = txq->tx_ring;
1304
1305 /*
1306 * tx_tail is the last sent packet on the sw_ring. Go to the end
1307 * of that packet (the last segment in the packet chain) and
1308 * then the next segment will be the start of the oldest segment
1309 * in the sw_ring. This is the first packet that will be
1310 * attempted to be freed.
1311 */
1312
1313 /* Get last segment in most recently added packet. */
1314 tx_first = sw_ring[txq->tx_tail].last_id;
1315
1316 /* Get the next segment, which is the oldest segment in ring. */
1317 tx_first = sw_ring[tx_first].next_id;
1318
1319 /* Set the current index to the first. */
1320 tx_id = tx_first;
1321
1322 /*
1323 * Loop through each packet. For each packet, verify that an
1324 * mbuf exists and that the last segment is free. If so, free
1325 * it and move on.
1326 */
1327 while (1) {
1328 tx_last = sw_ring[tx_id].last_id;
1329
1330 if (sw_ring[tx_last].mbuf) {
1331 if (txr[tx_last].wb.status &
1332 E1000_TXD_STAT_DD) {
1333 /*
1334 * Increment the number of packets
1335 * freed.
1336 */
1337 count++;
1338
1339 /* Get the start of the next packet. */
1340 tx_next = sw_ring[tx_last].next_id;
1341
1342 /*
1343 * Loop through all segments in a
1344 * packet.
1345 */
1346 do {
1347 rte_pktmbuf_free_seg(sw_ring[tx_id].mbuf);
1348 sw_ring[tx_id].mbuf = NULL;
1349 sw_ring[tx_id].last_id = tx_id;
1350
1351 /* Move to the next segment. */
1352 tx_id = sw_ring[tx_id].next_id;
1353
1354 } while (tx_id != tx_next);
1355
1356 if (unlikely(count == (int)free_cnt))
1357 break;
1358 } else
1359 /*
1360 * mbuf still in use, nothing left to
1361 * free.
1362 */
1363 break;
1364 } else {
1365 /*
1366 * There are multiple reasons to be here:
1367 * 1) All the packets on the ring have been
1368 * freed - tx_id is equal to tx_first
1369 * and some packets have been freed.
1370 * - Done, exit
1371 * 2) The interface has not sent a ring's worth of
1372 * packets yet, so the segment after the tail is
1373 * still empty. Or a previous call to this
1374 * function freed some of the segments but
1375 * not all of them, so there is a hole in the list.
1376 * Hopefully this is a rare case.
1377 * - Walk the list and find the next mbuf. If
1378 * there isn't one, then done.
1379 */
1380 if (likely((tx_id == tx_first) && (count != 0)))
1381 break;
1382
1383 /*
1384 * Walk the list and find the next mbuf, if any.
1385 */
1386 do {
1387 /* Move to the next segment. */
1388 tx_id = sw_ring[tx_id].next_id;
1389
1390 if (sw_ring[tx_id].mbuf)
1391 break;
1392
1393 } while (tx_id != tx_first);
1394
1395 /*
1396 * Determine why the previous loop bailed. If there
1397 * is no mbuf, we are done.
1398 */
1399 if (sw_ring[tx_id].mbuf == NULL)
1400 break;
1401 }
1402 }
1403 } else
1404 count = -ENODEV;
1405
1406 return count;
1407 }
1408
1409 int
1410 eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
1411 {
1412 return igb_tx_done_cleanup(txq, free_cnt);
1413 }
1414
1415 static void
1416 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1417 {
1418 txq->tx_head = 0;
1419 txq->tx_tail = 0;
1420 txq->ctx_curr = 0;
1421 memset((void*)&txq->ctx_cache, 0,
1422 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1423 }
1424
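/*
 * Reset a TX queue to its initial state: zero the hardware ring, mark every
 * descriptor as completed (DD set), re-link the software ring entries and
 * clear the head/tail/context state.
 */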
1425 static void
1426 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1427 {
1428 static const union e1000_adv_tx_desc zeroed_desc = {{0}};
1429 struct igb_tx_entry *txe = txq->sw_ring;
1430 uint16_t i, prev;
1431 struct e1000_hw *hw;
1432
1433 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1434 /* Zero out HW ring memory */
1435 for (i = 0; i < txq->nb_tx_desc; i++) {
1436 txq->tx_ring[i] = zeroed_desc;
1437 }
1438
1439 /* Initialize ring entries */
1440 prev = (uint16_t)(txq->nb_tx_desc - 1);
1441 for (i = 0; i < txq->nb_tx_desc; i++) {
1442 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1443
1444 txd->wb.status = E1000_TXD_STAT_DD;
1445 txe[i].mbuf = NULL;
1446 txe[i].last_id = i;
1447 txe[prev].next_id = i;
1448 prev = i;
1449 }
1450
1451 txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1452 /* 82575 specific, each tx queue will use 2 hw contexts */
1453 if (hw->mac.type == e1000_82575)
1454 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1455
1456 igb_reset_tx_queue_stat(txq);
1457 }
1458
1459 uint64_t
1460 igb_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
1461 {
1462 uint64_t tx_offload_capa;
1463
1464 RTE_SET_USED(dev);
1465 tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
1466 DEV_TX_OFFLOAD_IPV4_CKSUM |
1467 DEV_TX_OFFLOAD_UDP_CKSUM |
1468 DEV_TX_OFFLOAD_TCP_CKSUM |
1469 DEV_TX_OFFLOAD_SCTP_CKSUM |
1470 DEV_TX_OFFLOAD_TCP_TSO |
1471 DEV_TX_OFFLOAD_MULTI_SEGS;
1472
1473 return tx_offload_capa;
1474 }
1475
1476 uint64_t
1477 igb_get_tx_queue_offloads_capa(struct rte_eth_dev *dev)
1478 {
1479 uint64_t tx_queue_offload_capa;
1480
1481 tx_queue_offload_capa = igb_get_tx_port_offloads_capa(dev);
1482
1483 return tx_queue_offload_capa;
1484 }
1485
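/*
 * Set up a TX queue: validate the descriptor count, allocate the queue
 * structure and software ring, reserve a DMA memzone large enough for the
 * maximum ring size, install the transmit burst/prepare callbacks and reset
 * the queue to its initial state.
 */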
1486 int
1487 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1488 uint16_t queue_idx,
1489 uint16_t nb_desc,
1490 unsigned int socket_id,
1491 const struct rte_eth_txconf *tx_conf)
1492 {
1493 const struct rte_memzone *tz;
1494 struct igb_tx_queue *txq;
1495 struct e1000_hw *hw;
1496 uint32_t size;
1497 uint64_t offloads;
1498
1499 offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
1500
1501 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1502
1503 /*
1504 * Validate number of transmit descriptors.
1505 * It must not exceed the hardware maximum, and must be a multiple
1506 * of IGB_TXD_ALIGN.
1507 */
1508 if (nb_desc % IGB_TXD_ALIGN != 0 ||
1509 (nb_desc > E1000_MAX_RING_DESC) ||
1510 (nb_desc < E1000_MIN_RING_DESC)) {
1511 return -EINVAL;
1512 }
1513
1514 /*
1515 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1516 * driver.
1517 */
1518 if (tx_conf->tx_free_thresh != 0)
1519 PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
1520 "used for the 1G driver.");
1521 if (tx_conf->tx_rs_thresh != 0)
1522 PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
1523 "used for the 1G driver.");
1524 if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
1525 PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
1526 "consider setting the TX WTHRESH value to 4, 8, "
1527 "or 16.");
1528
1529 /* Free memory prior to re-allocation if needed */
1530 if (dev->data->tx_queues[queue_idx] != NULL) {
1531 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1532 dev->data->tx_queues[queue_idx] = NULL;
1533 }
1534
1535 /* First allocate the tx queue data structure */
1536 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1537 RTE_CACHE_LINE_SIZE);
1538 if (txq == NULL)
1539 return -ENOMEM;
1540
1541 /*
1542 * Allocate TX ring hardware descriptors. A memzone large enough to
1543 * handle the maximum ring size is allocated in order to allow for
1544 * resizing in later calls to the queue setup function.
1545 */
1546 size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
1547 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
1548 E1000_ALIGN, socket_id);
1549 if (tz == NULL) {
1550 igb_tx_queue_release(txq);
1551 return -ENOMEM;
1552 }
1553
1554 txq->nb_tx_desc = nb_desc;
1555 txq->pthresh = tx_conf->tx_thresh.pthresh;
1556 txq->hthresh = tx_conf->tx_thresh.hthresh;
1557 txq->wthresh = tx_conf->tx_thresh.wthresh;
1558 if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
1559 txq->wthresh = 1;
1560 txq->queue_id = queue_idx;
1561 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1562 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1563 txq->port_id = dev->data->port_id;
1564
1565 txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1566 txq->tx_ring_phys_addr = tz->iova;
1567
1568 txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1569 /* Allocate software ring */
1570 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1571 sizeof(struct igb_tx_entry) * nb_desc,
1572 RTE_CACHE_LINE_SIZE);
1573 if (txq->sw_ring == NULL) {
1574 igb_tx_queue_release(txq);
1575 return -ENOMEM;
1576 }
1577 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1578 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1579
1580 igb_reset_tx_queue(txq, dev);
1581 dev->tx_pkt_burst = eth_igb_xmit_pkts;
1582 dev->tx_pkt_prepare = &eth_igb_prep_pkts;
1583 dev->data->tx_queues[queue_idx] = txq;
1584 txq->offloads = offloads;
1585
1586 return 0;
1587 }
1588
1589 static void
1590 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1591 {
1592 unsigned i;
1593
1594 if (rxq->sw_ring != NULL) {
1595 for (i = 0; i < rxq->nb_rx_desc; i++) {
1596 if (rxq->sw_ring[i].mbuf != NULL) {
1597 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1598 rxq->sw_ring[i].mbuf = NULL;
1599 }
1600 }
1601 }
1602 }
1603
1604 static void
1605 igb_rx_queue_release(struct igb_rx_queue *rxq)
1606 {
1607 if (rxq != NULL) {
1608 igb_rx_queue_release_mbufs(rxq);
1609 rte_free(rxq->sw_ring);
1610 rte_free(rxq);
1611 }
1612 }
1613
1614 void
1615 eth_igb_rx_queue_release(void *rxq)
1616 {
1617 igb_rx_queue_release(rxq);
1618 }
1619
1620 static void
1621 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1622 {
1623 static const union e1000_adv_rx_desc zeroed_desc = {{0}};
1624 unsigned i;
1625
1626 /* Zero out HW ring memory */
1627 for (i = 0; i < rxq->nb_rx_desc; i++) {
1628 rxq->rx_ring[i] = zeroed_desc;
1629 }
1630
1631 rxq->rx_tail = 0;
1632 rxq->pkt_first_seg = NULL;
1633 rxq->pkt_last_seg = NULL;
1634 }
1635
1636 uint64_t
1637 igb_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
1638 {
1639 uint64_t rx_offload_capa;
1640
1641 RTE_SET_USED(dev);
1642 rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
1643 DEV_RX_OFFLOAD_VLAN_FILTER |
1644 DEV_RX_OFFLOAD_IPV4_CKSUM |
1645 DEV_RX_OFFLOAD_UDP_CKSUM |
1646 DEV_RX_OFFLOAD_TCP_CKSUM |
1647 DEV_RX_OFFLOAD_JUMBO_FRAME |
1648 DEV_RX_OFFLOAD_KEEP_CRC |
1649 DEV_RX_OFFLOAD_SCATTER |
1650 DEV_RX_OFFLOAD_RSS_HASH;
1651
1652 return rx_offload_capa;
1653 }
1654
1655 uint64_t
1656 igb_get_rx_queue_offloads_capa(struct rte_eth_dev *dev)
1657 {
1658 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1659 uint64_t rx_queue_offload_capa;
1660
1661 switch (hw->mac.type) {
1662 case e1000_vfadapt_i350:
1663 /*
1664 * As only one Rx queue can be used, let the per-queue offloading
1665 * capability be the same as the per-port offloading capability
1666 * for convenience.
1667 */
1668 rx_queue_offload_capa = igb_get_rx_port_offloads_capa(dev);
1669 break;
1670 default:
1671 rx_queue_offload_capa = 0;
1672 }
1673 return rx_queue_offload_capa;
1674 }
1675
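/*
 * Set up an RX queue: validate the descriptor count, allocate the queue
 * structure and software ring, reserve a DMA memzone large enough for the
 * maximum ring size, record the RDT/RDH register addresses and reset the
 * queue to its initial state.
 */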
1676 int
1677 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1678 uint16_t queue_idx,
1679 uint16_t nb_desc,
1680 unsigned int socket_id,
1681 const struct rte_eth_rxconf *rx_conf,
1682 struct rte_mempool *mp)
1683 {
1684 const struct rte_memzone *rz;
1685 struct igb_rx_queue *rxq;
1686 struct e1000_hw *hw;
1687 unsigned int size;
1688 uint64_t offloads;
1689
1690 offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
1691
1692 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1693
1694 /*
1695 * Validate number of receive descriptors.
1696 * It must not exceed the hardware maximum, and must be a multiple
1697 * of IGB_RXD_ALIGN.
1698 */
1699 if (nb_desc % IGB_RXD_ALIGN != 0 ||
1700 (nb_desc > E1000_MAX_RING_DESC) ||
1701 (nb_desc < E1000_MIN_RING_DESC)) {
1702 return -EINVAL;
1703 }
1704
1705 /* Free memory prior to re-allocation if needed */
1706 if (dev->data->rx_queues[queue_idx] != NULL) {
1707 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1708 dev->data->rx_queues[queue_idx] = NULL;
1709 }
1710
1711 /* First allocate the RX queue data structure. */
1712 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1713 RTE_CACHE_LINE_SIZE);
1714 if (rxq == NULL)
1715 return -ENOMEM;
1716 rxq->offloads = offloads;
1717 rxq->mb_pool = mp;
1718 rxq->nb_rx_desc = nb_desc;
1719 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1720 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1721 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1722 if (rxq->wthresh > 0 &&
1723 (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
1724 rxq->wthresh = 1;
1725 rxq->drop_en = rx_conf->rx_drop_en;
1726 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1727 rxq->queue_id = queue_idx;
1728 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1729 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1730 rxq->port_id = dev->data->port_id;
1731 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
1732 rxq->crc_len = RTE_ETHER_CRC_LEN;
1733 else
1734 rxq->crc_len = 0;
1735
1736 /*
1737 * Allocate RX ring hardware descriptors. A memzone large enough to
1738 * handle the maximum ring size is allocated in order to allow for
1739 * resizing in later calls to the queue setup function.
1740 */
1741 size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
1742 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1743 E1000_ALIGN, socket_id);
1744 if (rz == NULL) {
1745 igb_rx_queue_release(rxq);
1746 return -ENOMEM;
1747 }
1748 rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1749 rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1750 rxq->rx_ring_phys_addr = rz->iova;
1751 rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1752
1753 /* Allocate software ring. */
1754 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1755 sizeof(struct igb_rx_entry) * nb_desc,
1756 RTE_CACHE_LINE_SIZE);
1757 if (rxq->sw_ring == NULL) {
1758 igb_rx_queue_release(rxq);
1759 return -ENOMEM;
1760 }
1761 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1762 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1763
1764 dev->data->rx_queues[queue_idx] = rxq;
1765 igb_reset_rx_queue(rxq);
1766
1767 return 0;
1768 }
1769
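/*
 * Return an estimate of the number of completed RX descriptors (DD bit set),
 * scanning the ring from rx_tail in steps of IGB_RXQ_SCAN_INTERVAL.
 */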
1770 uint32_t
1771 eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
1772 {
1773 #define IGB_RXQ_SCAN_INTERVAL 4
1774 volatile union e1000_adv_rx_desc *rxdp;
1775 struct igb_rx_queue *rxq;
1776 uint32_t desc = 0;
1777
1778 rxq = dev->data->rx_queues[rx_queue_id];
1779 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1780
1781 while ((desc < rxq->nb_rx_desc) &&
1782 (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1783 desc += IGB_RXQ_SCAN_INTERVAL;
1784 rxdp += IGB_RXQ_SCAN_INTERVAL;
1785 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1786 rxdp = &(rxq->rx_ring[rxq->rx_tail +
1787 desc - rxq->nb_rx_desc]);
1788 }
1789
1790 return desc;
1791 }
1792
1793 int
1794 eth_igb_rx_descriptor_done(void *rx_queue, uint16_t offset)
1795 {
1796 volatile union e1000_adv_rx_desc *rxdp;
1797 struct igb_rx_queue *rxq = rx_queue;
1798 uint32_t desc;
1799
1800 if (unlikely(offset >= rxq->nb_rx_desc))
1801 return 0;
1802 desc = rxq->rx_tail + offset;
1803 if (desc >= rxq->nb_rx_desc)
1804 desc -= rxq->nb_rx_desc;
1805
1806 rxdp = &rxq->rx_ring[desc];
1807 return !!(rxdp->wb.upper.status_error & E1000_RXD_STAT_DD);
1808 }
1809
1810 int
1811 eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
1812 {
1813 struct igb_rx_queue *rxq = rx_queue;
1814 volatile uint32_t *status;
1815 uint32_t desc;
1816
1817 if (unlikely(offset >= rxq->nb_rx_desc))
1818 return -EINVAL;
1819
1820 if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1821 return RTE_ETH_RX_DESC_UNAVAIL;
1822
1823 desc = rxq->rx_tail + offset;
1824 if (desc >= rxq->nb_rx_desc)
1825 desc -= rxq->nb_rx_desc;
1826
1827 status = &rxq->rx_ring[desc].wb.upper.status_error;
1828 if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
1829 return RTE_ETH_RX_DESC_DONE;
1830
1831 return RTE_ETH_RX_DESC_AVAIL;
1832 }
1833
1834 int
1835 eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
1836 {
1837 struct igb_tx_queue *txq = tx_queue;
1838 volatile uint32_t *status;
1839 uint32_t desc;
1840
1841 if (unlikely(offset >= txq->nb_tx_desc))
1842 return -EINVAL;
1843
1844 desc = txq->tx_tail + offset;
1845 if (desc >= txq->nb_tx_desc)
1846 desc -= txq->nb_tx_desc;
1847
1848 status = &txq->tx_ring[desc].wb.status;
1849 if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
1850 return RTE_ETH_TX_DESC_DONE;
1851
1852 return RTE_ETH_TX_DESC_FULL;
1853 }
1854
1855 void
1856 igb_dev_clear_queues(struct rte_eth_dev *dev)
1857 {
1858 uint16_t i;
1859 struct igb_tx_queue *txq;
1860 struct igb_rx_queue *rxq;
1861
1862 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1863 txq = dev->data->tx_queues[i];
1864 if (txq != NULL) {
1865 igb_tx_queue_release_mbufs(txq);
1866 igb_reset_tx_queue(txq, dev);
1867 }
1868 }
1869
1870 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1871 rxq = dev->data->rx_queues[i];
1872 if (rxq != NULL) {
1873 igb_rx_queue_release_mbufs(rxq);
1874 igb_reset_rx_queue(rxq);
1875 }
1876 }
1877 }
1878
1879 void
1880 igb_dev_free_queues(struct rte_eth_dev *dev)
1881 {
1882 uint16_t i;
1883
1884 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1885 eth_igb_rx_queue_release(dev->data->rx_queues[i]);
1886 dev->data->rx_queues[i] = NULL;
1887 }
1888 dev->data->nb_rx_queues = 0;
1889
1890 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1891 eth_igb_tx_queue_release(dev->data->tx_queues[i]);
1892 dev->data->tx_queues[i] = NULL;
1893 }
1894 dev->data->nb_tx_queues = 0;
1895 }
1896
1897 /**
1898 * Receive Side Scaling (RSS).
1899 * See section 7.1.1.7 in the following document:
1900 * "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1901 *
1902 * Principles:
1903 * The source and destination IP addresses of the IP header and the source and
1904 * destination ports of TCP/UDP headers, if any, of received packets are hashed
1905 * against a configurable random key to compute a 32-bit RSS hash result.
1906 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1907 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1908 * RSS output index, which is used as the index of the RX queue in which to
1909 * store the received packets.
1910 * The following output is supplied in the RX write-back descriptor:
1911 * - 32-bit result of the Microsoft RSS hash function,
1912 * - 4-bit RSS type field.
1913 */
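
/*
 * Illustrative sketch (not part of the driver): the RETA lookup described
 * above, i.e. how a 32-bit RSS hash selects an RX queue. The helper name
 * and the table contents are hypothetical; the hardware performs this
 * lookup itself, the driver only programs the table.
 */
static __rte_unused uint8_t
igb_example_rss_queue_from_hash(uint32_t rss_hash, const uint8_t reta[128])
{
	/* The 7 LSBs of the hash index the 128-entry redirection table;
	 * each entry holds the (3-bit) RX queue index.
	 */
	return reta[rss_hash & 0x7F];
}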
1914
1915 /*
1916 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1917 * Used as the default key.
1918 */
1919 static uint8_t rss_intel_key[40] = {
1920 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1921 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1922 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1923 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1924 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1925 };
1926
1927 static void
1928 igb_rss_disable(struct rte_eth_dev *dev)
1929 {
1930 struct e1000_hw *hw;
1931 uint32_t mrqc;
1932
1933 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1934 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1935 mrqc &= ~E1000_MRQC_ENABLE_MASK;
1936 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1937 }
1938
1939 static void
1940 igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
1941 {
1942 uint8_t *hash_key;
1943 uint32_t rss_key;
1944 uint32_t mrqc;
1945 uint64_t rss_hf;
1946 uint16_t i;
1947
1948 hash_key = rss_conf->rss_key;
1949 if (hash_key != NULL) {
1950 /* Fill in RSS hash key */
1951 for (i = 0; i < 10; i++) {
1952 rss_key = hash_key[(i * 4)];
1953 rss_key |= hash_key[(i * 4) + 1] << 8;
1954 rss_key |= hash_key[(i * 4) + 2] << 16;
1955 rss_key |= hash_key[(i * 4) + 3] << 24;
1956 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1957 }
1958 }
1959
1960 /* Set configured hashing protocols in MRQC register */
1961 rss_hf = rss_conf->rss_hf;
1962 mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1963 if (rss_hf & ETH_RSS_IPV4)
1964 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1965 if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
1966 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1967 if (rss_hf & ETH_RSS_IPV6)
1968 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1969 if (rss_hf & ETH_RSS_IPV6_EX)
1970 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
1971 if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
1972 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
1973 if (rss_hf & ETH_RSS_IPV6_TCP_EX)
1974 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
1975 if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
1976 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
1977 if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
1978 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
1979 if (rss_hf & ETH_RSS_IPV6_UDP_EX)
1980 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
1981 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1982 }
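
/*
 * Worked example (illustrative only): with the default rss_intel_key
 * above, the loop in igb_hw_rss_hash_set() packs the first four key
 * bytes 0x6D, 0x5A, 0x56, 0xDA little-endian into RSSRK(0):
 *   0x6D | (0x5A << 8) | (0x56 << 16) | (0xDA << 24) = 0xDA565A6D
 * and likewise for the remaining nine RSSRK registers.
 */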
1983
1984 int
1985 eth_igb_rss_hash_update(struct rte_eth_dev *dev,
1986 struct rte_eth_rss_conf *rss_conf)
1987 {
1988 struct e1000_hw *hw;
1989 uint32_t mrqc;
1990 uint64_t rss_hf;
1991
1992 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1993
1994 /*
1995 * Before changing anything, first check that the update RSS operation
1996 * does not attempt to disable RSS, if RSS was enabled at
1997 * initialization time, or does not attempt to enable RSS, if RSS was
1998 * disabled at initialization time.
1999 */
2000 rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
2001 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2002 if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
2003 if (rss_hf != 0) /* Enable RSS */
2004 return -(EINVAL);
2005 return 0; /* Nothing to do */
2006 }
2007 /* RSS enabled */
2008 if (rss_hf == 0) /* Disable RSS */
2009 return -(EINVAL);
2010 igb_hw_rss_hash_set(hw, rss_conf);
2011 return 0;
2012 }
2013
2014 int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
2015 struct rte_eth_rss_conf *rss_conf)
2016 {
2017 struct e1000_hw *hw;
2018 uint8_t *hash_key;
2019 uint32_t rss_key;
2020 uint32_t mrqc;
2021 uint64_t rss_hf;
2022 uint16_t i;
2023
2024 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2025 hash_key = rss_conf->rss_key;
2026 if (hash_key != NULL) {
2027 /* Return RSS hash key */
2028 for (i = 0; i < 10; i++) {
2029 rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
2030 hash_key[(i * 4)] = rss_key & 0x000000FF;
2031 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2032 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2033 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2034 }
2035 }
2036
2037 /* Get RSS functions configured in MRQC register */
2038 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2039 if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
2040 rss_conf->rss_hf = 0;
2041 return 0;
2042 }
2043 rss_hf = 0;
2044 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
2045 rss_hf |= ETH_RSS_IPV4;
2046 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
2047 rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;
2048 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
2049 rss_hf |= ETH_RSS_IPV6;
2050 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
2051 rss_hf |= ETH_RSS_IPV6_EX;
2052 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
2053 rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;
2054 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
2055 rss_hf |= ETH_RSS_IPV6_TCP_EX;
2056 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
2057 rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;
2058 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
2059 rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;
2060 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
2061 rss_hf |= ETH_RSS_IPV6_UDP_EX;
2062 rss_conf->rss_hf = rss_hf;
2063 return 0;
2064 }
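
/*
 * Caller-side sketch (illustrative, not part of the driver): updating and
 * reading back the RSS configuration through the generic ethdev API, which
 * ends up in eth_igb_rss_hash_update()/eth_igb_rss_hash_conf_get() above.
 * The port id and hash selection are hypothetical. Note that the update
 * path refuses to enable RSS at run time if it was disabled at
 * initialization time, and vice versa.
 */
static __rte_unused int
igb_example_update_rss(uint16_t port_id)
{
	struct rte_eth_rss_conf rss_conf = {
		.rss_key = NULL,	/* keep the currently programmed key */
		.rss_key_len = 0,
		.rss_hf = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
	};
	int ret;

	ret = rte_eth_dev_rss_hash_update(port_id, &rss_conf);
	if (ret != 0)
		return ret;

	/* Read back the hash protocols actually enabled in hardware. */
	return rte_eth_dev_rss_hash_conf_get(port_id, &rss_conf);
}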
2065
2066 static void
2067 igb_rss_configure(struct rte_eth_dev *dev)
2068 {
2069 struct rte_eth_rss_conf rss_conf;
2070 struct e1000_hw *hw;
2071 uint32_t shift;
2072 uint16_t i;
2073
2074 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2075
2076 /* Fill in redirection table. */
2077 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2078 for (i = 0; i < 128; i++) {
2079 union e1000_reta {
2080 uint32_t dword;
2081 uint8_t bytes[4];
2082 } reta;
2083 uint8_t q_idx;
2084
2085 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
2086 i % dev->data->nb_rx_queues : 0);
2087 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
2088 if ((i & 3) == 3)
2089 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2090 }
2091
2092 /*
2093 * Configure the RSS key and the RSS protocols used to compute
2094 * the RSS hash of input packets.
2095 */
2096 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2097 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2098 igb_rss_disable(dev);
2099 return;
2100 }
2101 if (rss_conf.rss_key == NULL)
2102 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2103 igb_hw_rss_hash_set(hw, &rss_conf);
2104 }
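
/*
 * Worked example (illustrative only): with 4 RX queues, the loop above
 * spreads the 128 RETA entries round-robin as 0,1,2,3,0,1,2,3,... and
 * writes them four at a time, so on a little-endian host RETA(0) is
 * written with the dword 0x03020100, as is every following RETA
 * register (on 82575 each entry is additionally shifted left by 6).
 */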
2105
2106 /*
2107 * Check whether the MAC type supports VMDq.
2108 * Return 1 if it does, otherwise return 0.
2109 */
2110 static int
2111 igb_is_vmdq_supported(const struct rte_eth_dev *dev)
2112 {
2113 const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2114
2115 switch (hw->mac.type) {
2116 case e1000_82576:
2117 case e1000_82580:
2118 case e1000_i350:
2119 return 1;
2120 case e1000_82540:
2121 case e1000_82541:
2122 case e1000_82542:
2123 case e1000_82543:
2124 case e1000_82544:
2125 case e1000_82545:
2126 case e1000_82546:
2127 case e1000_82547:
2128 case e1000_82571:
2129 case e1000_82572:
2130 case e1000_82573:
2131 case e1000_82574:
2132 case e1000_82583:
2133 case e1000_i210:
2134 case e1000_i211:
2135 default:
2136 PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
2137 return 0;
2138 }
2139 }
2140
2141 static int
2142 igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2143 {
2144 struct rte_eth_vmdq_rx_conf *cfg;
2145 struct e1000_hw *hw;
2146 uint32_t mrqc, vt_ctl, vmolr, rctl;
2147 int i;
2148
2149 PMD_INIT_FUNC_TRACE();
2150
2151 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2152 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2153
2154 /* Check if the MAC type supports VMDq; a return value of 0 means it does not */
2155 if (igb_is_vmdq_supported(dev) == 0)
2156 return -1;
2157
2158 igb_rss_disable(dev);
2159
2160 /* RCTL: enable VLAN filter */
2161 rctl = E1000_READ_REG(hw, E1000_RCTL);
2162 rctl |= E1000_RCTL_VFE;
2163 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2164
2165 /* MRQC: enable vmdq */
2166 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2167 mrqc |= E1000_MRQC_ENABLE_VMDQ;
2168 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2169
2170 /* VTCTL: pool selection according to VLAN tag */
2171 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2172 if (cfg->enable_default_pool)
2173 vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2174 vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
2175 E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2176
2177 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2178 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2179 vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2180 E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2181 E1000_VMOLR_MPME);
2182
2183 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_UNTAG)
2184 vmolr |= E1000_VMOLR_AUPE;
2185 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_MC)
2186 vmolr |= E1000_VMOLR_ROMPE;
2187 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_HASH_UC)
2188 vmolr |= E1000_VMOLR_ROPE;
2189 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_BROADCAST)
2190 vmolr |= E1000_VMOLR_BAM;
2191 if (cfg->rx_mode & ETH_VMDQ_ACCEPT_MULTICAST)
2192 vmolr |= E1000_VMOLR_MPME;
2193
2194 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2195 }
2196
2197 /*
2198 * VMOLR: set STRVLAN to 1 if IGMAC in VT_CTL is set to 1.
2199 * Both 82576 and 82580 support it.
2200 */
2201 if (hw->mac.type != e1000_i350) {
2202 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2203 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2204 vmolr |= E1000_VMOLR_STRVLAN;
2205 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2206 }
2207 }
2208
2209 /* VFTA - enable all vlan filters */
2210 for (i = 0; i < IGB_VFTA_SIZE; i++)
2211 E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
2212
2213 /* VFRE: enable all 8 RX pools; both 82576 and i350 support it */
2214 if (hw->mac.type != e1000_82580)
2215 E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
2216
2217 /*
2218 * RAH/RAL - allow pools to receive on specific MAC addresses.
2219 * In this case, all pools should be able to receive on MAC address 0.
2220 */
2221 E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2222 E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2223
2224 /* VLVF: set up filters for vlan tags as configured */
2225 for (i = 0; i < cfg->nb_pool_maps; i++) {
2226 /* set vlan id in VF register and set the valid bit */
2227 E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE | \
2228 (cfg->pool_map[i].vlan_id & ETH_VLAN_ID_MAX) | \
2229 ((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT ) & \
2230 E1000_VLVF_POOLSEL_MASK)));
2231 }
2232
2233 E1000_WRITE_FLUSH(hw);
2234
2235 return 0;
2236 }
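
/*
 * Caller-side sketch (illustrative, not part of the driver): a minimal
 * VMDq-only RX configuration of the kind consumed by
 * igb_vmdq_rx_hw_configure() above. The pool count, VLAN id and pool
 * mapping are hypothetical; the structure is normally embedded in the
 * struct rte_eth_conf passed to rte_eth_dev_configure().
 */
static __rte_unused void
igb_example_vmdq_conf(struct rte_eth_conf *dev_conf)
{
	struct rte_eth_vmdq_rx_conf *cfg =
		&dev_conf->rx_adv_conf.vmdq_rx_conf;

	dev_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_ONLY;

	cfg->nb_queue_pools = ETH_8_POOLS;
	cfg->enable_default_pool = 1;
	cfg->default_pool = 0;
	cfg->rx_mode = ETH_VMDQ_ACCEPT_UNTAG | ETH_VMDQ_ACCEPT_BROADCAST;

	/* Steer VLAN 100 to pool 1 (bit 1 of the pool bitmap). */
	cfg->nb_pool_maps = 1;
	cfg->pool_map[0].vlan_id = 100;
	cfg->pool_map[0].pools = 1 << 1;
}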
2237
2238
2239 /*********************************************************************
2240 *
2241 * Enable receive unit.
2242 *
2243 **********************************************************************/
2244
2245 static int
2246 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2247 {
2248 struct igb_rx_entry *rxe = rxq->sw_ring;
2249 uint64_t dma_addr;
2250 unsigned i;
2251
2252 /* Initialize software ring entries. */
2253 for (i = 0; i < rxq->nb_rx_desc; i++) {
2254 volatile union e1000_adv_rx_desc *rxd;
2255 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2256
2257 if (mbuf == NULL) {
2258 PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
2259 "queue_id=%hu", rxq->queue_id);
2260 return -ENOMEM;
2261 }
2262 dma_addr =
2263 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2264 rxd = &rxq->rx_ring[i];
2265 rxd->read.hdr_addr = 0;
2266 rxd->read.pkt_addr = dma_addr;
2267 rxe[i].mbuf = mbuf;
2268 }
2269
2270 return 0;
2271 }
2272
2273 #define E1000_MRQC_DEF_Q_SHIFT (3)
2274 static int
2275 igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
2276 {
2277 struct e1000_hw *hw =
2278 E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2279 uint32_t mrqc;
2280
2281 if (RTE_ETH_DEV_SRIOV(dev).active == ETH_8_POOLS) {
2282 /*
2283 * SRIOV active scheme
2284 * FIXME: RSS together with VMDq & SR-IOV is not yet supported
2285 */
2286 mrqc = E1000_MRQC_ENABLE_VMDQ;
2287 /* 011b Def_Q ignore, according to VT_CTL.DEF_PL */
2288 mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
2289 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2290 } else if(RTE_ETH_DEV_SRIOV(dev).active == 0) {
2291 /*
2292 * SRIOV inactive scheme
2293 */
2294 switch (dev->data->dev_conf.rxmode.mq_mode) {
2295 case ETH_MQ_RX_RSS:
2296 igb_rss_configure(dev);
2297 break;
2298 case ETH_MQ_RX_VMDQ_ONLY:
2299 /*Configure general VMDQ only RX parameters*/
2300 igb_vmdq_rx_hw_configure(dev);
2301 break;
2302 case ETH_MQ_RX_NONE:
2303 /* If mq_mode is none, disable RSS. */
2304 default:
2305 igb_rss_disable(dev);
2306 break;
2307 }
2308 }
2309
2310 return 0;
2311 }
2312
2313 int
2314 eth_igb_rx_init(struct rte_eth_dev *dev)
2315 {
2316 struct rte_eth_rxmode *rxmode;
2317 struct e1000_hw *hw;
2318 struct igb_rx_queue *rxq;
2319 uint32_t rctl;
2320 uint32_t rxcsum;
2321 uint32_t srrctl;
2322 uint16_t buf_size;
2323 uint16_t rctl_bsize;
2324 uint16_t i;
2325 int ret;
2326
2327 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2328 srrctl = 0;
2329
2330 /*
2331 * Make sure receives are disabled while setting
2332 * up the descriptor ring.
2333 */
2334 rctl = E1000_READ_REG(hw, E1000_RCTL);
2335 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2336
2337 rxmode = &dev->data->dev_conf.rxmode;
2338
2339 /*
2340 * Configure support of jumbo frames, if any.
2341 */
2342 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
2343 rctl |= E1000_RCTL_LPE;
2344
2345 /*
2346 * Set the maximum packet length by default; it may be updated
2347 * later when dual VLAN is enabled or disabled.
2348 */
2349 E1000_WRITE_REG(hw, E1000_RLPML,
2350 dev->data->dev_conf.rxmode.max_rx_pkt_len +
2351 VLAN_TAG_SIZE);
2352 } else
2353 rctl &= ~E1000_RCTL_LPE;
2354
2355 /* Configure and enable each RX queue. */
2356 rctl_bsize = 0;
2357 dev->rx_pkt_burst = eth_igb_recv_pkts;
2358 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2359 uint64_t bus_addr;
2360 uint32_t rxdctl;
2361
2362 rxq = dev->data->rx_queues[i];
2363
2364 rxq->flags = 0;
2365 /*
2366 * i350 and i354 vlan packets have vlan tags byte swapped.
2367 */
2368 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
2369 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2370 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2371 } else {
2372 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2373 }
2374
2375 /* Allocate buffers for descriptor rings and set up queue */
2376 ret = igb_alloc_rx_queue_mbufs(rxq);
2377 if (ret)
2378 return ret;
2379
2380 /*
2381 * Reset crc_len in case it was changed after queue setup by a
2382 * call to configure
2383 */
2384 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
2385 rxq->crc_len = RTE_ETHER_CRC_LEN;
2386 else
2387 rxq->crc_len = 0;
2388
2389 bus_addr = rxq->rx_ring_phys_addr;
2390 E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
2391 rxq->nb_rx_desc *
2392 sizeof(union e1000_adv_rx_desc));
2393 E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
2394 (uint32_t)(bus_addr >> 32));
2395 E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
2396
2397 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2398
2399 /*
2400 * Configure RX buffer size.
2401 */
2402 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2403 RTE_PKTMBUF_HEADROOM);
2404 if (buf_size >= 1024) {
2405 /*
2406 * Configure the BSIZEPACKET field of the SRRCTL
2407 * register of the queue.
2408 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2409 * If this field is equal to 0b, then RCTL.BSIZE
2410 * determines the RX packet buffer size.
2411 */
2412 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2413 E1000_SRRCTL_BSIZEPKT_MASK);
2414 buf_size = (uint16_t) ((srrctl &
2415 E1000_SRRCTL_BSIZEPKT_MASK) <<
2416 E1000_SRRCTL_BSIZEPKT_SHIFT);
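
/*
 * Worked example (illustrative, assuming a pool created with the common
 * RTE_MBUF_DEFAULT_BUF_SIZE of 2048 + 128 bytes): the data room minus
 * the 128-byte headroom gives buf_size = 2048, BSIZEPACKET becomes
 * 2048 >> 10 = 2 (i.e. 2 KB), and buf_size is rounded back to
 * 2 << 10 = 2048 bytes.
 */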
2417
2418 /* Add dual VLAN tag length to account for double VLAN tagging */
2419 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2420 2 * VLAN_TAG_SIZE) > buf_size){
2421 if (!dev->data->scattered_rx)
2422 PMD_INIT_LOG(DEBUG,
2423 "forcing scatter mode");
2424 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2425 dev->data->scattered_rx = 1;
2426 }
2427 } else {
2428 /*
2429 * Use BSIZE field of the device RCTL register.
2430 */
2431 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2432 rctl_bsize = buf_size;
2433 if (!dev->data->scattered_rx)
2434 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2435 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2436 dev->data->scattered_rx = 1;
2437 }
2438
2439 /* Set whether packets are dropped when no descriptors are available */
2440 if (rxq->drop_en)
2441 srrctl |= E1000_SRRCTL_DROP_EN;
2442
2443 E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
2444
2445 /* Enable this RX queue. */
2446 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
2447 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2448 rxdctl &= 0xFFF00000;
2449 rxdctl |= (rxq->pthresh & 0x1F);
2450 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2451 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2452 E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
2453 }
2454
2455 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
2456 if (!dev->data->scattered_rx)
2457 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2458 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2459 dev->data->scattered_rx = 1;
2460 }
2461
2462 /*
2463 * Set up the BSIZE field of the RCTL register, if needed.
2464 * Buffer sizes >= 1024 are not [supposed to be] set up in the RCTL
2465 * register, since the code above configures the SRRCTL register of
2466 * the RX queue in such a case.
2467 * All configurable sizes are:
2468 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
2469 * 8192: rctl |= (E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX);
2470 * 4096: rctl |= (E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX);
2471 * 2048: rctl |= E1000_RCTL_SZ_2048;
2472 * 1024: rctl |= E1000_RCTL_SZ_1024;
2473 * 512: rctl |= E1000_RCTL_SZ_512;
2474 * 256: rctl |= E1000_RCTL_SZ_256;
2475 */
2476 if (rctl_bsize > 0) {
2477 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
2478 rctl |= E1000_RCTL_SZ_512;
2479 else /* 256 <= buf_size < 512 - use 256 */
2480 rctl |= E1000_RCTL_SZ_256;
2481 }
2482
2483 /*
2484 * Configure RSS if device configured with multiple RX queues.
2485 */
2486 igb_dev_mq_rx_configure(dev);
2487
2488 /* Update the rctl since igb_dev_mq_rx_configure may change its value */
2489 rctl |= E1000_READ_REG(hw, E1000_RCTL);
2490
2491 /*
2492 * Setup the Checksum Register.
2493 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
2494 */
2495 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2496 rxcsum |= E1000_RXCSUM_PCSD;
2497
2498 /* Enable both L3/L4 rx checksum offload */
2499 if (rxmode->offloads & DEV_RX_OFFLOAD_IPV4_CKSUM)
2500 rxcsum |= E1000_RXCSUM_IPOFL;
2501 else
2502 rxcsum &= ~E1000_RXCSUM_IPOFL;
2503 if (rxmode->offloads &
2504 (DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM))
2505 rxcsum |= E1000_RXCSUM_TUOFL;
2506 else
2507 rxcsum &= ~E1000_RXCSUM_TUOFL;
2508 if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
2509 rxcsum |= E1000_RXCSUM_CRCOFL;
2510 else
2511 rxcsum &= ~E1000_RXCSUM_CRCOFL;
2512
2513 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2514
2515 /* Setup the Receive Control Register. */
2516 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
2517 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
2518
2519 /* clear STRCRC bit in all queues */
2520 if (hw->mac.type == e1000_i350 ||
2521 hw->mac.type == e1000_i210 ||
2522 hw->mac.type == e1000_i211 ||
2523 hw->mac.type == e1000_i354) {
2524 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2525 rxq = dev->data->rx_queues[i];
2526 uint32_t dvmolr = E1000_READ_REG(hw,
2527 E1000_DVMOLR(rxq->reg_idx));
2528 dvmolr &= ~E1000_DVMOLR_STRCRC;
2529 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2530 }
2531 }
2532 } else {
2533 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
2534
2535 /* set STRCRC bit in all queues */
2536 if (hw->mac.type == e1000_i350 ||
2537 hw->mac.type == e1000_i210 ||
2538 hw->mac.type == e1000_i211 ||
2539 hw->mac.type == e1000_i354) {
2540 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2541 rxq = dev->data->rx_queues[i];
2542 uint32_t dvmolr = E1000_READ_REG(hw,
2543 E1000_DVMOLR(rxq->reg_idx));
2544 dvmolr |= E1000_DVMOLR_STRCRC;
2545 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2546 }
2547 }
2548 }
2549
2550 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2551 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2552 E1000_RCTL_RDMTS_HALF |
2553 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2554
2555 /* Make sure VLAN Filters are off. */
2556 if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_VMDQ_ONLY)
2557 rctl &= ~E1000_RCTL_VFE;
2558 /* Don't store bad packets. */
2559 rctl &= ~E1000_RCTL_SBP;
2560
2561 /* Enable Receives. */
2562 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2563
2564 /*
2565 * Setup the HW Rx Head and Tail Descriptor Pointers.
2566 * This needs to be done after enable.
2567 */
2568 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2569 rxq = dev->data->rx_queues[i];
2570 E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
2571 E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
2572 }
2573
2574 return 0;
2575 }
2576
2577 /*********************************************************************
2578 *
2579 * Enable transmit unit.
2580 *
2581 **********************************************************************/
2582 void
2583 eth_igb_tx_init(struct rte_eth_dev *dev)
2584 {
2585 struct e1000_hw *hw;
2586 struct igb_tx_queue *txq;
2587 uint32_t tctl;
2588 uint32_t txdctl;
2589 uint16_t i;
2590
2591 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2592
2593 /* Setup the Base and Length of the Tx Descriptor Rings. */
2594 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2595 uint64_t bus_addr;
2596 txq = dev->data->tx_queues[i];
2597 bus_addr = txq->tx_ring_phys_addr;
2598
2599 E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
2600 txq->nb_tx_desc *
2601 sizeof(union e1000_adv_tx_desc));
2602 E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
2603 (uint32_t)(bus_addr >> 32));
2604 E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
2605
2606 /* Setup the HW Tx Head and Tail descriptor pointers. */
2607 E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
2608 E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
2609
2610 /* Setup Transmit threshold registers. */
2611 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
2612 txdctl |= txq->pthresh & 0x1F;
2613 txdctl |= ((txq->hthresh & 0x1F) << 8);
2614 txdctl |= ((txq->wthresh & 0x1F) << 16);
2615 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2616 E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
2617 }
2618
2619 /* Program the Transmit Control Register. */
2620 tctl = E1000_READ_REG(hw, E1000_TCTL);
2621 tctl &= ~E1000_TCTL_CT;
2622 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2623 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2624
2625 e1000_config_collision_dist(hw);
2626
2627 /* This write will effectively turn on the transmit unit. */
2628 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2629 }
2630
2631 /*********************************************************************
2632 *
2633 * Enable VF receive unit.
2634 *
2635 **********************************************************************/
2636 int
2637 eth_igbvf_rx_init(struct rte_eth_dev *dev)
2638 {
2639 struct e1000_hw *hw;
2640 struct igb_rx_queue *rxq;
2641 uint32_t srrctl;
2642 uint16_t buf_size;
2643 uint16_t rctl_bsize;
2644 uint16_t i;
2645 int ret;
2646
2647 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2648
2649 /* setup MTU */
2650 e1000_rlpml_set_vf(hw,
2651 (uint16_t)(dev->data->dev_conf.rxmode.max_rx_pkt_len +
2652 VLAN_TAG_SIZE));
2653
2654 /* Configure and enable each RX queue. */
2655 rctl_bsize = 0;
2656 dev->rx_pkt_burst = eth_igb_recv_pkts;
2657 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2658 uint64_t bus_addr;
2659 uint32_t rxdctl;
2660
2661 rxq = dev->data->rx_queues[i];
2662
2663 rxq->flags = 0;
2664 /*
2665 * i350VF LB vlan packets have vlan tags byte swapped.
2666 */
2667 if (hw->mac.type == e1000_vfadapt_i350) {
2668 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2669 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2670 } else {
2671 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2672 }
2673
2674 /* Allocate buffers for descriptor rings and set up queue */
2675 ret = igb_alloc_rx_queue_mbufs(rxq);
2676 if (ret)
2677 return ret;
2678
2679 bus_addr = rxq->rx_ring_phys_addr;
2680 E1000_WRITE_REG(hw, E1000_RDLEN(i),
2681 rxq->nb_rx_desc *
2682 sizeof(union e1000_adv_rx_desc));
2683 E1000_WRITE_REG(hw, E1000_RDBAH(i),
2684 (uint32_t)(bus_addr >> 32));
2685 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
2686
2687 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2688
2689 /*
2690 * Configure RX buffer size.
2691 */
2692 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2693 RTE_PKTMBUF_HEADROOM);
2694 if (buf_size >= 1024) {
2695 /*
2696 * Configure the BSIZEPACKET field of the SRRCTL
2697 * register of the queue.
2698 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2699 * If this field is equal to 0b, then RCTL.BSIZE
2700 * determines the RX packet buffer size.
2701 */
2702 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2703 E1000_SRRCTL_BSIZEPKT_MASK);
2704 buf_size = (uint16_t) ((srrctl &
2705 E1000_SRRCTL_BSIZEPKT_MASK) <<
2706 E1000_SRRCTL_BSIZEPKT_SHIFT);
2707
2708 /* Add dual VLAN tag length to account for double VLAN tagging */
2709 if ((dev->data->dev_conf.rxmode.max_rx_pkt_len +
2710 2 * VLAN_TAG_SIZE) > buf_size){
2711 if (!dev->data->scattered_rx)
2712 PMD_INIT_LOG(DEBUG,
2713 "forcing scatter mode");
2714 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2715 dev->data->scattered_rx = 1;
2716 }
2717 } else {
2718 /*
2719 * Use BSIZE field of the device RCTL register.
2720 */
2721 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2722 rctl_bsize = buf_size;
2723 if (!dev->data->scattered_rx)
2724 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2725 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2726 dev->data->scattered_rx = 1;
2727 }
2728
2729 /* Set whether packets are dropped when no descriptors are available */
2730 if (rxq->drop_en)
2731 srrctl |= E1000_SRRCTL_DROP_EN;
2732
2733 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
2734
2735 /* Enable this RX queue. */
2736 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
2737 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2738 rxdctl &= 0xFFF00000;
2739 rxdctl |= (rxq->pthresh & 0x1F);
2740 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2741 if (hw->mac.type == e1000_vfadapt) {
2742 /*
2743 * Workaround for the 82576 VF erratum:
2744 * force WTHRESH to 1 to avoid write-back
2745 * sometimes not being triggered.
2746 */
2747 rxdctl |= 0x10000;
2748 PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1 !");
2749 }
2750 else
2751 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2752 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
2753 }
2754
2755 if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_SCATTER) {
2756 if (!dev->data->scattered_rx)
2757 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2758 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2759 dev->data->scattered_rx = 1;
2760 }
2761
2762 /*
2763 * Setup the HW Rx Head and Tail Descriptor Pointers.
2764 * This needs to be done after enable.
2765 */
2766 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2767 rxq = dev->data->rx_queues[i];
2768 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2769 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
2770 }
2771
2772 return 0;
2773 }
2774
2775 /*********************************************************************
2776 *
2777 * Enable VF transmit unit.
2778 *
2779 **********************************************************************/
2780 void
2781 eth_igbvf_tx_init(struct rte_eth_dev *dev)
2782 {
2783 struct e1000_hw *hw;
2784 struct igb_tx_queue *txq;
2785 uint32_t txdctl;
2786 uint16_t i;
2787
2788 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2789
2790 /* Setup the Base and Length of the Tx Descriptor Rings. */
2791 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2792 uint64_t bus_addr;
2793
2794 txq = dev->data->tx_queues[i];
2795 bus_addr = txq->tx_ring_phys_addr;
2796 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2797 txq->nb_tx_desc *
2798 sizeof(union e1000_adv_tx_desc));
2799 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2800 (uint32_t)(bus_addr >> 32));
2801 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2802
2803 /* Setup the HW Tx Head and Tail descriptor pointers. */
2804 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2805 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2806
2807 /* Setup Transmit threshold registers. */
2808 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2809 txdctl |= txq->pthresh & 0x1F;
2810 txdctl |= ((txq->hthresh & 0x1F) << 8);
2811 if (hw->mac.type == e1000_82576) {
2812 /*
2813 * Workaround for the 82576 VF erratum:
2814 * force WTHRESH to 1 to avoid write-back
2815 * sometimes not being triggered.
2816 */
2817 txdctl |= 0x10000;
2818 PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1 !");
2819 }
2820 else
2821 txdctl |= ((txq->wthresh & 0x1F) << 16);
2822 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2823 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
2824 }
2825
2826 }
2827
2828 void
2829 igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2830 struct rte_eth_rxq_info *qinfo)
2831 {
2832 struct igb_rx_queue *rxq;
2833
2834 rxq = dev->data->rx_queues[queue_id];
2835
2836 qinfo->mp = rxq->mb_pool;
2837 qinfo->scattered_rx = dev->data->scattered_rx;
2838 qinfo->nb_desc = rxq->nb_rx_desc;
2839
2840 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2841 qinfo->conf.rx_drop_en = rxq->drop_en;
2842 qinfo->conf.offloads = rxq->offloads;
2843 }
2844
2845 void
2846 igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2847 struct rte_eth_txq_info *qinfo)
2848 {
2849 struct igb_tx_queue *txq;
2850
2851 txq = dev->data->tx_queues[queue_id];
2852
2853 qinfo->nb_desc = txq->nb_tx_desc;
2854
2855 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2856 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2857 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2858 qinfo->conf.offloads = txq->offloads;
2859 }
2860
2861 int
2862 igb_rss_conf_init(struct rte_eth_dev *dev,
2863 struct igb_rte_flow_rss_conf *out,
2864 const struct rte_flow_action_rss *in)
2865 {
2866 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2867
2868 if (in->key_len > RTE_DIM(out->key) ||
2869 ((hw->mac.type == e1000_82576) &&
2870 (in->queue_num > IGB_MAX_RX_QUEUE_NUM_82576)) ||
2871 ((hw->mac.type != e1000_82576) &&
2872 (in->queue_num > IGB_MAX_RX_QUEUE_NUM)))
2873 return -EINVAL;
2874 out->conf = (struct rte_flow_action_rss){
2875 .func = in->func,
2876 .level = in->level,
2877 .types = in->types,
2878 .key_len = in->key_len,
2879 .queue_num = in->queue_num,
2880 .key = memcpy(out->key, in->key, in->key_len),
2881 .queue = memcpy(out->queue, in->queue,
2882 sizeof(*in->queue) * in->queue_num),
2883 };
2884 return 0;
2885 }
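
/*
 * Illustrative sketch (not part of the driver): how a rte_flow RSS action
 * supplied by an application could be captured into the driver-private
 * igb_rte_flow_rss_conf using igb_rss_conf_init() above. The queue list
 * and hash types are hypothetical; the default driver key is reused.
 */
static __rte_unused int
igb_example_capture_rss_action(struct rte_eth_dev *dev,
			       struct igb_rte_flow_rss_conf *out)
{
	static const uint16_t queues[2] = { 0, 1 };
	struct rte_flow_action_rss in = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.level = 0,
		.types = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_TCP,
		.key_len = sizeof(rss_intel_key),
		.key = rss_intel_key,
		.queue_num = RTE_DIM(queues),
		.queue = queues,
	};

	return igb_rss_conf_init(dev, out, &in);
}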
2886
2887 int
2888 igb_action_rss_same(const struct rte_flow_action_rss *comp,
2889 const struct rte_flow_action_rss *with)
2890 {
2891 return (comp->func == with->func &&
2892 comp->level == with->level &&
2893 comp->types == with->types &&
2894 comp->key_len == with->key_len &&
2895 comp->queue_num == with->queue_num &&
2896 !memcmp(comp->key, with->key, with->key_len) &&
2897 !memcmp(comp->queue, with->queue,
2898 sizeof(*with->queue) * with->queue_num));
2899 }
2900
2901 int
2902 igb_config_rss_filter(struct rte_eth_dev *dev,
2903 struct igb_rte_flow_rss_conf *conf, bool add)
2904 {
2905 uint32_t shift;
2906 uint16_t i, j;
2907 struct rte_eth_rss_conf rss_conf = {
2908 .rss_key = conf->conf.key_len ?
2909 (void *)(uintptr_t)conf->conf.key : NULL,
2910 .rss_key_len = conf->conf.key_len,
2911 .rss_hf = conf->conf.types,
2912 };
2913 struct e1000_filter_info *filter_info =
2914 E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
2915 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2916
2919 if (!add) {
2920 if (igb_action_rss_same(&filter_info->rss_info.conf,
2921 &conf->conf)) {
2922 igb_rss_disable(dev);
2923 memset(&filter_info->rss_info, 0,
2924 sizeof(struct igb_rte_flow_rss_conf));
2925 return 0;
2926 }
2927 return -EINVAL;
2928 }
2929
2930 if (filter_info->rss_info.conf.queue_num)
2931 return -EINVAL;
2932
2933 /* Fill in redirection table. */
2934 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2935 for (i = 0, j = 0; i < 128; i++, j++) {
2936 union e1000_reta {
2937 uint32_t dword;
2938 uint8_t bytes[4];
2939 } reta;
2940 uint8_t q_idx;
2941
2942 if (j == conf->conf.queue_num)
2943 j = 0;
2944 q_idx = conf->conf.queue[j];
2945 reta.bytes[i & 3] = (uint8_t)(q_idx << shift);
2946 if ((i & 3) == 3)
2947 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2948 }
2949
2950 /* Configure the RSS key and the RSS protocols used to compute
2951 * the RSS hash of input packets.
2952 */
2953 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2954 igb_rss_disable(dev);
2955 return 0;
2956 }
2957 if (rss_conf.rss_key == NULL)
2958 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2959 igb_hw_rss_hash_set(hw, &rss_conf);
2960
2961 if (igb_rss_conf_init(dev, &filter_info->rss_info, &conf->conf))
2962 return -EINVAL;
2963
2964 return 0;
2965 }