/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
 *   Copyright(c) 2017 IBM Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <stdint.h>
#include <rte_ethdev_driver.h>
#include <rte_malloc.h>

#include "base/i40e_prototype.h"
#include "base/i40e_type.h"
#include "i40e_ethdev.h"
#include "i40e_rxtx.h"
#include "i40e_rxtx_vec_common.h"

#include <altivec.h>

#pragma GCC diagnostic ignored "-Wcast-qual"
static inline void
i40e_rxq_rearm(struct i40e_rx_queue *rxq)
{
	int i;
	uint16_t rx_id;
	volatile union i40e_rx_desc *rxdp;

	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;

	vector unsigned long hdr_room = (vector unsigned long){
						RTE_PKTMBUF_HEADROOM,
						RTE_PKTMBUF_HEADROOM};
	vector unsigned long dma_addr0, dma_addr1;

	rxdp = rxq->rx_ring + rxq->rxrearm_start;
	/* Pull 'n' more MBUFs into the software ring */
	if (rte_mempool_get_bulk(rxq->mp,
				 (void *)rxep,
				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
		    rxq->nb_rx_desc) {
			dma_addr0 = (vector unsigned long){};
			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
				rxep[i].mbuf = &rxq->fake_mbuf;
				vec_st(dma_addr0, 0,
				       (vector unsigned long *)&rxdp[i].read);
			}
		}
		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_I40E_RXQ_REARM_THRESH;
		return;
	}
	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
		vector unsigned long vaddr0, vaddr1;
		uintptr_t p0, p1;

		mb0 = rxep[0].mbuf;
		mb1 = rxep[1].mbuf;

		/* Flush mbuf with pkt template.
		 * Data to be rearmed is 6 bytes long.
		 * Though, RX will overwrite ol_flags that are coming next
		 * anyway. So overwrite whole 8 bytes with one load:
		 * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
		 */
		p0 = (uintptr_t)&mb0->rearm_data;
		*(uint64_t *)p0 = rxq->mbuf_initializer;
		p1 = (uintptr_t)&mb1->rearm_data;
		*(uint64_t *)p1 = rxq->mbuf_initializer;
		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
		vaddr0 = vec_ld(0, (vector unsigned long *)&mb0->buf_addr);
		vaddr1 = vec_ld(0, (vector unsigned long *)&mb1->buf_addr);

		/* convert pa to dma_addr hdr/data */
		dma_addr0 = vec_mergel(vaddr0, vaddr0);
		dma_addr1 = vec_mergel(vaddr1, vaddr1);
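		/* Note: vec_mergel(v, v) replicates the upper 64-bit lane of
		 * the load above - the mbuf's buf_iova - into both lanes, so
		 * the descriptor's packet and header address fields receive
		 * the same DMA address.
		 */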
		/* add headroom to pa values */
		dma_addr0 = vec_add(dma_addr0, hdr_room);
		dma_addr1 = vec_add(dma_addr1, hdr_room);

		/* flush desc with pa dma_addr */
		vec_st(dma_addr0, 0, (vector unsigned long *)&rxdp++->read);
		vec_st(dma_addr1, 0, (vector unsigned long *)&rxdp++->read);
	}
	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
		rxq->rxrearm_start = 0;

	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;
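	/* The hardware tail must point at the last descriptor actually
	 * written, hence rxrearm_start - 1, wrapping to the ring end when
	 * the start index has just wrapped to zero.
	 */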
	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

	/* Update the tail pointer on the NIC */
	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
}
static inline void
desc_to_olflags_v(vector unsigned long descs[4], struct rte_mbuf **rx_pkts)
{
	vector unsigned int vlan0, vlan1, rss, l3_l4e;
	/* mask everything except RSS, flow director and VLAN flags
	 * bit2 is for VLAN tag, bit11 for flow director indication
	 * bits 13:12 for RSS indication.
	 */
	const vector unsigned int rss_vlan_msk = (vector unsigned int){
			(int32_t)0x1c03804, (int32_t)0x1c03804,
			(int32_t)0x1c03804, (int32_t)0x1c03804};
	/* map rss and vlan type to rss hash and vlan flag */
	const vector unsigned char vlan_flags = (vector unsigned char){
			0, 0, 0, 0,
			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
			0, 0, 0, 0,
			0, 0, 0, 0};

	const vector unsigned char rss_flags = (vector unsigned char){
			0, PKT_RX_FDIR, 0, 0,
			0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH | PKT_RX_FDIR,
			0, 0, 0, 0,
			0, 0, 0, 0};

	const vector unsigned char l3_l4e_flags = (vector unsigned char){
			0,
			PKT_RX_IP_CKSUM_BAD,
			PKT_RX_L4_CKSUM_BAD,
			PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
			PKT_RX_EIP_CKSUM_BAD,
			PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD,
			PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD,
			PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD
					     | PKT_RX_IP_CKSUM_BAD,
			0, 0, 0, 0, 0, 0, 0, 0};
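	/* The masked status bits below are used as byte indexes into these
	 * 16-entry tables via vec_perm, which here acts as a parallel table
	 * lookup producing one flag byte per packet.
	 */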
	vlan0 = (vector unsigned int)vec_mergel(descs[0], descs[1]);
	vlan1 = (vector unsigned int)vec_mergel(descs[2], descs[3]);
	vlan0 = (vector unsigned int)vec_mergeh(vlan0, vlan1);

	vlan1 = vec_and(vlan0, rss_vlan_msk);
	vlan0 = (vector unsigned int)vec_perm(vlan_flags,
					      (vector unsigned char){},
					      *(vector unsigned char *)&vlan1);
	rss = vec_sr(vlan1, (vector unsigned int){11, 11, 11, 11});
	rss = (vector unsigned int)vec_perm(rss_flags,
					    (vector unsigned char){},
					    *(vector unsigned char *)&rss);

	l3_l4e = vec_sr(vlan1, (vector unsigned int){22, 22, 22, 22});
	l3_l4e = (vector unsigned int)vec_perm(l3_l4e_flags,
					       (vector unsigned char){},
					       *(vector unsigned char *)&l3_l4e);
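	/* Combine the three flag sets; the merges above leave the per-packet
	 * flag words in swapped lane order, hence the 2, 3, 0, 1 indexing
	 * when the ol_flags are written out below.
	 */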
	vlan0 = vec_or(vlan0, rss);
	vlan0 = vec_or(vlan0, l3_l4e);

	rx_pkts[0]->ol_flags = (uint64_t)vlan0[2];
	rx_pkts[1]->ol_flags = (uint64_t)vlan0[3];
	rx_pkts[2]->ol_flags = (uint64_t)vlan0[0];
	rx_pkts[3]->ol_flags = (uint64_t)vlan0[1];
}
#define PKTLEN_SHIFT     10
static inline void
desc_to_ptype_v(vector unsigned long descs[4], struct rte_mbuf **rx_pkts,
		uint32_t *ptype_tbl)
{
	vector unsigned long ptype0 = vec_mergel(descs[0], descs[1]);
	vector unsigned long ptype1 = vec_mergel(descs[2], descs[3]);
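	/* The 8-bit hardware packet type sits at bits 30-37 of qword1, so a
	 * right shift by 30 leaves it in the low byte of each 64-bit lane,
	 * ready to index the adapter's ptype translation table.
	 */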
	ptype0 = vec_sr(ptype0, (vector unsigned long){30, 30});
	ptype1 = vec_sr(ptype1, (vector unsigned long){30, 30});

	rx_pkts[0]->packet_type =
		ptype_tbl[(*(vector unsigned char *)&ptype0)[0]];
	rx_pkts[1]->packet_type =
		ptype_tbl[(*(vector unsigned char *)&ptype0)[8]];
	rx_pkts[2]->packet_type =
		ptype_tbl[(*(vector unsigned char *)&ptype1)[0]];
	rx_pkts[3]->packet_type =
		ptype_tbl[(*(vector unsigned char *)&ptype1)[8]];
}
/* Notice:
 * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
 * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
 *   numbers of DD bits
 */
static inline uint16_t
_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union i40e_rx_desc *rxdp;
	struct i40e_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
	int pos;
	uint64_t var;
	vector unsigned char shuf_msk;
	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
	vector unsigned short crc_adjust = (vector unsigned short){
		0, 0,         /* ignore pkt_type field */
		rxq->crc_len, /* sub crc on pkt_len */
		0,            /* ignore high-16bits of pkt_len */
		rxq->crc_len, /* sub crc on data_len */
		0, 0, 0       /* ignore non-length fields */
		};
	vector unsigned long dd_check, eop_check;
	/* nb_pkts has to be less than or equal to RTE_I40E_MAX_RX_BURST */
	nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST);

	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;

	rte_prefetch0(rxdp);
	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
		i40e_rxq_rearm(rxq);
	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->wb.qword1.status_error_len &
	      rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
		return 0;
	/* 4 packets DD mask */
	dd_check = (vector unsigned long){0x0000000100000001ULL,
					  0x0000000100000001ULL};

	/* 4 packets EOP mask */
	eop_check = (vector unsigned long){0x0000000200000002ULL,
					   0x0000000200000002ULL};
	/* mask to shuffle from desc. to mbuf */
	shuf_msk = (vector unsigned char){
		0xFF, 0xFF,   /* pkt_type set as unknown */
		0xFF, 0xFF,   /* pkt_type set as unknown */
		14, 15,       /* octet 15~14, low 16 bits pkt_len */
		0xFF, 0xFF,   /* skip high 16 bits pkt_len, zero out */
		14, 15,       /* octet 15~14, 16 bits data_len */
		2, 3,         /* octet 2~3, low 16 bits vlan_macip */
		4, 5, 6, 7    /* octet 4~7, 32bits rss */
		};
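	/* The 16 permuted bytes land directly on the mbuf's
	 * rx_descriptor_fields1 block (packet_type, pkt_len, data_len,
	 * vlan_tci, rss hash), so a single store per packet fills them all.
	 */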
	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];
	/* A. load 4 packet in one loop
	 * [A*. mask out 4 unused dirty field in desc]
	 * B. copy 4 mbuf point from swring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */
	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
	     pos += RTE_I40E_DESCS_PER_LOOP,
	     rxdp += RTE_I40E_DESCS_PER_LOOP) {
		vector unsigned long descs[RTE_I40E_DESCS_PER_LOOP];
		vector unsigned char pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		vector unsigned short staterr, sterr_tmp1, sterr_tmp2;
		vector unsigned long mbp1, mbp2; /* two mbuf pointers
						  * in one vector register
						  */
		/* B.1 load 1 mbuf point */
		mbp1 = *(vector unsigned long *)&sw_ring[pos];
		/* Read desc statuses backwards to avoid race condition */
		/* A.1 load 4 pkts desc */
		descs[3] = *(vector unsigned long *)(rxdp + 3);
		rte_compiler_barrier();

		/* B.2 copy 2 mbuf point into rx_pkts */
		*(vector unsigned long *)&rx_pkts[pos] = mbp1;
		/* B.1 load 1 mbuf point */
		mbp2 = *(vector unsigned long *)&sw_ring[pos + 2];

		descs[2] = *(vector unsigned long *)(rxdp + 2);
		rte_compiler_barrier();
		/* B.1 load 2 mbuf point */
		descs[1] = *(vector unsigned long *)(rxdp + 1);
		rte_compiler_barrier();
		descs[0] = *(vector unsigned long *)(rxdp);

		/* B.2 copy 2 mbuf point into rx_pkts */
		*(vector unsigned long *)&rx_pkts[pos + 2] = mbp2;
		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}
		/* avoid compiler reorder optimization */
		rte_compiler_barrier();
		/* pkt 3,4 shift the pktlen field to be 16-bit aligned */
		const vector unsigned int len3 = vec_sl(
			vec_ld(0, (vector unsigned int *)&descs[3]),
			(vector unsigned int){0, 0, 0, PKTLEN_SHIFT});

		const vector unsigned int len2 = vec_sl(
			vec_ld(0, (vector unsigned int *)&descs[2]),
			(vector unsigned int){0, 0, 0, PKTLEN_SHIFT});

		/* merge the now-aligned packet length fields back in */
		descs[3] = (vector unsigned long)len3;
		descs[2] = (vector unsigned long)len2;
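		/* The length field in qword1 is not byte-aligned; the left
		 * shift by PKTLEN_SHIFT above lines it up on a 16-bit
		 * boundary so shuf_msk can copy it as whole bytes.
		 */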
		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = vec_perm((vector unsigned char)descs[3],
				   (vector unsigned char){}, shuf_msk);
		pkt_mb3 = vec_perm((vector unsigned char)descs[2],
				   (vector unsigned char){}, shuf_msk);
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = vec_mergel((vector unsigned short)descs[3],
					(vector unsigned short)descs[2]);
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = vec_mergel((vector unsigned short)descs[1],
					(vector unsigned short)descs[0]);
		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
		pkt_mb4 = (vector unsigned char)vec_sub(
				(vector unsigned short)pkt_mb4, crc_adjust);
		pkt_mb3 = (vector unsigned char)vec_sub(
				(vector unsigned short)pkt_mb3, crc_adjust);
		/* pkt 1,2 shift the pktlen field to be 16-bit aligned */
		const vector unsigned int len1 = vec_sl(
			vec_ld(0, (vector unsigned int *)&descs[1]),
			(vector unsigned int){0, 0, 0, PKTLEN_SHIFT});
		const vector unsigned int len0 = vec_sl(
			vec_ld(0, (vector unsigned int *)&descs[0]),
			(vector unsigned int){0, 0, 0, PKTLEN_SHIFT});

		/* merge the now-aligned packet length fields back in */
		descs[1] = (vector unsigned long)len1;
		descs[0] = (vector unsigned long)len0;
		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = vec_perm((vector unsigned char)descs[1],
				   (vector unsigned char){}, shuf_msk);
		pkt_mb1 = vec_perm((vector unsigned char)descs[0],
				   (vector unsigned char){}, shuf_msk);

		/* C.2 get 4 pkts staterr value */
		staterr = (vector unsigned short)vec_mergeh(
				sterr_tmp1, sterr_tmp2);
		/* D.3 copy final 3,4 data to rx_pkts */
		vec_st(pkt_mb4, 0,
		       (vector unsigned char *)&rx_pkts[pos + 3]
				->rx_descriptor_fields1);
		vec_st(pkt_mb3, 0,
		       (vector unsigned char *)&rx_pkts[pos + 2]
				->rx_descriptor_fields1);
		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
		pkt_mb2 = (vector unsigned char)vec_sub(
				(vector unsigned short)pkt_mb2, crc_adjust);
		pkt_mb1 = (vector unsigned char)vec_sub(
				(vector unsigned short)pkt_mb1, crc_adjust);
		/* C* extract and record EOP bit */
		if (split_packet) {
			vector unsigned char eop_shuf_mask =
				(vector unsigned char){
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0x04, 0x0C, 0x00, 0x08
				};

			/* and with mask to extract bits, flipping 1-0 */
			vector unsigned char eop_bits = vec_and(
				(vector unsigned char)vec_nor(staterr, staterr),
				(vector unsigned char)eop_check);
			/* the staterr values are not in order, as the count
			 * of dd bits doesn't care. However, for end of
			 * packet tracking, we do care, so shuffle. This also
			 * compresses the 32-bit values to 8-bit
			 */
			eop_bits = vec_perm(eop_bits, (vector unsigned char){},
					    eop_shuf_mask);
			/* store the resulting 32-bit value */
			*split_packet = (vec_ld(0,
					(vector unsigned int *)&eop_bits))[0];
			split_packet += RTE_I40E_DESCS_PER_LOOP;

			/* zero-out next pointers */
			rx_pkts[pos]->next = NULL;
			rx_pkts[pos + 1]->next = NULL;
			rx_pkts[pos + 2]->next = NULL;
			rx_pkts[pos + 3]->next = NULL;
		}
		/* C.3 calc available number of desc */
		staterr = vec_and(staterr, (vector unsigned short)dd_check);
		/* D.3 copy final 1,2 data to rx_pkts */
		vec_st(pkt_mb2, 0,
		       (vector unsigned char *)&rx_pkts[pos + 1]
				->rx_descriptor_fields1);
		vec_st(pkt_mb1, 0,
		       (vector unsigned char *)&rx_pkts[pos]
				->rx_descriptor_fields1);
		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
		desc_to_olflags_v(descs, &rx_pkts[pos]);
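		/* The popcount of the masked DD bits below tells how many of
		 * the four descriptors in this group completed; anything
		 * short of a full group means the ring is exhausted, so the
		 * loop stops after counting them.
		 */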
		/* C.4 calc available number of desc */
		var = __builtin_popcountll((vec_ld(0,
			(vector unsigned long *)&staterr)[0]));
		nb_pkts_recd += var;
		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
			break;
	}
	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}
/* Notice:
 * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
 * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
 *   numbers of DD bits
 */
uint16_t
i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts)
{
	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}
/* vPMD receive routine that reassembles scattered packets
 * Notice:
 * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
 * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
 *   numbers of DD bits
 */
uint16_t
i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
			     uint16_t nb_pkts)
{
	struct i40e_rx_queue *rxq = rx_queue;
	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};

	/* get some new buffers */
	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
					      split_flags);
	if (nb_bufs == 0)
		return 0;
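	/* Treating the 32-byte split_flags array as four 64-bit words lets
	 * the common no-split case below be detected with four compares
	 * instead of a per-packet scan.
	 */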
	/* happy day case, full burst + no packets to be joined */
	const uint64_t *split_fl64 = (uint64_t *)split_flags;

	if (rxq->pkt_first_seg == NULL &&
	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
	    split_fl64[2] == 0 && split_fl64[3] == 0)
		return nb_bufs;
	/* reassemble any packets that need reassembly */
	unsigned int i = 0;

	if (!rxq->pkt_first_seg) {
		/* find the first split flag, and only reassemble then */
		while (i < nb_bufs && !split_flags[i])
			i++;
		if (i == nb_bufs)
			return nb_bufs;
	}
	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
				      &split_flags[i]);
}
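/* Build one 16-byte TX descriptor: the low qword carries the buffer DMA
 * address, the high qword packs the descriptor type, command flags and
 * buffer size per the I40E_TXD_QW1_* shifts, so a single vector store
 * writes the whole entry.
 */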
static inline void
vtx1(volatile struct i40e_tx_desc *txdp,
     struct rte_mbuf *pkt, uint64_t flags)
{
	uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
			((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
			((uint64_t)pkt->data_len <<
			 I40E_TXD_QW1_TX_BUF_SZ_SHIFT));

	vector unsigned long descriptor = (vector unsigned long){
		pkt->buf_iova + pkt->data_off, high_qw};
	*(vector unsigned long *)txdp = descriptor;
}
static inline void
vtx(volatile struct i40e_tx_desc *txdp,
    struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
	int i;

	for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
		vtx1(txdp, *pkt, flags);
}
uint16_t
i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t nb_pkts)
{
	struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
	volatile struct i40e_tx_desc *txdp;
	struct i40e_tx_entry *txep;
	uint16_t n, nb_commit, tx_id;
	uint64_t flags = I40E_TD_CMD;
	uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
	int i;
	/* crossing the tx_rs_thresh boundary is not allowed */
	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);

	if (txq->nb_tx_free < txq->tx_free_thresh)
		i40e_tx_free_bufs(txq);

	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	if (unlikely(nb_pkts == 0))
		return 0;
	tx_id = txq->tx_tail;
	txdp = &txq->tx_ring[tx_id];
	txep = &txq->sw_ring[tx_id];

	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

	n = (uint16_t)(txq->nb_tx_desc - tx_id);
	if (nb_commit >= n) {
		tx_backlog_entry(txep, tx_pkts, n);

		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
			vtx1(txdp, *tx_pkts, flags);

		vtx1(txdp, *tx_pkts++, rs);

		nb_commit = (uint16_t)(nb_commit - n);

		tx_id = 0;
		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

		/* avoid reaching the end of the ring */
		txdp = &txq->tx_ring[tx_id];
		txep = &txq->sw_ring[tx_id];
	}

	tx_backlog_entry(txep, tx_pkts, nb_commit);
	vtx(txdp, tx_pkts, nb_commit, flags);

	tx_id = (uint16_t)(tx_id + nb_commit);
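	/* Writeback is requested only every tx_rs_thresh descriptors: once
	 * the new tail passes tx_next_rs, set the RS bit on that descriptor
	 * so the NIC signals completion for the whole batch behind it.
	 */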
	if (tx_id > txq->tx_next_rs) {
		txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
			rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
					 I40E_TXD_QW1_CMD_SHIFT);
		txq->tx_next_rs =
			(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
	}
	txq->tx_tail = tx_id;

	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);

	return nb_pkts;
}
void __attribute__((cold))
i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
{
	_i40e_rx_queue_release_mbufs_vec(rxq);
}
int __attribute__((cold))
i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
{
	return i40e_rxq_vec_setup_default(rxq);
}
int __attribute__((cold))
i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq)
{
	return 0;
}
int __attribute__((cold))
i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
	return i40e_rx_vec_dev_conf_condition_check_default(dev);
}