/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation.
 * Copyright(c) 2016-2018, Linaro Limited.
 */

#include <stdint.h>
#include <rte_ethdev_driver.h>
#include <rte_malloc.h>

#include "base/i40e_prototype.h"
#include "base/i40e_type.h"
#include "i40e_ethdev.h"
#include "i40e_rxtx.h"
#include "i40e_rxtx_vec_common.h"

#include <arm_neon.h>

#pragma GCC diagnostic ignored "-Wcast-qual"
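
/*
 * Rearm the RX descriptor ring: take RTE_I40E_RXQ_REARM_THRESH mbufs from
 * the mempool, attach them to the software ring and publish their buffer
 * addresses to the hardware descriptors starting at rxq->rxrearm_start.
 */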
static inline void
i40e_rxq_rearm(struct i40e_rx_queue *rxq)
{
	unsigned int i;
	uint16_t rx_id;
	uint64_t paddr;
	volatile union i40e_rx_desc *rxdp;
	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;
	uint64x2_t dma_addr0, dma_addr1;
	uint64x2_t zero = vdupq_n_u64(0);

	rxdp = rxq->rx_ring + rxq->rxrearm_start;

	/* Pull 'n' more MBUFs into the software ring */
	if (unlikely(rte_mempool_get_bulk(rxq->mp,
					  (void *)rxep,
					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
		    rxq->nb_rx_desc) {
			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
				rxep[i].mbuf = &rxq->fake_mbuf;
				vst1q_u64((uint64_t *)&rxdp[i].read, zero);
			}
		}
		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_I40E_RXQ_REARM_THRESH;
		return;
	}

	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
		mb0 = rxep[0].mbuf;
		mb1 = rxep[1].mbuf;

		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr0 = vdupq_n_u64(paddr);
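
		/* vdupq_n_u64() duplicates paddr into both 64-bit lanes, so
		 * each store below fills the packet buffer address and the
		 * header buffer address of the read descriptor with the
		 * same DMA address.
		 */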
		/* flush desc with pa dma_addr */
		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);

		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr1 = vdupq_n_u64(paddr);
		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
	}

	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
		rxq->rxrearm_start = 0;

	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;

	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

	/* Update the tail pointer on the NIC */
	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
}
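
/*
 * Translate the status/error words of four descriptors into mbuf ol_flags
 * (VLAN, RSS, flow director and checksum bits) and rewrite the rearm_data
 * words of the four corresponding mbufs.
 */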
static inline void
desc_to_olflags_v(struct i40e_rx_queue *rxq, uint64x2_t descs[4],
		  struct rte_mbuf **rx_pkts)
{
	uint32x4_t vlan0, vlan1, rss, l3_l4e;
	const uint64x2_t mbuf_init = {rxq->mbuf_initializer, 0};
	uint64x2_t rearm0, rearm1, rearm2, rearm3;

	/* mask everything except RSS, flow director and VLAN flags
	 * bit2 is for VLAN tag, bit11 for flow director indication
	 * bit13:12 for RSS indication.
	 */
	const uint32x4_t rss_vlan_msk = {
			0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804};
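
	/* Bits 24:22 (0x1c00000) of the same word carry the L3/L4 checksum
	 * status; they are kept by this mask and consumed by the l3_l4e
	 * lookup below (shift by 22).
	 */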

	const uint32x4_t cksum_mask = {
			PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
			PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
			PKT_RX_EIP_CKSUM_BAD,
			PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
			PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
			PKT_RX_EIP_CKSUM_BAD,
			PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
			PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
			PKT_RX_EIP_CKSUM_BAD,
			PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
			PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
			PKT_RX_EIP_CKSUM_BAD};

	/* map rss and vlan type to rss hash and vlan flag */
	const uint8x16_t vlan_flags = {
			0, 0, 0, 0,
			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
			0, 0, 0, 0,
			0, 0, 0, 0};

	const uint8x16_t rss_flags = {
			0, PKT_RX_FDIR, 0, 0,
			0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH | PKT_RX_FDIR,
			0, 0, 0, 0,
			0, 0, 0, 0};
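
	/* The checksum flags are stored pre-shifted right by 1: the widest
	 * of them (PKT_RX_L4_CKSUM_GOOD) does not fit in an 8-bit table
	 * entry. The lookup result is shifted back left further down.
	 */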
	const uint8x16_t l3_l4e_flags = {
			(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1,
			PKT_RX_IP_CKSUM_BAD >> 1,
			(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1,
			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
			(PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1,
			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
			(PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD |
			 PKT_RX_L4_CKSUM_BAD) >> 1,
			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
			 PKT_RX_IP_CKSUM_BAD) >> 1,
			0, 0, 0, 0, 0, 0, 0, 0};

	vlan0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
			  vreinterpretq_u32_u64(descs[2])).val[1];
	vlan1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
			  vreinterpretq_u32_u64(descs[3])).val[1];
	vlan0 = vzipq_u32(vlan0, vlan1).val[0];

	vlan1 = vandq_u32(vlan0, rss_vlan_msk);
	vlan0 = vreinterpretq_u32_u8(vqtbl1q_u8(vlan_flags,
						vreinterpretq_u8_u32(vlan1)));

	rss = vshrq_n_u32(vlan1, 11);
	rss = vreinterpretq_u32_u8(vqtbl1q_u8(rss_flags,
					      vreinterpretq_u8_u32(rss)));

	l3_l4e = vshrq_n_u32(vlan1, 22);
	l3_l4e = vreinterpretq_u32_u8(vqtbl1q_u8(l3_l4e_flags,
						 vreinterpretq_u8_u32(l3_l4e)));
	/* then we shift left 1 bit */
	l3_l4e = vshlq_n_u32(l3_l4e, 1);
	/* we need to mask out the redundant bits */
	l3_l4e = vandq_u32(l3_l4e, cksum_mask);

	vlan0 = vorrq_u32(vlan0, rss);
	vlan0 = vorrq_u32(vlan0, l3_l4e);

	rearm0 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 0), mbuf_init, 1);
	rearm1 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 1), mbuf_init, 1);
	rearm2 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 2), mbuf_init, 1);
	rearm3 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 3), mbuf_init, 1);
	vst1q_u64((uint64_t *)&rx_pkts[0]->rearm_data, rearm0);
	vst1q_u64((uint64_t *)&rx_pkts[1]->rearm_data, rearm1);
	vst1q_u64((uint64_t *)&rx_pkts[2]->rearm_data, rearm2);
	vst1q_u64((uint64_t *)&rx_pkts[3]->rearm_data, rearm3);
}

#define PKTLEN_SHIFT     10
#define I40E_UINT16_BIT (CHAR_BIT * sizeof(uint16_t))
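
/*
 * Extract the 8-bit packet type index from qword1 (bits 37:30) of each
 * descriptor and translate it through the adapter's ptype table.
 */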
static inline void
desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts,
		uint32_t *ptype_tbl)
{
	int i;
	uint8_t ptype;
	uint8x16_t tmp;

	for (i = 0; i < 4; i++) {
		tmp = vreinterpretq_u8_u64(vshrq_n_u64(descs[i], 30));
		ptype = vgetq_lane_u8(tmp, 8);
		rx_pkts[i]->packet_type = ptype_tbl[ptype];
	}
}

/*
 * Notice:
 * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
 * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
 *   numbers of DD bits
 */
static inline uint16_t
_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union i40e_rx_desc *rxdp;
	struct i40e_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
	int pos;
	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;

	/* mask to shuffle from desc. to mbuf */
	uint8x16_t shuf_msk = {
		0xFF, 0xFF,   /* pkt_type set as unknown */
		0xFF, 0xFF,   /* pkt_type set as unknown */
		14, 15,       /* octet 15~14, low 16 bits pkt_len */
		0xFF, 0xFF,   /* skip high 16 bits pkt_len, zero out */
		14, 15,       /* octet 15~14, 16 bits data_len */
		2, 3,         /* octet 2~3, low 16 bits vlan_macip */
		4, 5, 6, 7    /* octet 4~7, 32bits rss */
		};

	uint8x16_t eop_check = {
		0x02, 0x00, 0x02, 0x00,
		0x02, 0x00, 0x02, 0x00,
		0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00
		};

	uint16x8_t crc_adjust = {
		0, 0,         /* ignore pkt_type field */
		rxq->crc_len, /* sub crc on pkt_len */
		0,            /* ignore high-16bits of pkt_len */
		rxq->crc_len, /* sub crc on data_len */
		0, 0, 0       /* ignore non-length fields */
		};

	/* nb_pkts has to be less than or equal to RTE_I40E_MAX_RX_BURST */
	nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST);

	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);
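
	/* e.g. with the stock definitions (RTE_I40E_MAX_RX_BURST = 32,
	 * RTE_I40E_DESCS_PER_LOOP = 4), a request for 37 packets is clamped
	 * to 32, already a multiple of 4; a request for 7 is floored to 4.
	 */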

	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;

	rte_prefetch_non_temporal(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
		i40e_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->wb.qword1.status_error_len &
			rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
		return 0;

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];

	/* A. load 4 packet in one loop
	 * [A*. mask out 4 unused dirty field in desc]
	 * B. copy 4 mbuf point from swring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */

	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
			pos += RTE_I40E_DESCS_PER_LOOP,
			rxdp += RTE_I40E_DESCS_PER_LOOP) {
		uint64x2_t descs[RTE_I40E_DESCS_PER_LOOP];
		uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		uint16x8x2_t sterr_tmp1, sterr_tmp2;
		uint64x2_t mbp1, mbp2;
		uint16x8_t staterr;
		uint16x8_t tmp;
		uint64_t stat;

		int32x4_t len_shl = {0, 0, 0, PKTLEN_SHIFT};

		/* B.1 load 1 mbuf point */
		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
		/* Read desc statuses backwards to avoid race condition */
		/* A.1 load 4 pkts desc */
		descs[3] = vld1q_u64((uint64_t *)(rxdp + 3));
		rte_rmb();

		/* B.2 copy 2 mbuf point into rx_pkts */
		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);

		/* B.1 load 1 mbuf point */
		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);

		descs[2] = vld1q_u64((uint64_t *)(rxdp + 2));
		/* B.1 load 2 mbuf point */
		descs[1] = vld1q_u64((uint64_t *)(rxdp + 1));
		descs[0] = vld1q_u64((uint64_t *)(rxdp));

		/* B.2 copy 2 mbuf point into rx_pkts */
		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* avoid compiler reorder optimization */
		rte_compiler_barrier();
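
		/* Only lane 3 of len_shl is non-zero: the 14-bit length in
		 * qword1 (bits 51:38) is shifted up by PKTLEN_SHIFT so that
		 * shuf_msk can copy it out on a 16-bit boundary.
		 */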
		/* pkt 3,4 shift the pktlen field to be 16-bit aligned */
		uint32x4_t len3 = vshlq_u32(vreinterpretq_u32_u64(descs[3]),
					    len_shl);
		descs[3] = vreinterpretq_u64_u32(len3);
		uint32x4_t len2 = vshlq_u32(vreinterpretq_u32_u64(descs[2]),
					    len_shl);
		descs[2] = vreinterpretq_u64_u32(len2);

		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk);
		pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = vzipq_u16(vreinterpretq_u16_u64(descs[1]),
				       vreinterpretq_u16_u64(descs[3]));
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = vzipq_u16(vreinterpretq_u16_u64(descs[0]),
				       vreinterpretq_u16_u64(descs[2]));

		/* C.2 get 4 pkts staterr value */
		staterr = vzipq_u16(sterr_tmp1.val[1],
				    sterr_tmp2.val[1]).val[0];

		desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);

		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
		pkt_mb4 = vreinterpretq_u8_u16(tmp);
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
		pkt_mb3 = vreinterpretq_u8_u16(tmp);

		/* pkt 1,2 shift the pktlen field to be 16-bit aligned */
		uint32x4_t len1 = vshlq_u32(vreinterpretq_u32_u64(descs[1]),
					    len_shl);
		descs[1] = vreinterpretq_u64_u32(len1);
		uint32x4_t len0 = vshlq_u32(vreinterpretq_u32_u64(descs[0]),
					    len_shl);
		descs[0] = vreinterpretq_u64_u32(len0);

		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk);
		pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk);

		/* D.3 copy final 3,4 data to rx_pkts */
		vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
			 pkt_mb4);
		vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
			 pkt_mb3);

		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
		pkt_mb2 = vreinterpretq_u8_u16(tmp);
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
		pkt_mb1 = vreinterpretq_u8_u16(tmp);

		/* C* extract and record EOP bit */
		if (split_packet) {
			uint8x16_t eop_shuf_mask = {
					0x00, 0x02, 0x04, 0x06,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF};
			uint8x16_t eop_bits;

			/* and with mask to extract bits, flipping 1-0 */
			eop_bits = vmvnq_u8(vreinterpretq_u8_u16(staterr));
			eop_bits = vandq_u8(eop_bits, eop_check);
			/* the staterr values are not in order, as the count
			 * of dd bits doesn't care. However, for end of
			 * packet tracking, we do care, so shuffle. This also
			 * compresses the 32-bit values to 8-bit
			 */
			eop_bits = vqtbl1q_u8(eop_bits, eop_shuf_mask);

			/* store the resulting 32-bit value */
			vst1q_lane_u32((uint32_t *)split_packet,
				       vreinterpretq_u32_u8(eop_bits), 0);
			split_packet += RTE_I40E_DESCS_PER_LOOP;

			/* zero-out next pointers */
			rx_pkts[pos]->next = NULL;
			rx_pkts[pos + 1]->next = NULL;
			rx_pkts[pos + 2]->next = NULL;
			rx_pkts[pos + 3]->next = NULL;
		}
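
		/* Smear the DD bit (bit 0) across each 16-bit status lane:
		 * shift it to the sign bit, then arithmetic-shift it back.
		 * After inverting the low 64 bits, the first set bit of
		 * 'stat' marks the first descriptor not yet done, so
		 * __builtin_ctzl(stat) / I40E_UINT16_BIT is the number of
		 * packets received in this iteration.
		 */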
		staterr = vshlq_n_u16(staterr, I40E_UINT16_BIT - 1);
		staterr = vreinterpretq_u16_s16(
				vshrq_n_s16(vreinterpretq_s16_u16(staterr),
					    I40E_UINT16_BIT - 1));
		stat = ~vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0);

		rte_prefetch_non_temporal(rxdp + RTE_I40E_DESCS_PER_LOOP);

		/* D.3 copy final 1,2 data to rx_pkts */
		vst1q_u8((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
			 pkt_mb2);
		vst1q_u8((void *)&rx_pkts[pos]->rx_descriptor_fields1,
			 pkt_mb1);
		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
		/* C.4 calc available number of desc */
		if (unlikely(stat == 0)) {
			nb_pkts_recd += RTE_I40E_DESCS_PER_LOOP;
		} else {
			nb_pkts_recd += __builtin_ctzl(stat) / I40E_UINT16_BIT;
			break;
		}
	}

	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}

/*
 * Notice:
 * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
 * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
 *   numbers of DD bits
 */
uint16_t
i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts)
{
	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}

/* vPMD receive routine that reassembles scattered packets
 * Notice:
 * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet
 * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST
 *   numbers of DD bits
 */
uint16_t
i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
			     uint16_t nb_pkts)
{
	struct i40e_rx_queue *rxq = rx_queue;
	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};

	/* get some new buffers */
	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
			split_flags);
	if (nb_bufs == 0)
		return 0;

	/* happy day case, full burst + no packets to be joined */
	const uint64_t *split_fl64 = (uint64_t *)split_flags;

	if (rxq->pkt_first_seg == NULL &&
			split_fl64[0] == 0 && split_fl64[1] == 0 &&
			split_fl64[2] == 0 && split_fl64[3] == 0)
		return nb_bufs;

	/* reassemble any packets that need reassembly */
	unsigned int i = 0;

	if (rxq->pkt_first_seg == NULL) {
		/* find the first split flag, and only reassemble from there */
		while (i < nb_bufs && !split_flags[i])
			i++;
		if (i == nb_bufs)
			return nb_bufs;
	}
	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
				      &split_flags[i]);
}
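
/*
 * Build one 16-byte TX data descriptor: the low qword holds the buffer DMA
 * address, the high qword packs the descriptor type, command flags and
 * buffer size, all written with a single vector store.
 */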
static inline void
vtx1(volatile struct i40e_tx_desc *txdp,
		struct rte_mbuf *pkt, uint64_t flags)
{
	uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
			((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
			((uint64_t)pkt->data_len <<
			 I40E_TXD_QW1_TX_BUF_SZ_SHIFT));

	uint64x2_t descriptor = {pkt->buf_iova + pkt->data_off, high_qw};
	vst1q_u64((uint64_t *)txdp, descriptor);
}

static inline void
vtx(volatile struct i40e_tx_desc *txdp,
		struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
	int i;

	for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
		vtx1(txdp, *pkt, flags);
}

uint16_t
i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t nb_pkts)
{
	struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
	volatile struct i40e_tx_desc *txdp;
	struct i40e_tx_entry *txep;
	uint16_t n, nb_commit, tx_id;
	int i;
	uint64_t flags = I40E_TD_CMD;
	uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;

	/* crossing the tx_rs_thresh boundary is not allowed */
	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);

	if (txq->nb_tx_free < txq->tx_free_thresh)
		i40e_tx_free_bufs(txq);

	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	if (unlikely(nb_pkts == 0))
		return 0;

	tx_id = txq->tx_tail;
	txdp = &txq->tx_ring[tx_id];
	txep = &txq->sw_ring[tx_id];

	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);
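
	/* If the burst wraps past the end of the ring, fill the descriptors
	 * up to the ring end first (requesting a writeback via 'rs' on the
	 * last one), then restart from index 0 for the remainder.
	 */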
	n = (uint16_t)(txq->nb_tx_desc - tx_id);
	if (nb_commit >= n) {
		tx_backlog_entry(txep, tx_pkts, n);

		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
			vtx1(txdp, *tx_pkts, flags);

		vtx1(txdp, *tx_pkts++, rs);

		nb_commit = (uint16_t)(nb_commit - n);

		tx_id = 0;
		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

		/* avoid reaching the end of the ring */
		txdp = &txq->tx_ring[tx_id];
		txep = &txq->sw_ring[tx_id];
	}

	tx_backlog_entry(txep, tx_pkts, nb_commit);

	vtx(txdp, tx_pkts, nb_commit, flags);
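
	/* Once the new tail passes tx_next_rs, set the RS bit on that
	 * descriptor so the NIC reports completion for the batch, and move
	 * tx_next_rs forward by tx_rs_thresh.
	 */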
	tx_id = (uint16_t)(tx_id + nb_commit);
	if (tx_id > txq->tx_next_rs) {
		txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
			rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
						I40E_TXD_QW1_CMD_SHIFT);
		txq->tx_next_rs =
			(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
	}

	txq->tx_tail = tx_id;

	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);

	return nb_pkts;
}

void __attribute__((cold))
i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
{
	_i40e_rx_queue_release_mbufs_vec(rxq);
}

int __attribute__((cold))
i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
{
	return i40e_rxq_vec_setup_default(rxq);
}

int __attribute__((cold))
i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq)
{
	return 0;
}

int __attribute__((cold))
i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
	return i40e_rx_vec_dev_conf_condition_check_default(dev);
}