/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation.
 * Copyright(c) 2016-2018, Linaro Limited.
 */

#include <stdint.h>
#include <rte_ethdev_driver.h>
#include <rte_malloc.h>

#include "base/i40e_prototype.h"
#include "base/i40e_type.h"
#include "i40e_ethdev.h"
#include "i40e_rxtx.h"
#include "i40e_rxtx_vec_common.h"

#include <arm_neon.h>

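/* The NEON stores below write descriptor memory through plain uint64_t
 * pointers; the casts intentionally drop the volatile qualifier, so
 * silence -Wcast-qual for this file.
 */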
#pragma GCC diagnostic ignored "-Wcast-qual"

static inline void
i40e_rxq_rearm(struct i40e_rx_queue *rxq)
{
	int i;
	uint16_t rx_id;
	volatile union i40e_rx_desc *rxdp;
	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;
	uint64x2_t dma_addr0, dma_addr1;
	uint64x2_t zero = vdupq_n_u64(0);
	uint64_t paddr;

	rxdp = rxq->rx_ring + rxq->rxrearm_start;

	/* Pull 'n' more MBUFs into the software ring */
	if (unlikely(rte_mempool_get_bulk(rxq->mp,
					  (void *)rxep,
					  RTE_I40E_RXQ_REARM_THRESH) < 0)) {
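		/* Allocation failed.  If the ring is nearly drained
		 * (fewer than one rearm threshold of valid descriptors
		 * left), park the next few sw_ring slots on the queue's
		 * fake_mbuf and zero those descriptors so the hardware
		 * never DMAs to a stale address; then record the failure
		 * and retry on a later rearm call.
		 */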
		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
		    rxq->nb_rx_desc) {
			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
				rxep[i].mbuf = &rxq->fake_mbuf;
				vst1q_u64((uint64_t *)&rxdp[i].read, zero);
			}
		}
		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_I40E_RXQ_REARM_THRESH;
		return;
	}

	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
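	/* Each 128-bit store below fills both address qwords of one read
	 * descriptor (pkt_addr and hdr_addr) with the buffer IOVA, which
	 * also overwrites the stale DD bit left by the previous write-back.
	 */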
	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
		mb0 = rxep[0].mbuf;
		mb1 = rxep[1].mbuf;

		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr0 = vdupq_n_u64(paddr);

		/* flush desc with the buffer's DMA address */
		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);

		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr1 = vdupq_n_u64(paddr);
		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
	}

	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
		rxq->rxrearm_start = 0;

	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;

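	/* Point the hardware tail at the last descriptor just armed:
	 * one entry behind rxrearm_start, wrapping to the ring end when
	 * rxrearm_start wrapped to zero.
	 */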
	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

	/* Update the tail pointer on the NIC */
	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
}

static inline void
desc_to_olflags_v(struct i40e_rx_queue *rxq, uint64x2_t descs[4],
		  struct rte_mbuf **rx_pkts)
{
	uint32x4_t vlan0, vlan1, rss, l3_l4e;
	const uint64x2_t mbuf_init = {rxq->mbuf_initializer, 0};
	uint64x2_t rearm0, rearm1, rearm2, rearm3;

	/* mask everything except RSS, flow director and VLAN flags
	 * bit2 is for VLAN tag, bit11 for flow director indication
	 * bits 13:12 for RSS indication.
	 */
	const uint32x4_t rss_vlan_msk = {
			0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804};

	const uint32x4_t cksum_mask = {
			PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
			PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
			PKT_RX_EIP_CKSUM_BAD,
			PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
			PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
			PKT_RX_EIP_CKSUM_BAD,
			PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
			PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
			PKT_RX_EIP_CKSUM_BAD,
			PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
			PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
			PKT_RX_EIP_CKSUM_BAD};

	/* map rss and vlan type to rss hash and vlan flag */
	const uint8x16_t vlan_flags = {
			0, 0, 0, 0,
			PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, 0, 0,
			0, 0, 0, 0,
			0, 0, 0, 0};

	const uint8x16_t rss_flags = {
			0, PKT_RX_FDIR, 0, 0,
			0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH | PKT_RX_FDIR,
			0, 0, 0, 0,
			0, 0, 0, 0};

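	/* The checksum flag combinations are stored shifted right by one:
	 * PKT_RX_L4_CKSUM_GOOD is bit 8 and would not fit in the 8-bit
	 * table entries used by vqtbl1q_u8, so the lookup result is
	 * shifted back left by one bit further down.
	 */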
	const uint8x16_t l3_l4e_flags = {
			(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1,
			PKT_RX_IP_CKSUM_BAD >> 1,
			(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1,
			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
			(PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1,
			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
			(PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD |
			 PKT_RX_L4_CKSUM_BAD) >> 1,
			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
			 PKT_RX_IP_CKSUM_BAD) >> 1,
			0, 0, 0, 0, 0, 0, 0, 0};

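	/* Gather the low status/error word (bits 31:0 of qword1) of all
	 * four descriptors into a single vector, one 32-bit lane per
	 * packet, via two zip steps.
	 */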
	vlan0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
			  vreinterpretq_u32_u64(descs[2])).val[1];
	vlan1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
			  vreinterpretq_u32_u64(descs[3])).val[1];
	vlan0 = vzipq_u32(vlan0, vlan1).val[0];

	vlan1 = vandq_u32(vlan0, rss_vlan_msk);
	vlan0 = vreinterpretq_u32_u8(vqtbl1q_u8(vlan_flags,
						vreinterpretq_u8_u32(vlan1)));

	rss = vshrq_n_u32(vlan1, 11);
	rss = vreinterpretq_u32_u8(vqtbl1q_u8(rss_flags,
					      vreinterpretq_u8_u32(rss)));

	l3_l4e = vshrq_n_u32(vlan1, 22);
	l3_l4e = vreinterpretq_u32_u8(vqtbl1q_u8(l3_l4e_flags,
						 vreinterpretq_u8_u32(l3_l4e)));
	/* then we shift left 1 bit to restore the flag positions */
	l3_l4e = vshlq_n_u32(l3_l4e, 1);
	/* we need to mask out the redundant bits */
	l3_l4e = vandq_u32(l3_l4e, cksum_mask);

	vlan0 = vorrq_u32(vlan0, rss);
	vlan0 = vorrq_u32(vlan0, l3_l4e);

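	/* mbuf_init carries the first 8 bytes of rearm_data; merge each
	 * packet's ol_flags word into lane 1 so that one 128-bit store
	 * per mbuf rewrites rearm_data and ol_flags together.
	 */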
	rearm0 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 0), mbuf_init, 1);
	rearm1 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 1), mbuf_init, 1);
	rearm2 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 2), mbuf_init, 1);
	rearm3 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 3), mbuf_init, 1);

	vst1q_u64((uint64_t *)&rx_pkts[0]->rearm_data, rearm0);
	vst1q_u64((uint64_t *)&rx_pkts[1]->rearm_data, rearm1);
	vst1q_u64((uint64_t *)&rx_pkts[2]->rearm_data, rearm2);
	vst1q_u64((uint64_t *)&rx_pkts[3]->rearm_data, rearm3);
}

#define PKTLEN_SHIFT 10
#define I40E_UINT16_BIT (CHAR_BIT * sizeof(uint16_t))

static inline void
desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts,
		uint32_t *ptype_tbl)
{
	int i;
	uint8_t ptype;
	uint8x16_t tmp;

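	/* PTYPE lives in bits 37:30 of descriptor qword1; shifting each
	 * 64-bit lane right by 30 drops it into byte 8 of the vector,
	 * from where a single lane extract reads it.
	 */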
	for (i = 0; i < 4; i++) {
		tmp = vreinterpretq_u8_u64(vshrq_n_u64(descs[i], 30));
		ptype = vgetq_lane_u8(tmp, 8);
		rx_pkts[i]->packet_type = ptype_tbl[ptype];
	}
}

/*
 * Notice:
 * - if nb_pkts < RTE_I40E_DESCS_PER_LOOP, no packets are received
 * - if nb_pkts > RTE_I40E_VPMD_RX_BURST, only RTE_I40E_VPMD_RX_BURST
 *   DD bits are scanned
 */
static inline uint16_t
_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union i40e_rx_desc *rxdp;
	struct i40e_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
	int pos;
	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;

	/* mask to shuffle from desc. to mbuf */
	uint8x16_t shuf_msk = {
			0xFF, 0xFF,   /* pkt_type set as unknown */
			0xFF, 0xFF,   /* pkt_type set as unknown */
			14, 15,       /* octet 15~14, low 16 bits pkt_len */
			0xFF, 0xFF,   /* skip high 16 bits pkt_len, zero out */
			14, 15,       /* octet 15~14, 16 bits data_len */
			2, 3,         /* octet 2~3, low 16 bits vlan_macip */
			4, 5, 6, 7    /* octet 4~7, 32 bits rss */
			};
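	/* vqtbl1q_u8 returns 0 for out-of-range indices, so the 0xFF
	 * entries zero their destination bytes (pkt_type and the high
	 * half of pkt_len) in the same shuffle.
	 */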

	uint8x16_t eop_check = {
			0x02, 0x00, 0x02, 0x00,
			0x02, 0x00, 0x02, 0x00,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00};

	uint16x8_t crc_adjust = {
			0, 0,         /* ignore pkt_type field */
			rxq->crc_len, /* sub crc on pkt_len */
			0,            /* ignore high-16bits of pkt_len */
			rxq->crc_len, /* sub crc on data_len */
			0, 0, 0       /* ignore non-length fields */
			};

	/* nb_pkts has to be at most RTE_I40E_MAX_RX_BURST */
	nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST);

	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);

	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;

	rte_prefetch_non_temporal(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
		i40e_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->wb.qword1.status_error_len &
	      rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
		return 0;

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];

	/* A. load 4 packets' descriptors in one loop
	 * [A*. mask out 4 unused dirty fields in desc]
	 * B. copy 4 mbuf pointers from sw_ring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */

	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
	     pos += RTE_I40E_DESCS_PER_LOOP,
	     rxdp += RTE_I40E_DESCS_PER_LOOP) {
		uint64x2_t descs[RTE_I40E_DESCS_PER_LOOP];
		uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		uint16x8x2_t sterr_tmp1, sterr_tmp2;
		uint64x2_t mbp1, mbp2;
		uint16x8_t staterr;
		uint16x8_t tmp;
		uint64_t stat;

		int32x4_t len_shl = {0, 0, 0, PKTLEN_SHIFT};

		/* B.1 load 2 mbuf pointers */
		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
		/* Read desc statuses backwards to avoid race condition */
		/* A.1 load 4 pkts desc */
		descs[3] = vld1q_u64((uint64_t *)(rxdp + 3));
		rte_rmb();
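		/* The read barrier orders the descs[3] load before the
		 * loads of descs[2..0] below: if the last descriptor of
		 * the group shows DD set, the earlier ones are guaranteed
		 * to be at least as fresh.
		 */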

		/* B.2 copy 2 mbuf pointers into rx_pkts */
		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);

		/* B.1 load 2 more mbuf pointers */
		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);

		descs[2] = vld1q_u64((uint64_t *)(rxdp + 2));
		/* A.1 load the remaining 2 descriptors */
		descs[1] = vld1q_u64((uint64_t *)(rxdp + 1));
		descs[0] = vld1q_u64((uint64_t *)(rxdp));

		/* B.2 copy 2 mbuf pointers into rx_pkts */
		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* avoid compiler reorder optimization */
		rte_compiler_barrier();

		/* pkts 3,4: shift the pktlen field to be 16-bit aligned */
		uint32x4_t len3 = vshlq_u32(vreinterpretq_u32_u64(descs[3]),
					    len_shl);
		descs[3] = vreinterpretq_u64_u32(len3);
		uint32x4_t len2 = vshlq_u32(vreinterpretq_u32_u64(descs[2]),
					    len_shl);
		descs[2] = vreinterpretq_u64_u32(len2);

		/* D.1 pkts 3,4: convert format from desc to pktmbuf */
		pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk);
		pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = vzipq_u16(vreinterpretq_u16_u64(descs[1]),
				       vreinterpretq_u16_u64(descs[3]));
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = vzipq_u16(vreinterpretq_u16_u64(descs[0]),
				       vreinterpretq_u16_u64(descs[2]));

		/* C.2 get 4 pkts' staterr values */
		staterr = vzipq_u16(sterr_tmp1.val[1],
				    sterr_tmp2.val[1]).val[0];
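		/* staterr now holds the low 16 status bits of descriptors
		 * 0..3 in its first four 16-bit lanes; lane order matches
		 * packet order, with the DD bit in bit 0 of each lane.
		 */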

		desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);

		/* D.2 pkts 3,4: set in_port/nb_seg and remove crc */
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
		pkt_mb4 = vreinterpretq_u8_u16(tmp);
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
		pkt_mb3 = vreinterpretq_u8_u16(tmp);

		/* pkts 1,2: shift the pktlen field to be 16-bit aligned */
		uint32x4_t len1 = vshlq_u32(vreinterpretq_u32_u64(descs[1]),
					    len_shl);
		descs[1] = vreinterpretq_u64_u32(len1);
		uint32x4_t len0 = vshlq_u32(vreinterpretq_u32_u64(descs[0]),
					    len_shl);
		descs[0] = vreinterpretq_u64_u32(len0);

		/* D.1 pkts 1,2: convert format from desc to pktmbuf */
		pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk);
		pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk);

		/* D.3 copy final data of pkts 3,4 to rx_pkts */
		vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
			 pkt_mb4);
		vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
			 pkt_mb3);

		/* D.2 pkts 1,2: set in_port/nb_seg and remove crc */
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
		pkt_mb2 = vreinterpretq_u8_u16(tmp);
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
		pkt_mb1 = vreinterpretq_u8_u16(tmp);

		/* C* extract and record EOP bit */
		if (split_packet) {
			uint8x16_t eop_shuf_mask = {
					0x00, 0x02, 0x04, 0x06,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF};
			uint8x16_t eop_bits;

			/* and with mask to extract bits, flipping 1-0 */
			eop_bits = vmvnq_u8(vreinterpretq_u8_u16(staterr));
			eop_bits = vandq_u8(eop_bits, eop_check);
			/* the staterr values are not in order, as the count
			 * of DD bits doesn't care about order. However, for
			 * end of packet tracking, we do care, so shuffle.
			 * This also compresses the 32-bit values to 8-bit
			 */
			eop_bits = vqtbl1q_u8(eop_bits, eop_shuf_mask);

			/* store the resulting 32-bit value */
			vst1q_lane_u32((uint32_t *)split_packet,
				       vreinterpretq_u32_u8(eop_bits), 0);
			split_packet += RTE_I40E_DESCS_PER_LOOP;

			/* zero-out next pointers */
			rx_pkts[pos]->next = NULL;
			rx_pkts[pos + 1]->next = NULL;
			rx_pkts[pos + 2]->next = NULL;
			rx_pkts[pos + 3]->next = NULL;
		}

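		/* Convert each 16-bit lane into all-ones or all-zeros keyed
		 * on the DD bit: shift DD (bit 0) up to the sign position,
		 * then arithmetic-shift it back across the whole lane.
		 */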
		staterr = vshlq_n_u16(staterr, I40E_UINT16_BIT - 1);
		staterr = vreinterpretq_u16_s16(
				vshrq_n_s16(vreinterpretq_s16_u16(staterr),
					    I40E_UINT16_BIT - 1));
		stat = ~vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0);

		rte_prefetch_non_temporal(rxdp + RTE_I40E_DESCS_PER_LOOP);

		/* D.3 copy final data of pkts 1,2 to rx_pkts */
		vst1q_u8((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
			 pkt_mb2);
		vst1q_u8((void *)&rx_pkts[pos]->rx_descriptor_fields1,
			 pkt_mb1);
		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);

		/* C.4 calc available number of desc: stat inverts the DD
		 * lanes, so the trailing zero count of its low 64 bits,
		 * divided by 16, is the number of packets with DD set.
		 */
		if (unlikely(stat == 0)) {
			nb_pkts_recd += RTE_I40E_DESCS_PER_LOOP;
		} else {
			nb_pkts_recd += __builtin_ctzl(stat) / I40E_UINT16_BIT;
			break;
		}
	}

	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}

/*
 * Notice:
 * - if nb_pkts < RTE_I40E_DESCS_PER_LOOP, no packets are received
 * - if nb_pkts > RTE_I40E_VPMD_RX_BURST, only RTE_I40E_VPMD_RX_BURST
 *   DD bits are scanned
 */
uint16_t
i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts)
{
	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}

/* vPMD receive routine that reassembles scattered packets
 * Notice:
 * - if nb_pkts < RTE_I40E_DESCS_PER_LOOP, no packets are received
 * - if nb_pkts > RTE_I40E_VPMD_RX_BURST, only RTE_I40E_VPMD_RX_BURST
 *   DD bits are scanned
 */
uint16_t
i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
			     uint16_t nb_pkts)
{
	struct i40e_rx_queue *rxq = rx_queue;
	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};

	/* get some new buffers */
	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
					      split_flags);
	if (nb_bufs == 0)
		return 0;

	/* happy day case, full burst + no packets to be joined */
	const uint64_t *split_fl64 = (uint64_t *)split_flags;

	if (rxq->pkt_first_seg == NULL &&
	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
	    split_fl64[2] == 0 && split_fl64[3] == 0)
		return nb_bufs;

	/* reassemble any packets that need reassembly */
	unsigned int i = 0;

	if (rxq->pkt_first_seg == NULL) {
		/* find the first split flag, and only reassemble from there */
		while (i < nb_bufs && !split_flags[i])
			i++;
		if (i == nb_bufs)
			return nb_bufs;
	}
	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
				      &split_flags[i]);
}

static inline void
vtx1(volatile struct i40e_tx_desc *txdp,
     struct rte_mbuf *pkt, uint64_t flags)
{
	uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
			((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
			((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));

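	/* TX descriptor: qword0 = buffer DMA address, qword1 = dtype,
	 * command flags and buffer size; one 128-bit store publishes
	 * the whole descriptor.
	 */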
	uint64x2_t descriptor = {pkt->buf_iova + pkt->data_off, high_qw};
	vst1q_u64((uint64_t *)txdp, descriptor);
}

static inline void
vtx(volatile struct i40e_tx_desc *txdp,
    struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
	int i;

	for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
		vtx1(txdp, *pkt, flags);
}

uint16_t
i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t nb_pkts)
{
	struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
	volatile struct i40e_tx_desc *txdp;
	struct i40e_tx_entry *txep;
	uint16_t n, nb_commit, tx_id;
	uint64_t flags = I40E_TD_CMD;
	uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
	int i;

	/* crossing the tx_rs_thresh boundary is not allowed */
	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);

	if (txq->nb_tx_free < txq->tx_free_thresh)
		i40e_tx_free_bufs(txq);

	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	if (unlikely(nb_pkts == 0))
		return 0;

	tx_id = txq->tx_tail;
	txdp = &txq->tx_ring[tx_id];
	txep = &txq->sw_ring[tx_id];

	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

	n = (uint16_t)(txq->nb_tx_desc - tx_id);
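	/* n is the room left before the ring end.  If the burst crosses
	 * it, fill descriptors up to the end, request a report (RS) on
	 * the last one, then wrap to index 0 for the remainder.
	 */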
	if (nb_commit >= n) {
		tx_backlog_entry(txep, tx_pkts, n);

		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
			vtx1(txdp, *tx_pkts, flags);

		vtx1(txdp, *tx_pkts++, rs);

		nb_commit = (uint16_t)(nb_commit - n);

		tx_id = 0;
		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

		/* avoid reaching the end of the ring */
		txdp = &txq->tx_ring[tx_id];
		txep = &txq->sw_ring[tx_id];
	}

	tx_backlog_entry(txep, tx_pkts, nb_commit);

	vtx(txdp, tx_pkts, nb_commit, flags);

	tx_id = (uint16_t)(tx_id + nb_commit);
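	/* Once the tail passes tx_next_rs, set the RS bit on that
	 * descriptor so the hardware reports completions in
	 * tx_rs_thresh-sized batches, and advance the threshold.
	 */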
	if (tx_id > txq->tx_next_rs) {
		txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
			rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
					 I40E_TXD_QW1_CMD_SHIFT);
		txq->tx_next_rs =
			(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
	}

	txq->tx_tail = tx_id;

	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);

	return nb_pkts;
}

void __attribute__((cold))
i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
{
	_i40e_rx_queue_release_mbufs_vec(rxq);
}

int __attribute__((cold))
i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
{
	return i40e_rxq_vec_setup_default(rxq);
}

int __attribute__((cold))
i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq)
{
	return 0;
}

int __attribute__((cold))
i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
	return i40e_rx_vec_dev_conf_condition_check_default(dev);
}