]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2015 Intel Corporation | |
7c673cae FG |
3 | */ |
4 | ||
5 | #include <stdint.h> | |
11fdf7f2 | 6 | #include <rte_ethdev_driver.h> |
7c673cae FG |
7 | #include <rte_malloc.h> |
8 | ||
9 | #include "ixgbe_ethdev.h" | |
10 | #include "ixgbe_rxtx.h" | |
11 | #include "ixgbe_rxtx_vec_common.h" | |
12 | ||
13 | #include <arm_neon.h> | |
14 | ||
15 | #pragma GCC diagnostic ignored "-Wcast-qual" | |
16 | ||
/*
 * Refill the software ring with fresh mbufs and republish the matching
 * RX descriptors, then advance the NIC's RX tail register.
 *
 * Pulls RTE_IXGBE_RXQ_REARM_THRESH mbufs from the queue's mempool in a
 * single bulk get.  On allocation failure the per-port alloc-failed
 * counter is bumped and, if the ring is nearly exhausted, the next
 * RTE_IXGBE_DESCS_PER_LOOP entries are parked on the queue's fake_mbuf
 * with zeroed descriptors so the vector RX loop stays safe to run.
 */
static inline void
ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
{
	int i;
	uint16_t rx_id;
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;
	uint64x2_t dma_addr0, dma_addr1;
	uint64x2_t zero = vdupq_n_u64(0);
	uint64_t paddr;
	uint8x8_t p;

	rxdp = rxq->rx_ring + rxq->rxrearm_start;

	/* Pull 'n' more MBUFs into the software ring */
	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
					  (void *)rxep,
					  RTE_IXGBE_RXQ_REARM_THRESH) < 0)) {
		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
		    rxq->nb_rx_desc) {
			/* Ring almost empty: point the next burst worth of
			 * entries at the fake mbuf and zero their DMA
			 * addresses so stale descriptors are never used.
			 */
			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
				rxep[i].mbuf = &rxq->fake_mbuf;
				vst1q_u64((uint64_t *)&rxdp[i].read,
					  zero);
			}
		}
		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_IXGBE_RXQ_REARM_THRESH;
		return;
	}

	/* Template for mbuf->rearm_data, prepared at queue setup time */
	p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);

	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
		mb0 = rxep[0].mbuf;
		mb1 = rxep[1].mbuf;

		/*
		 * Flush mbuf with pkt template.
		 * Data to be rearmed is 6 bytes long.
		 */
		vst1_u8((uint8_t *)&mb0->rearm_data, p);
		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
		/* flush desc with pa dma_addr */
		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0);

		vst1_u8((uint8_t *)&mb1->rearm_data, p);
		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
		vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1);
	}

	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
		rxq->rxrearm_start = 0;

	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;

	/* Tail must point at the last valid descriptor, one before start */
	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

	/* Update the tail pointer on the NIC */
	IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
}
84 | ||
7c673cae FG |
85 | #define VTAG_SHIFT (3) |
86 | ||
/*
 * Derive mbuf ol_flags (VLAN / RSS hash / FDIR) for four packets from
 * their zipped descriptor bytes.
 *
 * sterr_tmp1/sterr_tmp2 carry the byte-zipped descriptors of packets
 * {0,2} and {1,3}; staterr carries the zipped status/error bytes, one
 * per packet in lanes 0..3 of the low word.
 */
static inline void
desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
		  uint8x16_t staterr, struct rte_mbuf **rx_pkts)
{
	uint8x16_t ptype;
	uint8x16_t vtag;

	/* union lets the 4 per-packet flag bytes be read back one by one */
	union {
		uint8_t e[4];
		uint32_t word;
	} vol;

	/* keep only the VLAN-present flag bit, one byte per packet */
	const uint8x16_t pkttype_msk = {
			PKT_RX_VLAN, PKT_RX_VLAN,
			PKT_RX_VLAN, PKT_RX_VLAN,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00};

	/* low nibble of the packet-info byte encodes the RSS type */
	const uint8x16_t rsstype_msk = {
			0x0F, 0x0F, 0x0F, 0x0F,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00};

	/* lookup table mapping RSS type -> ol_flags byte (0xF -> FDIR) */
	const uint8x16_t rss_flags = {
			0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
			0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH,
			PKT_RX_RSS_HASH, 0, 0, 0,
			0, 0, 0, PKT_RX_FDIR};

	/* gather the packet-info bytes of all 4 packets into lanes 0..3,
	 * then translate RSS type to flags via table lookup
	 */
	ptype = vzipq_u8(sterr_tmp1.val[0], sterr_tmp2.val[0]).val[0];
	ptype = vandq_u8(ptype, rsstype_msk);
	ptype = vqtbl1q_u8(rss_flags, ptype);

	/* shift the VLAN bit down by VTAG_SHIFT, mask it to PKT_RX_VLAN
	 * and merge with the RSS/FDIR flags
	 */
	vtag = vshrq_n_u8(staterr, VTAG_SHIFT);
	vtag = vandq_u8(vtag, pkttype_msk);
	vtag = vorrq_u8(ptype, vtag);

	vol.word = vgetq_lane_u32(vreinterpretq_u32_u8(vtag), 0);

	rx_pkts[0]->ol_flags = vol.e[0];
	rx_pkts[1]->ol_flags = vol.e[1];
	rx_pkts[2]->ol_flags = vol.e[2];
	rx_pkts[3]->ol_flags = vol.e[3];
}
7c673cae FG |
133 | |
134 | /* | |
135 | * vPMD raw receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP) | |
136 | * | |
137 | * Notice: | |
138 | * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet | |
139 | * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST | |
140 | * numbers of DD bit | |
141 | * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two | |
142 | * - don't support ol_flags for rss and csum err | |
143 | */ | |
144 | ||
7c673cae | 145 | #define IXGBE_VPMD_DESC_EOP_MASK 0x02020202 |
f67539c2 TL |
146 | #define IXGBE_UINT8_BIT (CHAR_BIT * sizeof(uint8_t)) |
147 | ||
148 | static inline uint32_t | |
149 | get_packet_type(uint32_t pkt_info, | |
150 | uint32_t etqf_check, | |
151 | uint32_t tunnel_check) | |
152 | { | |
153 | if (etqf_check) | |
154 | return RTE_PTYPE_UNKNOWN; | |
155 | ||
156 | if (tunnel_check) { | |
157 | pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL; | |
158 | return ptype_table_tn[pkt_info]; | |
159 | } | |
160 | ||
161 | pkt_info &= IXGBE_PACKET_TYPE_MASK_82599; | |
162 | return ptype_table[pkt_info]; | |
163 | } | |
164 | ||
/*
 * Extract the packet type field of four packets from their descriptors,
 * resolve each through get_packet_type() and store the result into the
 * corresponding mbuf's packet_type.
 */
static inline void
desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
		struct rte_mbuf **rx_pkts)
{
	uint32x4_t etqf_check, tunnel_check;
	uint32x4_t etqf_mask = vdupq_n_u32(0x8000);
	uint32x4_t tunnel_mask = vdupq_n_u32(0x10000);
	uint32x4_t ptype_mask = vdupq_n_u32((uint32_t)pkt_type_mask);
	uint32x4_t ptype0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
				      vreinterpretq_u32_u64(descs[2])).val[0];
	uint32x4_t ptype1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
				      vreinterpretq_u32_u64(descs[3])).val[0];

	/* interleave low 32 bits,
	 * now we have 4 ptypes in a NEON register
	 */
	ptype0 = vzipq_u32(ptype0, ptype1).val[0];

	/* mask etqf bits */
	etqf_check = vandq_u32(ptype0, etqf_mask);
	/* mask tunnel bits */
	tunnel_check = vandq_u32(ptype0, tunnel_mask);

	/* shift right by IXGBE_PACKET_TYPE_SHIFT, and apply ptype mask */
	ptype0 = vandq_u32(vshrq_n_u32(ptype0, IXGBE_PACKET_TYPE_SHIFT),
			   ptype_mask);

	/* per-packet scalar lookups: one lane each for ptype/etqf/tunnel */
	rx_pkts[0]->packet_type =
		get_packet_type(vgetq_lane_u32(ptype0, 0),
				vgetq_lane_u32(etqf_check, 0),
				vgetq_lane_u32(tunnel_check, 0));
	rx_pkts[1]->packet_type =
		get_packet_type(vgetq_lane_u32(ptype0, 1),
				vgetq_lane_u32(etqf_check, 1),
				vgetq_lane_u32(tunnel_check, 1));
	rx_pkts[2]->packet_type =
		get_packet_type(vgetq_lane_u32(ptype0, 2),
				vgetq_lane_u32(etqf_check, 2),
				vgetq_lane_u32(tunnel_check, 2));
	rx_pkts[3]->packet_type =
		get_packet_type(vgetq_lane_u32(ptype0, 3),
				vgetq_lane_u32(etqf_check, 3),
				vgetq_lane_u32(tunnel_check, 3));
}
7c673cae FG |
209 | |
/*
 * Core vector RX loop: harvest up to nb_pkts packets from the RX ring,
 * four descriptors per iteration.
 *
 * @param rxq          RX queue to poll.
 * @param rx_pkts      output array of mbuf pointers.
 * @param nb_pkts      requested burst size; capped at
 *                     RTE_IXGBE_MAX_RX_BURST and floor-aligned to
 *                     RTE_IXGBE_DESCS_PER_LOOP (so < DESCS_PER_LOOP
 *                     yields 0).
 * @param split_packet when non-NULL, receives one byte per descriptor
 *                     holding the inverted end-of-packet bit, used by the
 *                     scattered-RX wrapper to stitch segments together.
 * @return number of complete descriptors harvested.
 */
static inline uint16_t
_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
	int pos;
	/* shuffle mask moving descriptor write-back bytes into the mbuf
	 * rx_descriptor_fields1 layout; 0xFF lanes are zeroed
	 */
	uint8x16_t shuf_msk = {
		0xFF, 0xFF,
		0xFF, 0xFF,  /* skip 32 bits pkt_type */
		12, 13,      /* octet 12~13, low 16 bits pkt_len */
		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
		12, 13,      /* octet 12~13, 16 bits data_len */
		14, 15,      /* octet 14~15, low 16 bits vlan_macip */
		4, 5, 6, 7   /* octet 4~7, 32bits rss */
		};
	/* subtracted from pkt_len/data_len lanes when HW leaves CRC */
	uint16x8_t crc_adjust = {0, 0, rxq->crc_len, 0,
				 rxq->crc_len, 0, 0, 0};

	/* nb_pkts shall be less equal than RTE_IXGBE_MAX_RX_BURST */
	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_MAX_RX_BURST);

	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);

	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;

	rte_prefetch_non_temporal(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
		ixgbe_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->wb.upper.status_error &
	      rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
		return 0;

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];

	/* A. load 4 packet in one loop
	 * B. copy 4 mbuf point from swring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */
	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
			pos += RTE_IXGBE_DESCS_PER_LOOP,
			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
		uint64x2_t descs[RTE_IXGBE_DESCS_PER_LOOP];
		uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		uint8x16x2_t sterr_tmp1, sterr_tmp2;
		uint64x2_t mbp1, mbp2;
		uint8x16_t staterr;
		uint16x8_t tmp;
		uint32_t stat;

		/* B.1 load 2 mbuf point */
		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);

		/* B.2 copy 2 mbuf point into rx_pkts */
		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);

		/* B.1 load 2 mbuf point */
		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);

		/* A. load 4 pkts descs */
		descs[0] = vld1q_u64((uint64_t *)(rxdp));
		descs[1] = vld1q_u64((uint64_t *)(rxdp + 1));
		descs[2] = vld1q_u64((uint64_t *)(rxdp + 2));
		descs[3] = vld1q_u64((uint64_t *)(rxdp + 3));

		/* B.2 copy 2 mbuf point into rx_pkts */
		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk);
		pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk);

		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk);
		pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = vzipq_u8(vreinterpretq_u8_u64(descs[1]),
				      vreinterpretq_u8_u64(descs[3]));
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = vzipq_u8(vreinterpretq_u8_u64(descs[0]),
				      vreinterpretq_u8_u64(descs[2]));

		/* C.2 get 4 pkts staterr value */
		staterr = vzipq_u8(sterr_tmp1.val[1], sterr_tmp2.val[1]).val[0];

		/* set ol_flags with vlan packet type */
		desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr,
				  &rx_pkts[pos]);

		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
		pkt_mb4 = vreinterpretq_u8_u16(tmp);
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
		pkt_mb3 = vreinterpretq_u8_u16(tmp);

		/* D.3 copy final 3,4 data to rx_pkts */
		vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
			 pkt_mb4);
		vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
			 pkt_mb3);

		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
		pkt_mb2 = vreinterpretq_u8_u16(tmp);
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
		pkt_mb1 = vreinterpretq_u8_u16(tmp);

		/* C* extract and record EOP bit */
		if (split_packet) {
			stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
			/* and with mask to extract bits, flipping 1-0 */
			*(int *)split_packet = ~stat & IXGBE_VPMD_DESC_EOP_MASK;

			split_packet += RTE_IXGBE_DESCS_PER_LOOP;
		}

		/* C.4 expand DD bit to saturate UINT8 */
		staterr = vshlq_n_u8(staterr, IXGBE_UINT8_BIT - 1);
		staterr = vreinterpretq_u8_s8
			(vshrq_n_s8(vreinterpretq_s8_u8(staterr),
				    IXGBE_UINT8_BIT - 1));
		stat = ~vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);

		rte_prefetch_non_temporal(rxdp + RTE_IXGBE_DESCS_PER_LOOP);

		/* D.3 copy final 1,2 data to rx_pkts */
		vst1q_u8((uint8_t *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
			 pkt_mb2);
		vst1q_u8((uint8_t *)&rx_pkts[pos]->rx_descriptor_fields1,
			 pkt_mb1);

		desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]);

		/* C.5 calc available number of desc: all 4 DD bits set means
		 * a full group; otherwise count trailing done descriptors
		 * (one byte per descriptor in 'stat') and stop
		 */
		if (unlikely(stat == 0)) {
			nb_pkts_recd += RTE_IXGBE_DESCS_PER_LOOP;
		} else {
			nb_pkts_recd += __builtin_ctz(stat) / IXGBE_UINT8_BIT;
			break;
		}
	}

	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}
385 | ||
/*
 * vPMD receive routine, only accept(nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP)
 *
 * Notice:
 * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
 * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
 *   numbers of DD bit
 * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
 * - don't support ol_flags for rss and csum err
 */
uint16_t
ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	/* non-scattered path: no split-packet tracking needed */
	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}
402 | ||
/*
 * vPMD receive routine that reassembles scattered packets
 *
 * Runs the raw vector RX with split-flag tracking, then - if any buffer
 * carries a split flag, or a partial packet is pending from a previous
 * call - stitches the segments back together via reassemble_packets().
 *
 * Notice:
 * - don't support ol_flags for rss and csum err
 * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet
 * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST
 *   numbers of DD bit
 * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
 */
uint16_t
ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	struct ixgbe_rx_queue *rxq = rx_queue;
	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};

	/* get some new buffers */
	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
			split_flags);
	if (nb_bufs == 0)
		return 0;

	/* happy day case, full burst + no packets to be joined */
	/* scan split_flags 8 bytes at a time (burst is 32 bytes total) */
	const uint64_t *split_fl64 = (uint64_t *)split_flags;
	if (rxq->pkt_first_seg == NULL &&
			split_fl64[0] == 0 && split_fl64[1] == 0 &&
			split_fl64[2] == 0 && split_fl64[3] == 0)
		return nb_bufs;

	/* reassemble any packets that need reassembly*/
	unsigned int i = 0;
	if (rxq->pkt_first_seg == NULL) {
		/* find the first split flag, and only reassemble then*/
		while (i < nb_bufs && !split_flags[i])
			i++;
		if (i == nb_bufs)
			return nb_bufs;
		rxq->pkt_first_seg = rx_pkts[i];
	}
	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
		&split_flags[i]);
}
446 | ||
/*
 * Fill one advanced TX descriptor for a single-segment mbuf: the buffer
 * DMA address goes in the first 64-bit word; the second word packs
 * data_len, the caller's command/type flags and pkt_len shifted into the
 * upper PAYLEN bits (<< 46).  Both words are written with one 128-bit
 * store.
 */
static inline void
vtx1(volatile union ixgbe_adv_tx_desc *txdp,
		struct rte_mbuf *pkt, uint64_t flags)
{
	uint64x2_t descriptor = {
			pkt->buf_iova + pkt->data_off,
			(uint64_t)pkt->pkt_len << 46 | flags | pkt->data_len};

	vst1q_u64((uint64_t *)&txdp->read, descriptor);
}
457 | ||
458 | static inline void | |
459 | vtx(volatile union ixgbe_adv_tx_desc *txdp, | |
460 | struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags) | |
461 | { | |
462 | int i; | |
463 | ||
464 | for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt) | |
465 | vtx1(txdp, *pkt, flags); | |
466 | } | |
467 | ||
/*
 * Vector TX burst: queue up to nb_pkts packets (capped at tx_rs_thresh)
 * on the TX ring, handling the wrap at the end of the ring, then advance
 * the hardware tail pointer.
 *
 * @return number of packets actually queued; 0 when no descriptors are
 *         free.
 */
uint16_t
ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
			   uint16_t nb_pkts)
{
	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
	volatile union ixgbe_adv_tx_desc *txdp;
	struct ixgbe_tx_entry_v *txep;
	uint16_t n, nb_commit, tx_id;
	uint64_t flags = DCMD_DTYP_FLAGS;
	uint64_t rs = IXGBE_ADVTXD_DCMD_RS | DCMD_DTYP_FLAGS;
	int i;

	/* cross rx_thresh boundary is not allowed */
	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);

	/* reclaim completed descriptors when the free count runs low */
	if (txq->nb_tx_free < txq->tx_free_thresh)
		ixgbe_tx_free_bufs(txq);

	/* commit no more than the free descriptor count allows */
	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	if (unlikely(nb_pkts == 0))
		return 0;

	tx_id = txq->tx_tail;
	txdp = &txq->tx_ring[tx_id];
	txep = &txq->sw_ring_v[tx_id];

	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

	/* descriptors left before the end of the ring */
	n = (uint16_t)(txq->nb_tx_desc - tx_id);
	if (nb_commit >= n) {
		/* burst wraps: fill up to the ring end first, with the RS
		 * (report status) bit set on the last pre-wrap descriptor
		 */
		tx_backlog_entry(txep, tx_pkts, n);

		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
			vtx1(txdp, *tx_pkts, flags);

		vtx1(txdp, *tx_pkts++, rs);

		nb_commit = (uint16_t)(nb_commit - n);

		tx_id = 0;
		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

		/* avoid reach the end of ring */
		txdp = &txq->tx_ring[tx_id];
		txep = &txq->sw_ring_v[tx_id];
	}

	tx_backlog_entry(txep, tx_pkts, nb_commit);

	vtx(txdp, tx_pkts, nb_commit, flags);

	tx_id = (uint16_t)(tx_id + nb_commit);
	if (tx_id > txq->tx_next_rs) {
		/* request descriptor write-back at the RS threshold */
		txq->tx_ring[txq->tx_next_rs].read.cmd_type_len |=
			rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
		txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
					     txq->tx_rs_thresh);
	}

	txq->tx_tail = tx_id;

	IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);

	return nb_pkts;
}
533 | ||
f67539c2 | 534 | static void __rte_cold |
7c673cae FG |
535 | ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq) |
536 | { | |
537 | _ixgbe_tx_queue_release_mbufs_vec(txq); | |
538 | } | |
539 | ||
f67539c2 | 540 | void __rte_cold |
7c673cae FG |
541 | ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq) |
542 | { | |
543 | _ixgbe_rx_queue_release_mbufs_vec(rxq); | |
544 | } | |
545 | ||
f67539c2 | 546 | static void __rte_cold |
7c673cae FG |
547 | ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq) |
548 | { | |
549 | _ixgbe_tx_free_swring_vec(txq); | |
550 | } | |
551 | ||
f67539c2 | 552 | static void __rte_cold |
7c673cae FG |
553 | ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq) |
554 | { | |
555 | _ixgbe_reset_tx_queue_vec(txq); | |
556 | } | |
557 | ||
/* TX queue operations installed when the vector TX path is selected */
static const struct ixgbe_txq_ops vec_txq_ops = {
	.release_mbufs = ixgbe_tx_queue_release_mbufs_vec,
	.free_swring = ixgbe_tx_free_swring,
	.reset = ixgbe_reset_tx_queue,
};
563 | ||
f67539c2 | 564 | int __rte_cold |
7c673cae FG |
565 | ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq) |
566 | { | |
567 | return ixgbe_rxq_vec_setup_default(rxq); | |
568 | } | |
569 | ||
f67539c2 | 570 | int __rte_cold |
7c673cae FG |
571 | ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq) |
572 | { | |
573 | return ixgbe_txq_vec_setup_default(txq, &vec_txq_ops); | |
574 | } | |
575 | ||
f67539c2 | 576 | int __rte_cold |
7c673cae FG |
577 | ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) |
578 | { | |
579 | struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; | |
580 | ||
581 | /* no csum error report support */ | |
11fdf7f2 | 582 | if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM) |
7c673cae FG |
583 | return -1; |
584 | ||
585 | return ixgbe_rx_vec_dev_conf_condition_check_default(dev); | |
586 | } |