ceph/src/seastar/dpdk/drivers/net/mlx5/mlx5_rxtx.c
1/* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2015 6WIND S.A.
3 * Copyright 2015 Mellanox Technologies, Ltd
4 */
5
6#include <assert.h>
7#include <stdint.h>
8#include <string.h>
9#include <stdlib.h>
10
11/* Verbs header. */
12/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13#ifdef PEDANTIC
14#pragma GCC diagnostic ignored "-Wpedantic"
15#endif
16#include <infiniband/verbs.h>
17#include <infiniband/mlx5dv.h>
18#ifdef PEDANTIC
19#pragma GCC diagnostic error "-Wpedantic"
20#endif
21
22#include <rte_mbuf.h>
23#include <rte_mempool.h>
24#include <rte_prefetch.h>
25#include <rte_common.h>
26#include <rte_branch_prediction.h>
27#include <rte_ether.h>
28
29#include "mlx5.h"
30#include "mlx5_utils.h"
31#include "mlx5_rxtx.h"
32#include "mlx5_autoconf.h"
33#include "mlx5_defs.h"
34#include "mlx5_prm.h"
35
36static __rte_always_inline uint32_t
37rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
38
39static __rte_always_inline int
40mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
41 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);
42
43static __rte_always_inline uint32_t
44rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);
45
46static __rte_always_inline void
47rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
48 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res);
49
50static __rte_always_inline void
51mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx);
52
53uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
54 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
55};
56
57uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
58uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;
59
60/**
61 * Build a table to translate Rx completion flags to packet type.
62 *
63 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
64 */
65void
66mlx5_set_ptype_table(void)
67{
68 unsigned int i;
69 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table;
70
71 /* Last entry must not be overwritten, reserved for errored packet. */
72 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i)
73 (*p)[i] = RTE_PTYPE_UNKNOWN;
74 /*
75 * The index to the array should have:
76 * bit[1:0] = l3_hdr_type
77 * bit[4:2] = l4_hdr_type
78 * bit[5] = ip_frag
79 * bit[6] = tunneled
80 * bit[7] = outer_l3_type
81 */
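	/*
	 * Worked example of the layout above: index 0x46 = 0b01000110 has
	 * bit[6] set (tunneled), l4_hdr_type = 1 (TCP) and l3_hdr_type = 2
	 * (IPv4), which is why the (*p)[0x46] entry below combines the
	 * outer IPv4 flag with the inner IPv4/TCP flags.
	 */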
82 /* L2 */
83 (*p)[0x00] = RTE_PTYPE_L2_ETHER;
84 /* L3 */
85 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
86 RTE_PTYPE_L4_NONFRAG;
87 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
88 RTE_PTYPE_L4_NONFRAG;
89 /* Fragmented */
90 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
91 RTE_PTYPE_L4_FRAG;
92 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
93 RTE_PTYPE_L4_FRAG;
94 /* TCP */
95 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
96 RTE_PTYPE_L4_TCP;
97 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
98 RTE_PTYPE_L4_TCP;
99 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
100 RTE_PTYPE_L4_TCP;
101 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
102 RTE_PTYPE_L4_TCP;
103 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
104 RTE_PTYPE_L4_TCP;
105 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
106 RTE_PTYPE_L4_TCP;
107 /* UDP */
108 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
109 RTE_PTYPE_L4_UDP;
110 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
111 RTE_PTYPE_L4_UDP;
112 /* Repeat with outer_l3_type being set. Just in case. */
113 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
114 RTE_PTYPE_L4_NONFRAG;
115 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
116 RTE_PTYPE_L4_NONFRAG;
117 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
118 RTE_PTYPE_L4_FRAG;
119 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
120 RTE_PTYPE_L4_FRAG;
121 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
122 RTE_PTYPE_L4_TCP;
123 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
124 RTE_PTYPE_L4_TCP;
125 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
126 RTE_PTYPE_L4_TCP;
127 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
128 RTE_PTYPE_L4_TCP;
129 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
130 RTE_PTYPE_L4_TCP;
131 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
132 RTE_PTYPE_L4_TCP;
133 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
134 RTE_PTYPE_L4_UDP;
135 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
136 RTE_PTYPE_L4_UDP;
137 /* Tunneled - L3 */
138 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
139 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
140 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
141 RTE_PTYPE_INNER_L4_NONFRAG;
142 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
143 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
144 RTE_PTYPE_INNER_L4_NONFRAG;
145 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
146 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
147 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
148 RTE_PTYPE_INNER_L4_NONFRAG;
149 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
150 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
151 RTE_PTYPE_INNER_L4_NONFRAG;
152 /* Tunneled - Fragmented */
153 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
154 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
155 RTE_PTYPE_INNER_L4_FRAG;
156 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
157 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
158 RTE_PTYPE_INNER_L4_FRAG;
159 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
160 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
161 RTE_PTYPE_INNER_L4_FRAG;
162 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
163 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
164 RTE_PTYPE_INNER_L4_FRAG;
165 /* Tunneled - TCP */
166 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
167 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
168 RTE_PTYPE_INNER_L4_TCP;
169 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
170 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
171 RTE_PTYPE_INNER_L4_TCP;
172 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
173 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
174 RTE_PTYPE_INNER_L4_TCP;
175 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
176 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
177 RTE_PTYPE_INNER_L4_TCP;
178 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
179 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
180 RTE_PTYPE_INNER_L4_TCP;
181 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
182 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
183 RTE_PTYPE_INNER_L4_TCP;
184 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
185 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
186 RTE_PTYPE_INNER_L4_TCP;
187 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
188 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
189 RTE_PTYPE_INNER_L4_TCP;
190 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
191 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
192 RTE_PTYPE_INNER_L4_TCP;
193 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
194 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
195 RTE_PTYPE_INNER_L4_TCP;
196 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
197 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
198 RTE_PTYPE_INNER_L4_TCP;
199 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
200 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
201 RTE_PTYPE_INNER_L4_TCP;
202 /* Tunneled - UDP */
203 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
204 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
205 RTE_PTYPE_INNER_L4_UDP;
206 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
207 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
208 RTE_PTYPE_INNER_L4_UDP;
209 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
210 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
211 RTE_PTYPE_INNER_L4_UDP;
212 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
213 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
214 RTE_PTYPE_INNER_L4_UDP;
215}
216
217/**
218 * Build a table to translate packet to checksum type of Verbs.
219 */
220void
221mlx5_set_cksum_table(void)
222{
223 unsigned int i;
224 uint8_t v;
225
226 /*
227 * The index should have:
228 * bit[0] = PKT_TX_TCP_SEG
229 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
230 * bit[4] = PKT_TX_IP_CKSUM
231 * bit[8] = PKT_TX_OUTER_IP_CKSUM
232 * bit[9] = tunnel
233 */
234 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) {
235 v = 0;
236 if (i & (1 << 9)) {
237 /* Tunneled packet. */
238 if (i & (1 << 8)) /* Outer IP. */
239 v |= MLX5_ETH_WQE_L3_CSUM;
240 if (i & (1 << 4)) /* Inner IP. */
241 v |= MLX5_ETH_WQE_L3_INNER_CSUM;
242 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */
243 v |= MLX5_ETH_WQE_L4_INNER_CSUM;
244 } else {
245 /* No tunnel. */
246 if (i & (1 << 4)) /* IP. */
247 v |= MLX5_ETH_WQE_L3_CSUM;
248 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */
249 v |= MLX5_ETH_WQE_L4_CSUM;
250 }
251 mlx5_cksum_table[i] = v;
252 }
253}
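/*
 * Illustration of mlx5_set_cksum_table(): for a non-tunneled mbuf that
 * requests IP plus TCP checksum offload, the index has bit[4] set and a
 * non-zero bit[3:2], so the stored value is
 * MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; the transmit path can
 * then read mlx5_cksum_table[idx] instead of re-testing offload flags.
 */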
254
255/**
256 * Build a table to translate packet type of mbuf to SWP type of Verbs.
257 */
258void
259mlx5_set_swp_types_table(void)
260{
261 unsigned int i;
262 uint8_t v;
263
264 /*
265 * The index should have:
266 * bit[0:1] = PKT_TX_L4_MASK
267 * bit[4] = PKT_TX_IPV6
268 * bit[8] = PKT_TX_OUTER_IPV6
269 * bit[9] = PKT_TX_OUTER_UDP
270 */
271 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) {
272 v = 0;
273 if (i & (1 << 8))
274 v |= MLX5_ETH_WQE_L3_OUTER_IPV6;
275 if (i & (1 << 9))
276 v |= MLX5_ETH_WQE_L4_OUTER_UDP;
277 if (i & (1 << 4))
278 v |= MLX5_ETH_WQE_L3_INNER_IPV6;
279 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52))
280 v |= MLX5_ETH_WQE_L4_INNER_UDP;
281 mlx5_swp_types_table[i] = v;
282 }
283}
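/*
 * Illustration of mlx5_set_swp_types_table(): an index with bit[8] set
 * (outer IPv6) whose bit[1:0] matches PKT_TX_UDP_CKSUM (inner UDP) is
 * stored as MLX5_ETH_WQE_L3_OUTER_IPV6 | MLX5_ETH_WQE_L4_INNER_UDP.
 */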
284
285/**
286 * Return the size of tailroom of WQ.
287 *
288 * @param txq
289 * Pointer to TX queue structure.
290 * @param addr
291 * Pointer to tail of WQ.
292 *
293 * @return
294 * Size of tailroom.
295 */
296static inline size_t
297tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)
298{
299 size_t tailroom;
300 tailroom = (uintptr_t)(txq->wqes) +
301 (1 << txq->wqe_n) * MLX5_WQE_SIZE -
302 (uintptr_t)addr;
303 return tailroom;
304}
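/*
 * Example for tx_mlx5_wq_tailroom(): the WQ ring spans
 * (1 << txq->wqe_n) * MLX5_WQE_SIZE bytes starting at txq->wqes, so an
 * addr pointing at the beginning of the last WQEBB yields a tailroom of
 * exactly MLX5_WQE_SIZE bytes.
 */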
305
306/**
307 * Copy data to tailroom of circular queue.
308 *
309 * @param dst
310 * Pointer to destination.
311 * @param src
312 * Pointer to source.
313 * @param n
314 * Number of bytes to copy.
315 * @param base
316 * Pointer to head of queue.
317 * @param tailroom
318 * Size of tailroom from dst.
319 *
320 * @return
321 * Pointer after copied data.
322 */
323static inline void *
324mlx5_copy_to_wq(void *dst, const void *src, size_t n,
325 void *base, size_t tailroom)
326{
327 void *ret;
328
329 if (n > tailroom) {
330 rte_memcpy(dst, src, tailroom);
331 rte_memcpy(base, (void *)((uintptr_t)src + tailroom),
332 n - tailroom);
333 ret = (uint8_t *)base + n - tailroom;
334 } else {
335 rte_memcpy(dst, src, n);
336 ret = (n == tailroom) ? base : (uint8_t *)dst + n;
337 }
338 return ret;
339}
340
341/**
342 * Inline TSO headers into WQE.
343 *
344 * @return
345 * 0 on success, negative errno value on failure.
346 */
347static int
348inline_tso(struct mlx5_txq_data *txq, struct rte_mbuf *buf,
349 uint32_t *length,
350 uintptr_t *addr,
351 uint16_t *pkt_inline_sz,
352 uint8_t **raw,
353 uint16_t *max_wqe,
354 uint16_t *tso_segsz,
355 uint16_t *tso_header_sz)
356{
357 uintptr_t end = (uintptr_t)(((uintptr_t)txq->wqes) +
358 (1 << txq->wqe_n) * MLX5_WQE_SIZE);
359 unsigned int copy_b;
360 uint8_t vlan_sz = (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
361 const uint8_t tunneled = txq->tunnel_en && (buf->ol_flags &
362 PKT_TX_TUNNEL_MASK);
363 uint16_t n_wqe;
364
365 *tso_segsz = buf->tso_segsz;
366 *tso_header_sz = buf->l2_len + vlan_sz + buf->l3_len + buf->l4_len;
367 if (unlikely(*tso_segsz == 0 || *tso_header_sz == 0)) {
368 txq->stats.oerrors++;
369 return -EINVAL;
370 }
371 if (tunneled)
372 *tso_header_sz += buf->outer_l2_len + buf->outer_l3_len;
373 /* First seg must contain all TSO headers. */
374 if (unlikely(*tso_header_sz > MLX5_MAX_TSO_HEADER) ||
375 *tso_header_sz > DATA_LEN(buf)) {
376 txq->stats.oerrors++;
377 return -EINVAL;
378 }
379 copy_b = *tso_header_sz - *pkt_inline_sz;
380 if (!copy_b || ((end - (uintptr_t)*raw) < copy_b))
381 return -EAGAIN;
382 n_wqe = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
383 if (unlikely(*max_wqe < n_wqe))
384 return -EINVAL;
385 *max_wqe -= n_wqe;
386 rte_memcpy((void *)*raw, (void *)*addr, copy_b);
387 *length -= copy_b;
388 *addr += copy_b;
389 copy_b = MLX5_WQE_DS(copy_b) * MLX5_WQE_DWORD_SIZE;
390 *pkt_inline_sz += copy_b;
391 *raw += copy_b;
392 return 0;
393}
394
395/**
396 * DPDK callback to check the status of a tx descriptor.
397 *
398 * @param tx_queue
399 * The tx queue.
400 * @param[in] offset
401 * The index of the descriptor in the ring.
402 *
403 * @return
404 * The status of the tx descriptor.
405 */
406int
407mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
408{
409 struct mlx5_txq_data *txq = tx_queue;
410 uint16_t used;
411
412 mlx5_tx_complete(txq);
413 used = txq->elts_head - txq->elts_tail;
414 if (offset < used)
415 return RTE_ETH_TX_DESC_FULL;
416 return RTE_ETH_TX_DESC_DONE;
417}
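/*
 * Applications normally reach this callback through the ethdev wrapper,
 * e.g. (sketch, assuming an already configured port and queue):
 *
 *   if (rte_eth_tx_descriptor_status(port_id, queue_id, offset) ==
 *       RTE_ETH_TX_DESC_DONE)
 *           ; /. the slot has completed and its mbuf has been released ./
 */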
418
419/**
420 * Internal function to compute the number of used descriptors in an RX queue.
421 *
422 * @param rxq
423 * The Rx queue.
424 *
425 * @return
426 * The number of used rx descriptors.
427 */
428static uint32_t
429rx_queue_count(struct mlx5_rxq_data *rxq)
430{
431 struct rxq_zip *zip = &rxq->zip;
432 volatile struct mlx5_cqe *cqe;
433 const unsigned int cqe_n = (1 << rxq->cqe_n);
434 const unsigned int cqe_cnt = cqe_n - 1;
435 unsigned int cq_ci;
436 unsigned int used;
437
438 /* if we are processing a compressed cqe */
439 if (zip->ai) {
440 used = zip->cqe_cnt - zip->ca;
441 cq_ci = zip->cq_ci;
442 } else {
443 used = 0;
444 cq_ci = rxq->cq_ci;
445 }
446 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
447 while (check_cqe(cqe, cqe_n, cq_ci) == 0) {
448 int8_t op_own;
449 unsigned int n;
450
451 op_own = cqe->op_own;
452 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
453 n = rte_be_to_cpu_32(cqe->byte_cnt);
454 else
455 n = 1;
456 cq_ci += n;
457 used += n;
458 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
459 }
460 used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
461 return used;
462}
463
464/**
465 * DPDK callback to check the status of a rx descriptor.
466 *
467 * @param rx_queue
468 * The Rx queue.
469 * @param[in] offset
470 * The index of the descriptor in the ring.
471 *
472 * @return
473 * The status of the rx descriptor.
474 */
475int
476mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
477{
478 struct mlx5_rxq_data *rxq = rx_queue;
479 struct mlx5_rxq_ctrl *rxq_ctrl =
480 container_of(rxq, struct mlx5_rxq_ctrl, rxq);
481 struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);
482
483 if (dev->rx_pkt_burst != mlx5_rx_burst) {
484 rte_errno = ENOTSUP;
485 return -rte_errno;
486 }
487 if (offset >= (1 << rxq->elts_n)) {
488 rte_errno = EINVAL;
489 return -rte_errno;
490 }
491 if (offset < rx_queue_count(rxq))
492 return RTE_ETH_RX_DESC_DONE;
493 return RTE_ETH_RX_DESC_AVAIL;
494}
495
496/**
497 * DPDK callback to get the number of used descriptors in an RX queue.
498 *
499 * @param dev
500 * Pointer to the device structure.
501 *
502 * @param rx_queue_id
503 * The Rx queue.
504 *
505 * @return
506 * The number of used rx descriptors.
507 * -EINVAL if the queue is invalid
508 */
509uint32_t
510mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
511{
512 struct mlx5_priv *priv = dev->data->dev_private;
513 struct mlx5_rxq_data *rxq;
514
515 if (dev->rx_pkt_burst != mlx5_rx_burst) {
516 rte_errno = ENOTSUP;
517 return -rte_errno;
518 }
519 rxq = (*priv->rxqs)[rx_queue_id];
520 if (!rxq) {
521 rte_errno = EINVAL;
522 return -rte_errno;
523 }
524 return rx_queue_count(rxq);
525}
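/*
 * Typically invoked through the ethdev layer, e.g. (sketch):
 *
 *   int used = rte_eth_rx_queue_count(port_id, rx_queue_id);
 *
 * which dispatches to this callback for ports served by mlx5_rx_burst.
 */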
526
527/**
528 * DPDK callback for TX.
529 *
530 * @param dpdk_txq
531 * Generic pointer to TX queue structure.
532 * @param[in] pkts
533 * Packets to transmit.
534 * @param pkts_n
535 * Number of packets in array.
536 *
537 * @return
538 * Number of packets successfully transmitted (<= pkts_n).
539 */
540uint16_t
541mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
542{
543 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
544 uint16_t elts_head = txq->elts_head;
545 const uint16_t elts_n = 1 << txq->elts_n;
546 const uint16_t elts_m = elts_n - 1;
547 unsigned int i = 0;
548 unsigned int j = 0;
549 unsigned int k = 0;
550 uint16_t max_elts;
551 uint16_t max_wqe;
552 unsigned int comp;
553 volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
554 unsigned int segs_n = 0;
555 const unsigned int max_inline = txq->max_inline;
556 uint64_t addr_64;
557
558 if (unlikely(!pkts_n))
559 return 0;
560 /* Prefetch first packet cacheline. */
561 rte_prefetch0(*pkts);
562 /* Start processing. */
563 mlx5_tx_complete(txq);
564 max_elts = (elts_n - (elts_head - txq->elts_tail));
565 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
566 if (unlikely(!max_wqe))
567 return 0;
568 do {
569 struct rte_mbuf *buf = *pkts; /* First_seg. */
570 uint8_t *raw;
571 volatile struct mlx5_wqe_v *wqe = NULL;
572 volatile rte_v128u32_t *dseg = NULL;
573 uint32_t length;
574 unsigned int ds = 0;
575 unsigned int sg = 0; /* counter of additional segs attached. */
576 uintptr_t addr;
577 uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
578 uint16_t tso_header_sz = 0;
579 uint16_t ehdr;
580 uint8_t cs_flags;
581 uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
582 uint32_t swp_offsets = 0;
583 uint8_t swp_types = 0;
584 rte_be32_t metadata;
585 uint16_t tso_segsz = 0;
586#ifdef MLX5_PMD_SOFT_COUNTERS
587 uint32_t total_length = 0;
588#endif
589 int ret;
590
591 segs_n = buf->nb_segs;
592 /*
593 * Make sure there is enough room to store this packet and
594 * that one ring entry remains unused.
595 */
596 assert(segs_n);
597 if (max_elts < segs_n)
598 break;
599 max_elts -= segs_n;
600 sg = --segs_n;
601 if (unlikely(--max_wqe == 0))
602 break;
603 wqe = (volatile struct mlx5_wqe_v *)
604 tx_mlx5_wqe(txq, txq->wqe_ci);
605 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
606 if (pkts_n - i > 1)
607 rte_prefetch0(*(pkts + 1));
608 addr = rte_pktmbuf_mtod(buf, uintptr_t);
609 length = DATA_LEN(buf);
610 ehdr = (((uint8_t *)addr)[1] << 8) |
611 ((uint8_t *)addr)[0];
612#ifdef MLX5_PMD_SOFT_COUNTERS
613 total_length = length;
614#endif
615 if (length < (MLX5_WQE_DWORD_SIZE + 2)) {
616 txq->stats.oerrors++;
617 break;
618 }
619 /* Update element. */
620 (*txq->elts)[elts_head & elts_m] = buf;
621 /* Prefetch next buffer data. */
622 if (pkts_n - i > 1)
623 rte_prefetch0(
624 rte_pktmbuf_mtod(*(pkts + 1), volatile void *));
625 cs_flags = txq_ol_cksum_to_cs(buf);
626 txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);
627 raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
628 /* Copy metadata from mbuf if valid */
629 metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
630 0;
631 /* Replace the Ethernet type by the VLAN if necessary. */
632 if (buf->ol_flags & PKT_TX_VLAN_PKT) {
633 uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
634 buf->vlan_tci);
635 unsigned int len = 2 * ETHER_ADDR_LEN - 2;
636
637 addr += 2;
638 length -= 2;
639 /* Copy Destination and source mac address. */
640 memcpy((uint8_t *)raw, ((uint8_t *)addr), len);
641 /* Copy VLAN. */
642 memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan));
643 /* Copy missing two bytes to end the DSeg. */
644 memcpy((uint8_t *)raw + len + sizeof(vlan),
645 ((uint8_t *)addr) + len, 2);
646 addr += len + 2;
647 length -= (len + 2);
648 } else {
649 memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2,
650 MLX5_WQE_DWORD_SIZE);
651 length -= pkt_inline_sz;
652 addr += pkt_inline_sz;
653 }
654 raw += MLX5_WQE_DWORD_SIZE;
655 if (tso) {
656 ret = inline_tso(txq, buf, &length,
657 &addr, &pkt_inline_sz,
658 &raw, &max_wqe,
659 &tso_segsz, &tso_header_sz);
660 if (ret == -EINVAL) {
661 break;
662 } else if (ret == -EAGAIN) {
663 /* NOP WQE. */
664 wqe->ctrl = (rte_v128u32_t){
665 rte_cpu_to_be_32(txq->wqe_ci << 8),
666 rte_cpu_to_be_32(txq->qp_num_8s | 1),
667 0,
668 0,
669 };
670 ds = 1;
671#ifdef MLX5_PMD_SOFT_COUNTERS
672 total_length = 0;
673#endif
674 k++;
675 goto next_wqe;
676 }
677 }
678 /* Inline if enough room. */
679 if (max_inline || tso) {
680 uint32_t inl = 0;
681 uintptr_t end = (uintptr_t)
682 (((uintptr_t)txq->wqes) +
683 (1 << txq->wqe_n) * MLX5_WQE_SIZE);
684 unsigned int inline_room = max_inline *
685 RTE_CACHE_LINE_SIZE -
686 (pkt_inline_sz - 2) -
687 !!tso * sizeof(inl);
688 uintptr_t addr_end;
689 unsigned int copy_b;
690
691pkt_inline:
692 addr_end = RTE_ALIGN_FLOOR(addr + inline_room,
693 RTE_CACHE_LINE_SIZE);
694 copy_b = (addr_end > addr) ?
695 RTE_MIN((addr_end - addr), length) : 0;
696 if (copy_b && ((end - (uintptr_t)raw) >
697 (copy_b + sizeof(inl)))) {
698 /*
699 * One Dseg remains in the current WQE. To
700 * keep the computation positive, it is
701 * removed after the bytes to Dseg conversion.
702 */
703 uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
704
705 if (unlikely(max_wqe < n))
706 break;
707 max_wqe -= n;
708 if (tso) {
709 assert(inl == 0);
710 inl = rte_cpu_to_be_32(copy_b |
711 MLX5_INLINE_SEG);
712 rte_memcpy((void *)raw,
713 (void *)&inl, sizeof(inl));
714 raw += sizeof(inl);
715 pkt_inline_sz += sizeof(inl);
716 }
717 rte_memcpy((void *)raw, (void *)addr, copy_b);
718 addr += copy_b;
719 length -= copy_b;
720 pkt_inline_sz += copy_b;
7c673cae 721 }
722 /*
723 * 2 DWORDs consumed by the WQE header + ETH segment +
724 * the size of the inline part of the packet.
725 */
726 ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
727 if (length > 0) {
728 if (ds % (MLX5_WQE_SIZE /
729 MLX5_WQE_DWORD_SIZE) == 0) {
730 if (unlikely(--max_wqe == 0))
731 break;
732 dseg = (volatile rte_v128u32_t *)
733 tx_mlx5_wqe(txq, txq->wqe_ci +
734 ds / 4);
735 } else {
736 dseg = (volatile rte_v128u32_t *)
737 ((uintptr_t)wqe +
738 (ds * MLX5_WQE_DWORD_SIZE));
739 }
740 goto use_dseg;
741 } else if (!segs_n) {
742 goto next_pkt;
743 } else {
744 /*
745 * Further inline the next segment only for
746 * non-TSO packets.
747 */
748 if (!tso) {
749 raw += copy_b;
750 inline_room -= copy_b;
751 } else {
752 inline_room = 0;
753 }
754 /* Move to the next segment. */
755 --segs_n;
756 buf = buf->next;
757 assert(buf);
758 addr = rte_pktmbuf_mtod(buf, uintptr_t);
759 length = DATA_LEN(buf);
760#ifdef MLX5_PMD_SOFT_COUNTERS
761 total_length += length;
762#endif
763 (*txq->elts)[++elts_head & elts_m] = buf;
764 goto pkt_inline;
765 }
766 } else {
767 /*
768 * No inline has been done in the packet, only the
769 * Ethernet Header has been stored.
770 */
771 dseg = (volatile rte_v128u32_t *)
772 ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
773 ds = 3;
774use_dseg:
775 /* Add the remaining packet as a simple ds. */
776 addr_64 = rte_cpu_to_be_64(addr);
777 *dseg = (rte_v128u32_t){
778 rte_cpu_to_be_32(length),
779 mlx5_tx_mb2mr(txq, buf),
780 addr_64,
781 addr_64 >> 32,
782 };
783 ++ds;
784 if (!segs_n)
785 goto next_pkt;
786 }
787next_seg:
788 assert(buf);
789 assert(ds);
790 assert(wqe);
791 /*
792 * Spill on next WQE when the current one does not have
793 * enough room left. Size of WQE must be a multiple
794 * of data segment size.
795 */
796 assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
797 if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
798 if (unlikely(--max_wqe == 0))
799 break;
800 dseg = (volatile rte_v128u32_t *)
801 tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4);
802 rte_prefetch0(tx_mlx5_wqe(txq,
803 txq->wqe_ci + ds / 4 + 1));
804 } else {
805 ++dseg;
806 }
807 ++ds;
808 buf = buf->next;
809 assert(buf);
810 length = DATA_LEN(buf);
811#ifdef MLX5_PMD_SOFT_COUNTERS
812 total_length += length;
813#endif
814 /* Store segment information. */
815 addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
816 *dseg = (rte_v128u32_t){
817 rte_cpu_to_be_32(length),
818 mlx5_tx_mb2mr(txq, buf),
819 addr_64,
820 addr_64 >> 32,
821 };
822 (*txq->elts)[++elts_head & elts_m] = buf;
823 if (--segs_n)
824 goto next_seg;
825next_pkt:
826 if (ds > MLX5_DSEG_MAX) {
827 txq->stats.oerrors++;
828 break;
829 }
830 ++elts_head;
831 ++pkts;
832 ++i;
833 j += sg;
834 /* Initialize known and common part of the WQE structure. */
835 if (tso) {
836 wqe->ctrl = (rte_v128u32_t){
837 rte_cpu_to_be_32((txq->wqe_ci << 8) |
838 MLX5_OPCODE_TSO),
839 rte_cpu_to_be_32(txq->qp_num_8s | ds),
840 0,
841 0,
842 };
843 wqe->eseg = (rte_v128u32_t){
844 swp_offsets,
845 cs_flags | (swp_types << 8) |
846 (rte_cpu_to_be_16(tso_segsz) << 16),
847 metadata,
848 (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
849 };
850 } else {
851 wqe->ctrl = (rte_v128u32_t){
852 rte_cpu_to_be_32((txq->wqe_ci << 8) |
853 MLX5_OPCODE_SEND),
854 rte_cpu_to_be_32(txq->qp_num_8s | ds),
855 0,
856 0,
857 };
858 wqe->eseg = (rte_v128u32_t){
859 swp_offsets,
860 cs_flags | (swp_types << 8),
861 metadata,
862 (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
863 };
864 }
865next_wqe:
866 txq->wqe_ci += (ds + 3) / 4;
867 /* Save the last successful WQE for completion request */
868 last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe;
869#ifdef MLX5_PMD_SOFT_COUNTERS
870 /* Increment sent bytes counter. */
871 txq->stats.obytes += total_length;
872#endif
873 } while (i < pkts_n);
874 /* Take a shortcut if nothing must be sent. */
875 if (unlikely((i + k) == 0))
876 return 0;
877 txq->elts_head += (i + j);
878 /* Check whether completion threshold has been reached. */
879 comp = txq->elts_comp + i + j + k;
880 if (comp >= MLX5_TX_COMP_THRESH) {
881 /* A CQE slot must always be available. */
882 assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
883 /* Request completion on last WQE. */
884 last_wqe->ctrl2 = rte_cpu_to_be_32(8);
885 /* Save elts_head in unused "immediate" field of WQE. */
886 last_wqe->ctrl3 = txq->elts_head;
887 txq->elts_comp = 0;
888 } else {
889 txq->elts_comp = comp;
890 }
891#ifdef MLX5_PMD_SOFT_COUNTERS
892 /* Increment sent packets counter. */
893 txq->stats.opackets += i;
894#endif
895 /* Ring QP doorbell. */
896 mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);
897 return i;
898}
899
900/**
901 * Open a MPW session.
902 *
903 * @param txq
904 * Pointer to TX queue structure.
905 * @param mpw
906 * Pointer to MPW session structure.
907 * @param length
908 * Packet length.
909 */
910static inline void
911mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
912{
913 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
914 volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
915 (volatile struct mlx5_wqe_data_seg (*)[])
916 tx_mlx5_wqe(txq, idx + 1);
917
918 mpw->state = MLX5_MPW_STATE_OPENED;
919 mpw->pkts_n = 0;
920 mpw->len = length;
921 mpw->total_len = 0;
922 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
923 mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
924 mpw->wqe->eseg.inline_hdr_sz = 0;
925 mpw->wqe->eseg.rsvd0 = 0;
926 mpw->wqe->eseg.rsvd1 = 0;
927 mpw->wqe->eseg.flow_table_metadata = 0;
928 mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
929 (txq->wqe_ci << 8) |
930 MLX5_OPCODE_TSO);
931 mpw->wqe->ctrl[2] = 0;
932 mpw->wqe->ctrl[3] = 0;
933 mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
934 (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
935 mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)
936 (((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));
937 mpw->data.dseg[2] = &(*dseg)[0];
938 mpw->data.dseg[3] = &(*dseg)[1];
939 mpw->data.dseg[4] = &(*dseg)[2];
940}
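/*
 * Layout note for the session built above: dseg[0] and dseg[1] sit in
 * the title WQEBB right after the control and Ethernet segments, while
 * dseg[2]..dseg[4] occupy the following WQEBB, so a plain MPW session
 * never spans more than two WQEBBs and carries at most
 * MLX5_MPW_DSEG_MAX data segments.
 */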
941
942/**
943 * Close a MPW session.
944 *
945 * @param txq
946 * Pointer to TX queue structure.
947 * @param mpw
948 * Pointer to MPW session structure.
949 */
950static inline void
951mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
952{
953 unsigned int num = mpw->pkts_n;
954
955 /*
956 * Store size in multiple of 16 bytes. Control and Ethernet segments
957 * count as 2.
958 */
959 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | (2 + num));
960 mpw->state = MLX5_MPW_STATE_CLOSED;
961 if (num < 3)
962 ++txq->wqe_ci;
963 else
964 txq->wqe_ci += 2;
965 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
966 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
967}
968
969/**
970 * DPDK callback for TX with MPW support.
971 *
972 * @param dpdk_txq
973 * Generic pointer to TX queue structure.
974 * @param[in] pkts
975 * Packets to transmit.
976 * @param pkts_n
977 * Number of packets in array.
978 *
979 * @return
980 * Number of packets successfully transmitted (<= pkts_n).
981 */
982uint16_t
983mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
984{
985 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
986 uint16_t elts_head = txq->elts_head;
987 const uint16_t elts_n = 1 << txq->elts_n;
988 const uint16_t elts_m = elts_n - 1;
989 unsigned int i = 0;
990 unsigned int j = 0;
991 uint16_t max_elts;
992 uint16_t max_wqe;
993 unsigned int comp;
994 struct mlx5_mpw mpw = {
995 .state = MLX5_MPW_STATE_CLOSED,
996 };
997
998 if (unlikely(!pkts_n))
999 return 0;
1000 /* Prefetch first packet cacheline. */
1001 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
1002 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
1003 /* Start processing. */
1004 mlx5_tx_complete(txq);
1005 max_elts = (elts_n - (elts_head - txq->elts_tail));
1006 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
1007 if (unlikely(!max_wqe))
1008 return 0;
1009 do {
1010 struct rte_mbuf *buf = *(pkts++);
1011 uint32_t length;
1012 unsigned int segs_n = buf->nb_segs;
1013 uint32_t cs_flags;
1014 rte_be32_t metadata;
1015
1016 /*
1017 * Make sure there is enough room to store this packet and
1018 * that one ring entry remains unused.
1019 */
1020 assert(segs_n);
1021 if (max_elts < segs_n)
1022 break;
1023 /* Do not bother with large packets MPW cannot handle. */
1024 if (segs_n > MLX5_MPW_DSEG_MAX) {
1025 txq->stats.oerrors++;
1026 break;
1027 }
1028 max_elts -= segs_n;
1029 --pkts_n;
1030 cs_flags = txq_ol_cksum_to_cs(buf);
1031 /* Copy metadata from mbuf if valid */
1032 metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
1033 0;
1034 /* Retrieve packet information. */
1035 length = PKT_LEN(buf);
1036 assert(length);
1037 /* Start new session if packet differs. */
1038 if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
1039 ((mpw.len != length) ||
1040 (segs_n != 1) ||
1041 (mpw.wqe->eseg.flow_table_metadata != metadata) ||
1042 (mpw.wqe->eseg.cs_flags != cs_flags)))
1043 mlx5_mpw_close(txq, &mpw);
1044 if (mpw.state == MLX5_MPW_STATE_CLOSED) {
1045 /*
1046 * Multi-Packet WQE consumes at most two WQE.
1047 * mlx5_mpw_new() expects to be able to use such
1048 * resources.
1049 */
1050 if (unlikely(max_wqe < 2))
1051 break;
1052 max_wqe -= 2;
1053 mlx5_mpw_new(txq, &mpw, length);
1054 mpw.wqe->eseg.cs_flags = cs_flags;
1055 mpw.wqe->eseg.flow_table_metadata = metadata;
1056 }
1057 /* Multi-segment packets must be alone in their MPW. */
1058 assert((segs_n == 1) || (mpw.pkts_n == 0));
1059#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1060 length = 0;
1061#endif
1062 do {
1063 volatile struct mlx5_wqe_data_seg *dseg;
1064 uintptr_t addr;
1065
1066 assert(buf);
1067 (*txq->elts)[elts_head++ & elts_m] = buf;
1068 dseg = mpw.data.dseg[mpw.pkts_n];
1069 addr = rte_pktmbuf_mtod(buf, uintptr_t);
1070 *dseg = (struct mlx5_wqe_data_seg){
1071 .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
1072 .lkey = mlx5_tx_mb2mr(txq, buf),
1073 .addr = rte_cpu_to_be_64(addr),
1074 };
1075#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1076 length += DATA_LEN(buf);
1077#endif
1078 buf = buf->next;
1079 ++mpw.pkts_n;
1080 ++j;
1081 } while (--segs_n);
1082 assert(length == mpw.len);
1083 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
1084 mlx5_mpw_close(txq, &mpw);
1085#ifdef MLX5_PMD_SOFT_COUNTERS
1086 /* Increment sent bytes counter. */
1087 txq->stats.obytes += length;
1088#endif
1089 ++i;
1090 } while (pkts_n);
1091 /* Take a shortcut if nothing must be sent. */
1092 if (unlikely(i == 0))
1093 return 0;
1094 /* Check whether completion threshold has been reached. */
1095 /* "j" includes both packets and segments. */
1096 comp = txq->elts_comp + j;
1097 if (comp >= MLX5_TX_COMP_THRESH) {
1098 volatile struct mlx5_wqe *wqe = mpw.wqe;
1099
1100 /* A CQE slot must always be available. */
1101 assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
1102 /* Request completion on last WQE. */
1103 wqe->ctrl[2] = rte_cpu_to_be_32(8);
1104 /* Save elts_head in unused "immediate" field of WQE. */
1105 wqe->ctrl[3] = elts_head;
1106 txq->elts_comp = 0;
1107 } else {
1108 txq->elts_comp = comp;
1109 }
1110#ifdef MLX5_PMD_SOFT_COUNTERS
1111 /* Increment sent packets counter. */
1112 txq->stats.opackets += i;
1113#endif
1114 /* Ring QP doorbell. */
1115 if (mpw.state == MLX5_MPW_STATE_OPENED)
1116 mlx5_mpw_close(txq, &mpw);
1117 mlx5_tx_dbrec(txq, mpw.wqe);
1118 txq->elts_head = elts_head;
1119 return i;
1120}
1121
1122/**
1123 * Open a MPW inline session.
1124 *
1125 * @param txq
1126 * Pointer to TX queue structure.
1127 * @param mpw
1128 * Pointer to MPW session structure.
1129 * @param length
1130 * Packet length.
1131 */
1132static inline void
1133mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
1134 uint32_t length)
1135{
1136 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
1137 struct mlx5_wqe_inl_small *inl;
1138
1139 mpw->state = MLX5_MPW_INL_STATE_OPENED;
1140 mpw->pkts_n = 0;
1141 mpw->len = length;
1142 mpw->total_len = 0;
1143 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
1144 mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
1145 (txq->wqe_ci << 8) |
1146 MLX5_OPCODE_TSO);
1147 mpw->wqe->ctrl[2] = 0;
1148 mpw->wqe->ctrl[3] = 0;
1149 mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
1150 mpw->wqe->eseg.inline_hdr_sz = 0;
1151 mpw->wqe->eseg.cs_flags = 0;
1152 mpw->wqe->eseg.rsvd0 = 0;
1153 mpw->wqe->eseg.rsvd1 = 0;
1154 mpw->wqe->eseg.flow_table_metadata = 0;
1155 inl = (struct mlx5_wqe_inl_small *)
1156 (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
1157 mpw->data.raw = (uint8_t *)&inl->raw;
1158}
1159
1160/**
1161 * Close a MPW inline session.
1162 *
1163 * @param txq
1164 * Pointer to TX queue structure.
1165 * @param mpw
1166 * Pointer to MPW session structure.
1167 */
1168static inline void
1169mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
1170{
1171 unsigned int size;
1172 struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
1173 (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
1174
1175 size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;
1176 /*
1177 * Store size in multiple of 16 bytes. Control and Ethernet segments
1178 * count as 2.
1179 */
1180 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
1181 MLX5_WQE_DS(size));
1182 mpw->state = MLX5_MPW_STATE_CLOSED;
1183 inl->byte_cnt = rte_cpu_to_be_32(mpw->total_len | MLX5_INLINE_SEG);
1184 txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
1185}
1186
1187/**
1188 * DPDK callback for TX with MPW inline support.
1189 *
1190 * @param dpdk_txq
1191 * Generic pointer to TX queue structure.
1192 * @param[in] pkts
1193 * Packets to transmit.
1194 * @param pkts_n
1195 * Number of packets in array.
1196 *
1197 * @return
1198 * Number of packets successfully transmitted (<= pkts_n).
1199 */
1200uint16_t
1201mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
1202 uint16_t pkts_n)
1203{
1204 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
1205 uint16_t elts_head = txq->elts_head;
1206 const uint16_t elts_n = 1 << txq->elts_n;
1207 const uint16_t elts_m = elts_n - 1;
1208 unsigned int i = 0;
1209 unsigned int j = 0;
1210 uint16_t max_elts;
1211 uint16_t max_wqe;
1212 unsigned int comp;
1213 unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
1214 struct mlx5_mpw mpw = {
1215 .state = MLX5_MPW_STATE_CLOSED,
1216 };
1217 /*
1218 * Compute the maximum number of WQE which can be consumed by inline
1219 * code.
1220 * - 2 DSEG for:
1221 * - 1 control segment,
1222 * - 1 Ethernet segment,
1223 * - N Dseg from the inline request.
1224 */
1225 const unsigned int wqe_inl_n =
1226 ((2 * MLX5_WQE_DWORD_SIZE +
1227 txq->max_inline * RTE_CACHE_LINE_SIZE) +
1228 RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;
1229
1230 if (unlikely(!pkts_n))
1231 return 0;
1232 /* Prefetch first packet cacheline. */
1233 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
1234 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
1235 /* Start processing. */
1236 mlx5_tx_complete(txq);
1237 max_elts = (elts_n - (elts_head - txq->elts_tail));
1238 do {
1239 struct rte_mbuf *buf = *(pkts++);
1240 uintptr_t addr;
1241 uint32_t length;
1242 unsigned int segs_n = buf->nb_segs;
1243 uint8_t cs_flags;
1244 rte_be32_t metadata;
1245
1246 /*
1247 * Make sure there is enough room to store this packet and
1248 * that one ring entry remains unused.
1249 */
1250 assert(segs_n);
1251 if (max_elts < segs_n)
1252 break;
1253 /* Do not bother with large packets MPW cannot handle. */
1254 if (segs_n > MLX5_MPW_DSEG_MAX) {
1255 txq->stats.oerrors++;
1256 break;
1257 }
1258 max_elts -= segs_n;
1259 --pkts_n;
1260 /*
1261 * Compute max_wqe in case less WQE were consumed in previous
1262 * iteration.
1263 */
1264 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
1265 cs_flags = txq_ol_cksum_to_cs(buf);
1266 /* Copy metadata from mbuf if valid */
1267 metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
1268 0;
1269 /* Retrieve packet information. */
1270 length = PKT_LEN(buf);
1271 /* Start new session if packet differs. */
1272 if (mpw.state == MLX5_MPW_STATE_OPENED) {
1273 if ((mpw.len != length) ||
1274 (segs_n != 1) ||
1275 (mpw.wqe->eseg.flow_table_metadata != metadata) ||
1276 (mpw.wqe->eseg.cs_flags != cs_flags))
1277 mlx5_mpw_close(txq, &mpw);
1278 } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
1279 if ((mpw.len != length) ||
1280 (segs_n != 1) ||
1281 (length > inline_room) ||
1282 (mpw.wqe->eseg.flow_table_metadata != metadata) ||
1283 (mpw.wqe->eseg.cs_flags != cs_flags)) {
1284 mlx5_mpw_inline_close(txq, &mpw);
1285 inline_room =
1286 txq->max_inline * RTE_CACHE_LINE_SIZE;
1287 }
1288 }
1289 if (mpw.state == MLX5_MPW_STATE_CLOSED) {
1290 if ((segs_n != 1) ||
1291 (length > inline_room)) {
1292 /*
1293 * Multi-Packet WQE consumes at most two WQE.
1294 * mlx5_mpw_new() expects to be able to use
1295 * such resources.
1296 */
1297 if (unlikely(max_wqe < 2))
1298 break;
1299 max_wqe -= 2;
1300 mlx5_mpw_new(txq, &mpw, length);
1301 mpw.wqe->eseg.cs_flags = cs_flags;
1302 mpw.wqe->eseg.flow_table_metadata = metadata;
1303 } else {
1304 if (unlikely(max_wqe < wqe_inl_n))
1305 break;
1306 max_wqe -= wqe_inl_n;
1307 mlx5_mpw_inline_new(txq, &mpw, length);
1308 mpw.wqe->eseg.cs_flags = cs_flags;
1309 mpw.wqe->eseg.flow_table_metadata = metadata;
1310 }
1311 }
1312 /* Multi-segment packets must be alone in their MPW. */
1313 assert((segs_n == 1) || (mpw.pkts_n == 0));
1314 if (mpw.state == MLX5_MPW_STATE_OPENED) {
1315 assert(inline_room ==
1316 txq->max_inline * RTE_CACHE_LINE_SIZE);
1317#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1318 length = 0;
1319#endif
1320 do {
1321 volatile struct mlx5_wqe_data_seg *dseg;
1322
1323 assert(buf);
1324 (*txq->elts)[elts_head++ & elts_m] = buf;
1325 dseg = mpw.data.dseg[mpw.pkts_n];
1326 addr = rte_pktmbuf_mtod(buf, uintptr_t);
1327 *dseg = (struct mlx5_wqe_data_seg){
1328 .byte_count =
1329 rte_cpu_to_be_32(DATA_LEN(buf)),
1330 .lkey = mlx5_tx_mb2mr(txq, buf),
1331 .addr = rte_cpu_to_be_64(addr),
1332 };
1333#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
1334 length += DATA_LEN(buf);
1335#endif
1336 buf = buf->next;
1337 ++mpw.pkts_n;
1338 ++j;
1339 } while (--segs_n);
1340 assert(length == mpw.len);
1341 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
1342 mlx5_mpw_close(txq, &mpw);
1343 } else {
1344 unsigned int max;
1345
1346 assert(mpw.state == MLX5_MPW_INL_STATE_OPENED);
1347 assert(length <= inline_room);
1348 assert(length == DATA_LEN(buf));
1349 addr = rte_pktmbuf_mtod(buf, uintptr_t);
1350 (*txq->elts)[elts_head++ & elts_m] = buf;
1351 /* Maximum number of bytes before wrapping. */
1352 max = ((((uintptr_t)(txq->wqes)) +
1353 (1 << txq->wqe_n) *
1354 MLX5_WQE_SIZE) -
1355 (uintptr_t)mpw.data.raw);
1356 if (length > max) {
1357 rte_memcpy((void *)(uintptr_t)mpw.data.raw,
1358 (void *)addr,
1359 max);
1360 mpw.data.raw = (volatile void *)txq->wqes;
1361 rte_memcpy((void *)(uintptr_t)mpw.data.raw,
1362 (void *)(addr + max),
1363 length - max);
1364 mpw.data.raw += length - max;
1365 } else {
1366 rte_memcpy((void *)(uintptr_t)mpw.data.raw,
1367 (void *)addr,
1368 length);
1369
1370 if (length == max)
1371 mpw.data.raw =
1372 (volatile void *)txq->wqes;
1373 else
1374 mpw.data.raw += length;
1375 }
1376 ++mpw.pkts_n;
1377 mpw.total_len += length;
1378 ++j;
1379 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
1380 mlx5_mpw_inline_close(txq, &mpw);
1381 inline_room =
1382 txq->max_inline * RTE_CACHE_LINE_SIZE;
1383 } else {
1384 inline_room -= length;
1385 }
1386 }
1387#ifdef MLX5_PMD_SOFT_COUNTERS
1388 /* Increment sent bytes counter. */
1389 txq->stats.obytes += length;
1390#endif
1391 ++i;
1392 } while (pkts_n);
1393 /* Take a shortcut if nothing must be sent. */
1394 if (unlikely(i == 0))
1395 return 0;
1396 /* Check whether completion threshold has been reached. */
1397 /* "j" includes both packets and segments. */
1398 comp = txq->elts_comp + j;
1399 if (comp >= MLX5_TX_COMP_THRESH) {
1400 volatile struct mlx5_wqe *wqe = mpw.wqe;
1401
1402 /* A CQE slot must always be available. */
1403 assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
1404 /* Request completion on last WQE. */
1405 wqe->ctrl[2] = rte_cpu_to_be_32(8);
1406 /* Save elts_head in unused "immediate" field of WQE. */
1407 wqe->ctrl[3] = elts_head;
1408 txq->elts_comp = 0;
1409 } else {
1410 txq->elts_comp = comp;
1411 }
1412#ifdef MLX5_PMD_SOFT_COUNTERS
1413 /* Increment sent packets counter. */
1414 txq->stats.opackets += i;
1415#endif
1416 /* Ring QP doorbell. */
1417 if (mpw.state == MLX5_MPW_INL_STATE_OPENED)
1418 mlx5_mpw_inline_close(txq, &mpw);
1419 else if (mpw.state == MLX5_MPW_STATE_OPENED)
1420 mlx5_mpw_close(txq, &mpw);
1421 mlx5_tx_dbrec(txq, mpw.wqe);
1422 txq->elts_head = elts_head;
1423 return i;
1424}
1425
1426/**
1427 * Open an Enhanced MPW session.
1428 *
1429 * @param txq
1430 * Pointer to TX queue structure.
1431 * @param mpw
1432 * Pointer to MPW session structure.
1433 * @param length
1434 * Packet length.
1435 */
1436static inline void
1437mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)
1438{
1439 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
1440
1441 mpw->state = MLX5_MPW_ENHANCED_STATE_OPENED;
1442 mpw->pkts_n = 0;
1443 mpw->total_len = sizeof(struct mlx5_wqe);
1444 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
1445 mpw->wqe->ctrl[0] =
1446 rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
1447 (txq->wqe_ci << 8) |
1448 MLX5_OPCODE_ENHANCED_MPSW);
1449 mpw->wqe->ctrl[2] = 0;
1450 mpw->wqe->ctrl[3] = 0;
1451 memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);
1452 if (unlikely(padding)) {
1453 uintptr_t addr = (uintptr_t)(mpw->wqe + 1);
1454
1455 /* Pad the first 2 DWORDs with zero-length inline header. */
1456 *(volatile uint32_t *)addr = rte_cpu_to_be_32(MLX5_INLINE_SEG);
1457 *(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) =
1458 rte_cpu_to_be_32(MLX5_INLINE_SEG);
1459 mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE;
1460 /* Start from the next WQEBB. */
1461 mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1));
1462 } else {
1463 mpw->data.raw = (volatile void *)(mpw->wqe + 1);
1464 }
1465}
1466
1467/**
1468 * Close an Enhanced MPW session.
1469 *
1470 * @param txq
1471 * Pointer to TX queue structure.
1472 * @param mpw
1473 * Pointer to MPW session structure.
1474 *
1475 * @return
1476 * Number of consumed WQEs.
1477 */
1478static inline uint16_t
1479mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
1480{
1481 uint16_t ret;
1482
1483 /* Store size in multiple of 16 bytes. Control and Ethernet segments
1484 * count as 2.
1485 */
1486 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
1487 MLX5_WQE_DS(mpw->total_len));
1488 mpw->state = MLX5_MPW_STATE_CLOSED;
1489 ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
1490 txq->wqe_ci += ret;
1491 return ret;
1492}
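/*
 * Example for mlx5_empw_close(), assuming the usual 64-byte WQEBB
 * (MLX5_WQE_SIZE): a session with total_len == 200 stores
 * MLX5_WQE_DS(200) 16-byte units in ctrl[1] and advances wqe_ci by
 * (200 + 63) / 64 = 4 WQEBBs, which is also the value returned to the
 * caller for max_wqe accounting.
 */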
1493
1494/**
1495 * TX with Enhanced MPW support.
1496 *
1497 * @param txq
1498 * Pointer to TX queue structure.
1499 * @param[in] pkts
1500 * Packets to transmit.
1501 * @param pkts_n
1502 * Number of packets in array.
1503 *
1504 * @return
1505 * Number of packets successfully transmitted (<= pkts_n).
1506 */
1507static inline uint16_t
1508txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
1509 uint16_t pkts_n)
1510{
1511 uint16_t elts_head = txq->elts_head;
1512 const uint16_t elts_n = 1 << txq->elts_n;
1513 const uint16_t elts_m = elts_n - 1;
1514 unsigned int i = 0;
1515 unsigned int j = 0;
1516 uint16_t max_elts;
1517 uint16_t max_wqe;
1518 unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
1519 unsigned int mpw_room = 0;
1520 unsigned int inl_pad = 0;
1521 uint32_t inl_hdr;
1522 uint64_t addr_64;
1523 struct mlx5_mpw mpw = {
1524 .state = MLX5_MPW_STATE_CLOSED,
1525 };
1526
1527 if (unlikely(!pkts_n))
1528 return 0;
1529 /* Start processing. */
1530 mlx5_tx_complete(txq);
1531 max_elts = (elts_n - (elts_head - txq->elts_tail));
1532 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
1533 if (unlikely(!max_wqe))
1534 return 0;
1535 do {
1536 struct rte_mbuf *buf = *(pkts++);
1537 uintptr_t addr;
1538 unsigned int do_inline = 0; /* Whether inline is possible. */
1539 uint32_t length;
1540 uint8_t cs_flags;
1541 rte_be32_t metadata;
1542
1543 /* Multi-segmented packet is handled in slow-path outside. */
1544 assert(NB_SEGS(buf) == 1);
1545 /* Make sure there is enough room to store this packet. */
1546 if (max_elts - j == 0)
1547 break;
1548 cs_flags = txq_ol_cksum_to_cs(buf);
1549 /* Copy metadata from mbuf if valid */
1550 metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
1551 0;
1552 /* Retrieve packet information. */
1553 length = PKT_LEN(buf);
1554 /* Start new session if:
1555 * - multi-segment packet
1556 * - no space left even for a dseg
1557 * - next packet can be inlined with a new WQE
1558 * - cs_flag differs
1559 */
1560 if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) {
1561 if ((inl_pad + sizeof(struct mlx5_wqe_data_seg) >
1562 mpw_room) ||
1563 (length <= txq->inline_max_packet_sz &&
1564 inl_pad + sizeof(inl_hdr) + length >
1565 mpw_room) ||
1566 (mpw.wqe->eseg.flow_table_metadata != metadata) ||
1567 (mpw.wqe->eseg.cs_flags != cs_flags))
1568 max_wqe -= mlx5_empw_close(txq, &mpw);
1569 }
1570 if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) {
1571 /* In Enhanced MPW, inline as much as the budget is
1572 * allowed. The remaining space is to be filled with
1573 * dsegs. If the title WQEBB isn't padded, it will have
1574 * 2 dsegs there.
1575 */
1576 mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX,
1577 (max_inline ? max_inline :
1578 pkts_n * MLX5_WQE_DWORD_SIZE) +
1579 MLX5_WQE_SIZE);
1580 if (unlikely(max_wqe * MLX5_WQE_SIZE < mpw_room))
1581 break;
1582 /* Don't pad the title WQEBB to not waste WQ. */
1583 mlx5_empw_new(txq, &mpw, 0);
1584 mpw_room -= mpw.total_len;
1585 inl_pad = 0;
1586 do_inline = length <= txq->inline_max_packet_sz &&
1587 sizeof(inl_hdr) + length <= mpw_room &&
1588 !txq->mpw_hdr_dseg;
1589 mpw.wqe->eseg.cs_flags = cs_flags;
1590 mpw.wqe->eseg.flow_table_metadata = metadata;
1591 } else {
1592 /* Evaluate whether the next packet can be inlined.
1593 * Inlining is possible when:
1594 * - length is less than configured value
1595 * - length fits for remaining space
1596 * - not required to fill the title WQEBB with dsegs
1597 */
1598 do_inline =
1599 length <= txq->inline_max_packet_sz &&
1600 inl_pad + sizeof(inl_hdr) + length <=
1601 mpw_room &&
1602 (!txq->mpw_hdr_dseg ||
1603 mpw.total_len >= MLX5_WQE_SIZE);
1604 }
1605 if (max_inline && do_inline) {
1606 /* Inline packet into WQE. */
1607 unsigned int max;
1608
1609 assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
1610 assert(length == DATA_LEN(buf));
1611 inl_hdr = rte_cpu_to_be_32(length | MLX5_INLINE_SEG);
1612 addr = rte_pktmbuf_mtod(buf, uintptr_t);
1613 mpw.data.raw = (volatile void *)
1614 ((uintptr_t)mpw.data.raw + inl_pad);
1615 max = tx_mlx5_wq_tailroom(txq,
1616 (void *)(uintptr_t)mpw.data.raw);
1617 /* Copy inline header. */
1618 mpw.data.raw = (volatile void *)
1619 mlx5_copy_to_wq(
1620 (void *)(uintptr_t)mpw.data.raw,
1621 &inl_hdr,
1622 sizeof(inl_hdr),
1623 (void *)(uintptr_t)txq->wqes,
1624 max);
1625 max = tx_mlx5_wq_tailroom(txq,
1626 (void *)(uintptr_t)mpw.data.raw);
1627 /* Copy packet data. */
1628 mpw.data.raw = (volatile void *)
1629 mlx5_copy_to_wq(
1630 (void *)(uintptr_t)mpw.data.raw,
1631 (void *)addr,
1632 length,
1633 (void *)(uintptr_t)txq->wqes,
1634 max);
1635 ++mpw.pkts_n;
1636 mpw.total_len += (inl_pad + sizeof(inl_hdr) + length);
1637 /* No need to get completion as the entire packet is
1638 * copied to WQ. Free the buf right away.
1639 */
1640 rte_pktmbuf_free_seg(buf);
1641 mpw_room -= (inl_pad + sizeof(inl_hdr) + length);
1642 /* Add pad in the next packet if any. */
1643 inl_pad = (((uintptr_t)mpw.data.raw +
1644 (MLX5_WQE_DWORD_SIZE - 1)) &
1645 ~(MLX5_WQE_DWORD_SIZE - 1)) -
1646 (uintptr_t)mpw.data.raw;
1647 } else {
1648 /* No inline. Load a dseg of packet pointer. */
1649 volatile rte_v128u32_t *dseg;
1650
1651 assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
1652 assert((inl_pad + sizeof(*dseg)) <= mpw_room);
1653 assert(length == DATA_LEN(buf));
1654 if (!tx_mlx5_wq_tailroom(txq,
1655 (void *)((uintptr_t)mpw.data.raw
1656 + inl_pad)))
1657 dseg = (volatile void *)txq->wqes;
1658 else
1659 dseg = (volatile void *)
1660 ((uintptr_t)mpw.data.raw +
1661 inl_pad);
1662 (*txq->elts)[elts_head++ & elts_m] = buf;
1663 addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
1664 uintptr_t));
1665 *dseg = (rte_v128u32_t) {
1666 rte_cpu_to_be_32(length),
1667 mlx5_tx_mb2mr(txq, buf),
1668 addr_64,
1669 addr_64 >> 32,
1670 };
1671 mpw.data.raw = (volatile void *)(dseg + 1);
1672 mpw.total_len += (inl_pad + sizeof(*dseg));
1673 ++j;
1674 ++mpw.pkts_n;
1675 mpw_room -= (inl_pad + sizeof(*dseg));
1676 inl_pad = 0;
1677 }
1678#ifdef MLX5_PMD_SOFT_COUNTERS
1679 /* Increment sent bytes counter. */
1680 txq->stats.obytes += length;
1681#endif
1682 ++i;
1683 } while (i < pkts_n);
1684 /* Take a shortcut if nothing must be sent. */
1685 if (unlikely(i == 0))
1686 return 0;
1687 /* Check whether completion threshold has been reached. */
1688 if (txq->elts_comp + j >= MLX5_TX_COMP_THRESH ||
1689 (uint16_t)(txq->wqe_ci - txq->mpw_comp) >=
1690 (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) {
1691 volatile struct mlx5_wqe *wqe = mpw.wqe;
1692
1693 /* A CQE slot must always be available. */
1694 assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
1695 /* Request completion on last WQE. */
1696 wqe->ctrl[2] = rte_cpu_to_be_32(8);
1697 /* Save elts_head in unused "immediate" field of WQE. */
1698 wqe->ctrl[3] = elts_head;
1699 txq->elts_comp = 0;
1700 txq->mpw_comp = txq->wqe_ci;
1701 } else {
1702 txq->elts_comp += j;
1703 }
1704#ifdef MLX5_PMD_SOFT_COUNTERS
1705 /* Increment sent packets counter. */
1706 txq->stats.opackets += i;
1707#endif
1708 if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED)
1709 mlx5_empw_close(txq, &mpw);
1710 /* Ring QP doorbell. */
1711 mlx5_tx_dbrec(txq, mpw.wqe);
1712 txq->elts_head = elts_head;
1713 return i;
1714}
1715
1716/**
1717 * DPDK callback for TX with Enhanced MPW support.
1718 *
1719 * @param dpdk_txq
1720 * Generic pointer to TX queue structure.
1721 * @param[in] pkts
1722 * Packets to transmit.
1723 * @param pkts_n
1724 * Number of packets in array.
1725 *
1726 * @return
1727 * Number of packets successfully transmitted (<= pkts_n).
1728 */
1729uint16_t
1730mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
1731{
1732 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
1733 uint16_t nb_tx = 0;
1734
1735 while (pkts_n > nb_tx) {
1736 uint16_t n;
1737 uint16_t ret;
1738
1739 n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx);
1740 if (n) {
1741 ret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n);
1742 if (!ret)
1743 break;
1744 nb_tx += ret;
1745 }
1746 n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx);
1747 if (n) {
1748 ret = txq_burst_empw(txq, &pkts[nb_tx], n);
1749 if (!ret)
1750 break;
1751 nb_tx += ret;
1752 }
1753 }
1754 return nb_tx;
1755}
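/*
 * A minimal usage sketch, not part of the driver: once the PMD selects this
 * routine as the active tx_pkt_burst callback, applications reach it
 * indirectly through rte_eth_tx_burst(). The port_id, queue 0 and the
 * xmit_once() helper are assumptions made only for this example.
 *
 *	#include <rte_ethdev.h>
 *	#include <rte_mbuf.h>
 *
 *	static void
 *	xmit_once(uint16_t port_id, struct rte_mbuf **pkts, uint16_t n)
 *	{
 *		uint16_t sent = rte_eth_tx_burst(port_id, 0, pkts, n);
 *
 *		while (sent < n)
 *			rte_pktmbuf_free(pkts[sent++]);
 *	}
 */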
1756
1757/**
1758 * Translate RX completion flags to packet type.
1759 *
1760 * @param[in] rxq
1761 * Pointer to RX queue structure.
1762 * @param[in] cqe
1763 * Pointer to CQE.
1764 *
1765 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
1766 *
1767 * @return
1768 * Packet type for struct rte_mbuf.
1769 */
1770static inline uint32_t
1771rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
1772{
1773 uint8_t idx;
1774 uint8_t pinfo = cqe->pkt_info;
1775 uint16_t ptype = cqe->hdr_type_etc;
1776
1777 /*
1778 * The index to the array should have:
1779 * bit[1:0] = l3_hdr_type
1780 * bit[4:2] = l4_hdr_type
1781 * bit[5] = ip_frag
1782 * bit[6] = tunneled
1783 * bit[7] = outer_l3_type
1784 */
1785 idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
1786 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
1787}
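/*
 * Worked example, for illustration only, using the table built by
 * mlx5_set_ptype_table(): an untunneled, unfragmented IPv4/TCP completion
 * decodes (per the bit layout above) to l3_hdr_type = 2 and l4_hdr_type = 1,
 * so
 *
 *	idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10) = 0x06
 *
 * and mlx5_ptype_table[0x06] is RTE_PTYPE_L2_ETHER |
 * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP. Bit 6 (tunneled) is
 * clear, so no tunnel ptype is OR'ed in.
 */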
1788
1789/**
1790 * Get size of the next packet for a given CQE. For compressed CQEs, the
1791 * consumer index is updated only once all packets of the current one have
1792 * been processed.
1793 *
1794 * @param rxq
1795 * Pointer to RX queue.
1796 * @param cqe
1797 * CQE to process.
1798 * @param[out] mcqe
1799 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
1800 * written.
1801 *
1802 * @return
1803 * Packet size in bytes (0 if there is none), -1 in case of completion
1804 * with error.
1805 */
1806static inline int
1807mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
1808 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
1809{
1810 struct rxq_zip *zip = &rxq->zip;
1811 uint16_t cqe_n = cqe_cnt + 1;
1812 int len = 0;
1813	uint16_t idx, end;
1814
1815 /* Process compressed data in the CQE and mini arrays. */
1816 if (zip->ai) {
1817 volatile struct mlx5_mini_cqe8 (*mc)[8] =
1818 (volatile struct mlx5_mini_cqe8 (*)[8])
1819			(uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info);
1820
1821 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
1822 *mcqe = &(*mc)[zip->ai & 7];
1823		if ((++zip->ai & 7) == 0) {
1824 /* Invalidate consumed CQEs */
1825 idx = zip->ca;
1826 end = zip->na;
1827 while (idx != end) {
1828 (*rxq->cqes)[idx & cqe_cnt].op_own =
1829 MLX5_CQE_INVALIDATE;
1830 ++idx;
1831 }
1832 /*
1833 * Increment consumer index to skip the number of
1834 * CQEs consumed. Hardware leaves holes in the CQ
1835 * ring for software use.
1836 */
1837 zip->ca = zip->na;
1838 zip->na += 8;
1839 }
1840 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
1841 /* Invalidate the rest */
1842 idx = zip->ca;
1843 end = zip->cq_ci;
1844
1845 while (idx != end) {
1846 (*rxq->cqes)[idx & cqe_cnt].op_own =
1847 MLX5_CQE_INVALIDATE;
1848 ++idx;
1849 }
1850 rxq->cq_ci = zip->cq_ci;
1851 zip->ai = 0;
1852 }
1853 /* No compressed data, get next CQE and verify if it is compressed. */
1854 } else {
1855 int ret;
1856 int8_t op_own;
1857
1858 ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
1859 if (unlikely(ret == 1))
1860 return 0;
1861 ++rxq->cq_ci;
1862 op_own = cqe->op_own;
1863		rte_cio_rmb();
1864 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
1865 volatile struct mlx5_mini_cqe8 (*mc)[8] =
1866 (volatile struct mlx5_mini_cqe8 (*)[8])
1867 (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci &
1868					    cqe_cnt].pkt_info);
1869
1870 /* Fix endianness. */
1871			zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
1872 /*
1873 * Current mini array position is the one returned by
1874 * check_cqe64().
1875 *
1876 * If completion comprises several mini arrays, as a
1877 * special case the second one is located 7 CQEs after
1878 * the initial CQE instead of 8 for subsequent ones.
1879 */
1880			zip->ca = rxq->cq_ci;
1881 zip->na = zip->ca + 7;
1882 /* Compute the next non compressed CQE. */
1883 --rxq->cq_ci;
1884 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
1885 /* Get packet size to return. */
1886 len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
1887 *mcqe = &(*mc)[0];
1888			zip->ai = 1;
1889 /* Prefetch all the entries to be invalidated */
1890 idx = zip->ca;
1891 end = zip->cq_ci;
1892 while (idx != end) {
1893 rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]);
1894 ++idx;
1895 }
1896		} else {
1897			len = rte_be_to_cpu_32(cqe->byte_cnt);
1898 }
1899 /* Error while receiving packet. */
1900 if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
1901 return -1;
1902 }
1903 return len;
1904}
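/*
 * Rough worked example of the bookkeeping above, for illustration only:
 * assume the title CQE at cq_ci = 100 is compressed and its byte_cnt
 * reports 24 packets. Then zip->ca = 101 (the first mini-CQE array sits in
 * the next CQE slot), zip->na = 108 and zip->cq_ci = 124. This call returns
 * the first mini-CQE's byte count; the next 23 calls drain the remaining
 * mini-CQEs, invalidating the consumed slots and moving ca/na forward after
 * every eighth entry. Once zip->ai reaches 24, the leftover slots up to
 * slot 124 are invalidated and rxq->cq_ci jumps to 124, resuming normal
 * CQE processing.
 */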
1905
1906/**
1907 * Translate RX completion flags to offload flags.
1908 *
1909 * @param[in] cqe
1910 * Pointer to CQE.
1911 *
1912 * @return
1913 * Offload flags (ol_flags) for struct rte_mbuf.
1914 */
1915static inline uint32_t
1916rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe)
1917{
1918 uint32_t ol_flags = 0;
1919	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);
1920
1921 ol_flags =
1922 TRANSPOSE(flags,
1923 MLX5_CQE_RX_L3_HDR_VALID,
1924 PKT_RX_IP_CKSUM_GOOD) |
1925 TRANSPOSE(flags,
1926 MLX5_CQE_RX_L4_HDR_VALID,
1927 PKT_RX_L4_CKSUM_GOOD);
1928 return ol_flags;
1929}
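/*
 * A small sketch, for illustration only, of how an application typically
 * consumes these flags after receiving an mbuf "m"; good_csum is a
 * hypothetical counter:
 *
 *	if ((m->ol_flags & PKT_RX_IP_CKSUM_GOOD) &&
 *	    (m->ol_flags & PKT_RX_L4_CKSUM_GOOD))
 *		good_csum++;
 */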
1930
1931/**
1932 * Fill in mbuf fields from RX completion flags.
1933 * Note that pkt->ol_flags should be initialized outside of this function.
1934 *
1935 * @param rxq
1936 * Pointer to RX queue.
1937 * @param pkt
1938 * mbuf to fill.
1939 * @param cqe
1940 * CQE to process.
1941 * @param rss_hash_res
1942 * Packet RSS Hash result.
1943 */
1944static inline void
1945rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
1946 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res)
1947{
1948 /* Update packet information. */
1949 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe);
1950 if (rss_hash_res && rxq->rss_hash) {
1951 pkt->hash.rss = rss_hash_res;
1952 pkt->ol_flags |= PKT_RX_RSS_HASH;
1953 }
1954 if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) {
1955 pkt->ol_flags |= PKT_RX_FDIR;
1956 if (cqe->sop_drop_qpn !=
1957 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) {
1958 uint32_t mark = cqe->sop_drop_qpn;
1959
1960 pkt->ol_flags |= PKT_RX_FDIR_ID;
1961 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
1962 }
1963 }
1964 if (rxq->csum)
1965 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
1966 if (rxq->vlan_strip &&
1967 (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) {
1968 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
1969 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
1970 }
1971 if (rxq->hw_timestamp) {
1972 pkt->timestamp = rte_be_to_cpu_64(cqe->timestamp);
1973 pkt->ol_flags |= PKT_RX_TIMESTAMP;
1974 }
1975}
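/*
 * Sketch, for illustration only, of reading the fields filled in above from
 * an application; "m" is a received mbuf and worker/nb_workers/mark/vlan
 * are hypothetical application variables:
 *
 *	if (m->ol_flags & PKT_RX_RSS_HASH)
 *		worker = m->hash.rss % nb_workers;
 *	if (m->ol_flags & PKT_RX_FDIR_ID)
 *		mark = m->hash.fdir.hi;
 *	if (m->ol_flags & PKT_RX_VLAN_STRIPPED)
 *		vlan = m->vlan_tci;
 */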
1976
1977/**
1978 * DPDK callback for RX.
1979 *
1980 * @param dpdk_rxq
1981 * Generic pointer to RX queue structure.
1982 * @param[out] pkts
1983 * Array to store received packets.
1984 * @param pkts_n
1985 * Maximum number of packets in array.
1986 *
1987 * @return
1988 * Number of packets successfully received (<= pkts_n).
1989 */
1990uint16_t
1991mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
1992{
1993	struct mlx5_rxq_data *rxq = dpdk_rxq;
1994 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
1995 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
1996 const unsigned int sges_n = rxq->sges_n;
1997 struct rte_mbuf *pkt = NULL;
1998 struct rte_mbuf *seg = NULL;
1999 volatile struct mlx5_cqe *cqe =
2000 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
2001 unsigned int i = 0;
2002 unsigned int rq_ci = rxq->rq_ci << sges_n;
2003	int len = 0; /* keep its value across iterations. */
2004
2005 while (pkts_n) {
2006 unsigned int idx = rq_ci & wqe_cnt;
2007 volatile struct mlx5_wqe_data_seg *wqe =
2008 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
2009		struct rte_mbuf *rep = (*rxq->elts)[idx];
2010 volatile struct mlx5_mini_cqe8 *mcqe = NULL;
2011 uint32_t rss_hash_res;
2012
2013 if (pkt)
2014 NEXT(seg) = rep;
2015 seg = rep;
2016 rte_prefetch0(seg);
2017 rte_prefetch0(cqe);
2018 rte_prefetch0(wqe);
2019 rep = rte_mbuf_raw_alloc(rxq->mp);
2020 if (unlikely(rep == NULL)) {
2021 ++rxq->stats.rx_nombuf;
2022 if (!pkt) {
2023 /*
2024 * no buffers before we even started,
2025 * bail out silently.
2026 */
2027 break;
2028 }
2029 while (pkt != seg) {
2030 assert(pkt != (*rxq->elts)[idx]);
2031 rep = NEXT(pkt);
2032 NEXT(pkt) = NULL;
2033 NB_SEGS(pkt) = 1;
2034 rte_mbuf_raw_free(pkt);
2035 pkt = rep;
2036 }
2037 break;
2038 }
2039 if (!pkt) {
2040 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
2041			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
2042			if (!len) {
2043				rte_mbuf_raw_free(rep);
2044 break;
2045 }
2046 if (unlikely(len == -1)) {
2047 /* RX error, packet is likely too large. */
2048				rte_mbuf_raw_free(rep);
2049 ++rxq->stats.idropped;
2050 goto skip;
2051 }
2052 pkt = seg;
2053 assert(len >= (rxq->crc_present << 2));
2054			pkt->ol_flags = 0;
2055 /* If compressed, take hash result from mini-CQE. */
2056 rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ?
2057 cqe->rx_hash_res :
2058 mcqe->rx_hash_result);
2059 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
2060 if (rxq->crc_present)
2061 len -= ETHER_CRC_LEN;
2062 PKT_LEN(pkt) = len;
2063 }
2064 DATA_LEN(rep) = DATA_LEN(seg);
2065 PKT_LEN(rep) = PKT_LEN(seg);
2066 SET_DATA_OFF(rep, DATA_OFF(seg));
2067		PORT(rep) = PORT(seg);
2068 (*rxq->elts)[idx] = rep;
2069 /*
2070 * Fill NIC descriptor with the new buffer. The lkey and size
2071 * of the buffers are already known, only the buffer address
2072 * changes.
2073 */
2074 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
2075 /* If there's only one MR, no need to replace LKey in WQE. */
2076 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
2077 wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
2078 if (len > DATA_LEN(seg)) {
2079 len -= DATA_LEN(seg);
2080 ++NB_SEGS(pkt);
2081 ++rq_ci;
2082 continue;
2083 }
2084 DATA_LEN(seg) = len;
2085#ifdef MLX5_PMD_SOFT_COUNTERS
2086 /* Increment bytes counter. */
2087 rxq->stats.ibytes += PKT_LEN(pkt);
2088#endif
2089 /* Return packet. */
2090 *(pkts++) = pkt;
2091 pkt = NULL;
2092 --pkts_n;
2093 ++i;
2094skip:
2095 /* Align consumer index to the next stride. */
2096 rq_ci >>= sges_n;
2097 ++rq_ci;
2098 rq_ci <<= sges_n;
2099 }
2100 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
2101 return 0;
2102 /* Update the consumer index. */
2103 rxq->rq_ci = rq_ci >> sges_n;
2104 rte_cio_wmb();
2105 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
2106 rte_cio_wmb();
2107 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
2108#ifdef MLX5_PMD_SOFT_COUNTERS
2109 /* Increment packets counter. */
2110 rxq->stats.ipackets += i;
2111#endif
2112 return i;
2113}
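/*
 * A minimal usage sketch, not part of the driver: this routine is installed
 * as the device's rx_pkt_burst callback and is reached through
 * rte_eth_rx_burst(). The port_id, queue 0, burst size and poll_once()
 * helper are assumptions made only for this example.
 *
 *	#include <rte_ethdev.h>
 *	#include <rte_mbuf.h>
 *
 *	static void
 *	poll_once(uint16_t port_id)
 *	{
 *		struct rte_mbuf *bufs[32];
 *		uint16_t nb = rte_eth_rx_burst(port_id, 0, bufs, 32);
 *		uint16_t k;
 *
 *		for (k = 0; k < nb; ++k)
 *			rte_pktmbuf_free(bufs[k]);
 *	}
 */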
2114
2115void
2116mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
2117{
2118 struct mlx5_mprq_buf *buf = opaque;
2119
2120 if (rte_atomic16_read(&buf->refcnt) == 1) {
2121 rte_mempool_put(buf->mp, buf);
2122 } else if (rte_atomic16_add_return(&buf->refcnt, -1) == 0) {
2123 rte_atomic16_set(&buf->refcnt, 1);
2124 rte_mempool_put(buf->mp, buf);
2125 }
2126}
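/*
 * Reference-counting note, as read from the code above: when the count is
 * already 1 the chunk has no other users and goes straight back to its
 * mempool; otherwise the count is decremented, and whichever release brings
 * it to zero resets it to 1 before returning the chunk, so the next
 * consumer taken from the pool starts with a count of 1.
 * mlx5_rx_burst_mprq() below registers this callback through
 * rte_pktmbuf_ext_shinfo_init_helper(), so it also runs when the last
 * externally attached mbuf referencing the chunk is freed.
 */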
2127
2128void
2129mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
2130{
2131 mlx5_mprq_buf_free_cb(NULL, buf);
2132}
2133
2134static inline void
2135mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx)
2136{
2137 struct mlx5_mprq_buf *rep = rxq->mprq_repl;
2138 volatile struct mlx5_wqe_data_seg *wqe =
2139 &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
2140 void *addr;
2141
2142 assert(rep != NULL);
2143 /* Replace MPRQ buf. */
2144 (*rxq->mprq_bufs)[rq_idx] = rep;
2145 /* Replace WQE. */
2146 addr = mlx5_mprq_buf_addr(rep);
2147 wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
2148 /* If there's only one MR, no need to replace LKey in WQE. */
2149 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
2150 wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
2151 /* Stash a mbuf for next replacement. */
2152 if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
2153 rxq->mprq_repl = rep;
2154 else
2155 rxq->mprq_repl = NULL;
2156}
2157
2158/**
2159 * DPDK callback for RX with Multi-Packet RQ support.
2160 *
2161 * @param dpdk_rxq
2162 * Generic pointer to RX queue structure.
2163 * @param[out] pkts
2164 * Array to store received packets.
2165 * @param pkts_n
2166 * Maximum number of packets in array.
2167 *
2168 * @return
2169 * Number of packets successfully received (<= pkts_n).
2170 */
2171uint16_t
2172mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
2173{
2174 struct mlx5_rxq_data *rxq = dpdk_rxq;
2175 const unsigned int strd_n = 1 << rxq->strd_num_n;
2176 const unsigned int strd_sz = 1 << rxq->strd_sz_n;
2177 const unsigned int strd_shift =
2178 MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
2179 const unsigned int cq_mask = (1 << rxq->cqe_n) - 1;
2180 const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
2181 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
2182 unsigned int i = 0;
2183 uint32_t rq_ci = rxq->rq_ci;
2184 uint16_t consumed_strd = rxq->consumed_strd;
2185 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
2186
2187 while (i < pkts_n) {
2188 struct rte_mbuf *pkt;
2189 void *addr;
2190 int ret;
2191 unsigned int len;
2192 uint16_t strd_cnt;
2193 uint16_t strd_idx;
2194 uint32_t offset;
2195 uint32_t byte_cnt;
2196 volatile struct mlx5_mini_cqe8 *mcqe = NULL;
2197 uint32_t rss_hash_res = 0;
2198
2199 if (consumed_strd == strd_n) {
2200 /* Replace WQE only if the buffer is still in use. */
2201 if (rte_atomic16_read(&buf->refcnt) > 1) {
2202 mprq_buf_replace(rxq, rq_ci & wq_mask);
2203 /* Release the old buffer. */
2204 mlx5_mprq_buf_free(buf);
2205 } else if (unlikely(rxq->mprq_repl == NULL)) {
2206 struct mlx5_mprq_buf *rep;
2207
2208 /*
2209				 * The MPRQ mempool is currently out of buffers,
2210				 * so every Rx packet is being memcpy'd regardless
2211				 * of its size. Retry the allocation to get back
2212				 * to normal operation.
2213 */
2214 if (!rte_mempool_get(rxq->mprq_mp,
2215 (void **)&rep))
2216 rxq->mprq_repl = rep;
2217 }
2218 /* Advance to the next WQE. */
2219 consumed_strd = 0;
2220 ++rq_ci;
2221 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
2222 }
2223 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
2224 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
2225 if (!ret)
2226 break;
2227 if (unlikely(ret == -1)) {
2228 /* RX error, packet is likely too large. */
2229 ++rxq->stats.idropped;
2230 continue;
2231 }
2232 byte_cnt = ret;
2233 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
2234 MLX5_MPRQ_STRIDE_NUM_SHIFT;
2235 assert(strd_cnt);
2236 consumed_strd += strd_cnt;
2237 if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
2238 continue;
2239 if (mcqe == NULL) {
2240 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
2241 strd_idx = rte_be_to_cpu_16(cqe->wqe_counter);
2242 } else {
2243 /* mini-CQE for MPRQ doesn't have hash result. */
2244 strd_idx = rte_be_to_cpu_16(mcqe->stride_idx);
2245 }
2246 assert(strd_idx < strd_n);
2247 assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask));
2248 /*
2249		 * Currently configured to receive one packet per stride. But if
2250		 * the MTU is adjusted through the kernel interface, the device
2251		 * could consume multiple strides without raising an error. In
2252		 * that case the packet must be dropped because it is bigger
2253		 * than max_rx_pkt_len.
2254 */
2255 if (unlikely(strd_cnt > 1)) {
2256 ++rxq->stats.idropped;
2257 continue;
2258 }
2259 pkt = rte_pktmbuf_alloc(rxq->mp);
2260 if (unlikely(pkt == NULL)) {
2261 ++rxq->stats.rx_nombuf;
2262 break;
2263 }
2264 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
2265 assert((int)len >= (rxq->crc_present << 2));
2266 if (rxq->crc_present)
2267 len -= ETHER_CRC_LEN;
2268 offset = strd_idx * strd_sz + strd_shift;
2269 addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf), offset);
2270 /* Initialize the offload flag. */
2271 pkt->ol_flags = 0;
2272 /*
2273		 * Memcpy the packet into the target mbuf if either:
2274		 * - the packet is smaller than mprq_max_memcpy_len, or
2275		 * - the Multi-Packet RQ mempool is out of buffers.
2276 */
2277 if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
2278 /*
2279			 * When memcpy'ing a packet because the mempool is out
2280			 * of buffers, the packet must fit in the target mbuf.
2281 */
2282 if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
2283 rte_pktmbuf_free_seg(pkt);
2284 ++rxq->stats.idropped;
2285 continue;
2286 }
2287 rte_memcpy(rte_pktmbuf_mtod(pkt, void *), addr, len);
2288 } else {
2289 rte_iova_t buf_iova;
2290 struct rte_mbuf_ext_shared_info *shinfo;
2291 uint16_t buf_len = strd_cnt * strd_sz;
2292
2293 /* Increment the refcnt of the whole chunk. */
2294 rte_atomic16_add_return(&buf->refcnt, 1);
2295 assert((uint16_t)rte_atomic16_read(&buf->refcnt) <=
2296 strd_n + 1);
2297 addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
2298 /*
2299			 * The MLX5 device doesn't use the iova itself, but it is
2300			 * needed in case the Rx packet is later transmitted via
2301			 * a different PMD.
2302 */
2303 buf_iova = rte_mempool_virt2iova(buf) +
2304 RTE_PTR_DIFF(addr, buf);
2305 shinfo = rte_pktmbuf_ext_shinfo_init_helper(addr,
2306 &buf_len, mlx5_mprq_buf_free_cb, buf);
2307 /*
2308 * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when
2309 * attaching the stride to mbuf and more offload flags
2310 * will be added below by calling rxq_cq_to_mbuf().
2311 * Other fields will be overwritten.
2312 */
2313 rte_pktmbuf_attach_extbuf(pkt, addr, buf_iova, buf_len,
2314 shinfo);
2315 rte_pktmbuf_reset_headroom(pkt);
2316 assert(pkt->ol_flags == EXT_ATTACHED_MBUF);
2317 /*
2318 * Prevent potential overflow due to MTU change through
2319 * kernel interface.
2320 */
2321 if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
2322 rte_pktmbuf_free_seg(pkt);
2323 ++rxq->stats.idropped;
2324 continue;
2325 }
2326 }
2327 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
2328 PKT_LEN(pkt) = len;
2329 DATA_LEN(pkt) = len;
2330 PORT(pkt) = rxq->port_id;
2331#ifdef MLX5_PMD_SOFT_COUNTERS
2332 /* Increment bytes counter. */
2333 rxq->stats.ibytes += PKT_LEN(pkt);
2334#endif
2335 /* Return packet. */
2336 *(pkts++) = pkt;
2337 ++i;
2338 }
2339 /* Update the consumer indexes. */
2340 rxq->consumed_strd = consumed_strd;
2341 rte_cio_wmb();
2342 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
2343 if (rq_ci != rxq->rq_ci) {
2344 rxq->rq_ci = rq_ci;
2345 rte_cio_wmb();
2346 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
2347 }
2348#ifdef MLX5_PMD_SOFT_COUNTERS
2349 /* Increment packets counter. */
2350 rxq->stats.ipackets += i;
2351#endif
2352 return i;
2353}
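/*
 * Configuration sketch, illustrative only: Multi-Packet RQ is an opt-in
 * feature normally enabled through mlx5 devargs on the EAL command line
 * (option names as documented for the PMD of this era; the PCI address is
 * a placeholder), e.g.
 *
 *	testpmd -w 0000:03:00.0,mprq_en=1,mprq_max_memcpy_len=128 -- -i
 *
 * where mprq_max_memcpy_len feeds rxq->mprq_max_memcpy_len used above to
 * choose between memcpy and attaching the stride as an external buffer.
 */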
2354
2355/**
2356 * Dummy DPDK callback for TX.
2357 *
2358 * This function is used to temporarily replace the real callback during
2359 * unsafe control operations on the queue, or in case of error.
2360 *
2361 * @param dpdk_txq
2362 * Generic pointer to TX queue structure.
2363 * @param[in] pkts
2364 * Packets to transmit.
2365 * @param pkts_n
2366 * Number of packets in array.
2367 *
2368 * @return
2369 * Number of packets successfully transmitted (<= pkts_n).
2370 */
2371uint16_t
2372removed_tx_burst(void *dpdk_txq __rte_unused,
2373 struct rte_mbuf **pkts __rte_unused,
2374 uint16_t pkts_n __rte_unused)
2375{
2376	rte_mb();
2377 return 0;
2378}
2379
2380/**
2381 * Dummy DPDK callback for RX.
2382 *
2383 * This function is used to temporarily replace the real callback during
2384 * unsafe control operations on the queue, or in case of error.
2385 *
2386 * @param dpdk_rxq
2387 * Generic pointer to RX queue structure.
2388 * @param[out] pkts
2389 * Array to store received packets.
2390 * @param pkts_n
2391 * Maximum number of packets in array.
2392 *
2393 * @return
2394 * Number of packets successfully received (<= pkts_n).
2395 */
2396uint16_t
2397removed_rx_burst(void *dpdk_rxq __rte_unused,
2398 struct rte_mbuf **pkts __rte_unused,
2399 uint16_t pkts_n __rte_unused)
2400{
2401	rte_mb();
2402 return 0;
2403}
2404
2405/*
2406 * Vectorized Rx/Tx routines are not compiled in when the required vector
2407 * instructions are not supported on the target architecture. The following
2408 * null stubs are needed for linkage when the vectorized implementations
2409 * (e.g. mlx5_rxtx_vec_sse.c for x86) are not compiled in.
2410 */
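/*
 * Weak-symbol sketch, generic C illustration rather than mlx5-specific
 * code: __rte_weak expands to __attribute__((weak)), so each stub below is
 * only a default definition.
 *
 *	__rte_weak int foo(void) { return -ENOTSUP; }   (this file)
 *	int foo(void) { return 0; }                     (vectorized object)
 *
 * When a vectorized implementation is compiled in, its strong definition
 * overrides the weak stub at link time; otherwise the stub satisfies the
 * linker and reports the feature as unsupported.
 */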
2411
2412__rte_weak uint16_t
2413mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
2414 struct rte_mbuf **pkts __rte_unused,
2415 uint16_t pkts_n __rte_unused)
2416{
2417	return 0;
2418}
2419
2420__rte_weak uint16_t
2421mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
2422 struct rte_mbuf **pkts __rte_unused,
2423 uint16_t pkts_n __rte_unused)
2424{
2425 return 0;
2426}
2427
2428__rte_weak uint16_t
2429mlx5_rx_burst_vec(void *dpdk_rxq __rte_unused,
2430 struct rte_mbuf **pkts __rte_unused,
2431 uint16_t pkts_n __rte_unused)
2432{
2433 return 0;
2434}
2435
2436__rte_weak int
2437mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
2438{
2439 return -ENOTSUP;
2440}
2441
2442__rte_weak int
2443mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
2444{
2445 return -ENOTSUP;
2446}
2447
2448__rte_weak int
2449mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
2450{
2451 return -ENOTSUP;
2452}
2453
2454__rte_weak int
2455mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
2456{
2457 return -ENOTSUP;
11fdf7f2 2458}