/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_RXTX_H_
#define RTE_PMD_MLX5_RXTX_H_

#include <sys/queue.h>

/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#pragma GCC diagnostic ignored "-Wpedantic"
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#pragma GCC diagnostic error "-Wpedantic"

#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_hexdump.h>
#include <rte_atomic.h>
#include <rte_spinlock.h>

#include "mlx5_utils.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"

/* Support tunnel matching. */
#define MLX5_FLOW_TUNNEL 5

struct mlx5_rxq_stats {
	unsigned int idx; /**< Mapping index. */
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t ipackets; /**< Total of successfully received packets. */
	uint64_t ibytes; /**< Total of successfully received bytes. */
#endif
	uint64_t idropped; /**< Total of packets dropped when RX ring full. */
	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
};

struct mlx5_txq_stats {
	unsigned int idx; /**< Mapping index. */
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t opackets; /**< Total of successfully sent packets. */
	uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
	uint64_t oerrors; /**< Total number of failed transmitted packets. */
};

/* Compressed CQE context. */
struct rxq_zip {
	uint16_t ai; /* Array index. */
	uint16_t ca; /* Current array index. */
	uint16_t na; /* Next array index. */
	uint16_t cq_ci; /* The next CQE. */
	uint32_t cqe_cnt; /* Number of CQEs. */
};

/* Multi-Packet RQ buffer header. */
struct mlx5_mprq_buf {
	struct rte_mempool *mp;
	rte_atomic16_t refcnt; /* Atomically accessed refcnt. */
	uint8_t pad[RTE_PKTMBUF_HEADROOM]; /* Headroom for the first packet. */
} __rte_cache_aligned;

/* Get pointer to the first stride. */
#define mlx5_mprq_buf_addr(ptr) ((ptr) + 1)

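/*
 * Illustrative sketch (not part of the original header): the macro above does
 * pointer arithmetic on a struct mlx5_mprq_buf pointer, so "(ptr) + 1" points
 * just past the header (mp, refcnt and the headroom padding), i.e. at the
 * first stride of the Multi-Packet RQ buffer:
 *
 *	struct mlx5_mprq_buf *buf;	// hypothetical buffer obtained elsewhere
 *	void *first_stride = mlx5_mprq_buf_addr(buf);
 */
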
/* RX queue descriptor. */
struct mlx5_rxq_data {
	unsigned int csum:1; /* Enable checksum offloading. */
	unsigned int hw_timestamp:1; /* Enable HW timestamp. */
	unsigned int vlan_strip:1; /* Enable VLAN stripping. */
	unsigned int crc_present:1; /* CRC must be subtracted. */
	unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */
	unsigned int cqe_n:4; /* Log 2 of CQ elements. */
	unsigned int elts_n:4; /* Log 2 of Mbufs. */
	unsigned int rss_hash:1; /* RSS hash result is enabled. */
	unsigned int mark:1; /* Marked flow available on the queue. */
	unsigned int strd_num_n:5; /* Log 2 of the number of strides. */
	unsigned int strd_sz_n:4; /* Log 2 of stride size. */
	unsigned int strd_shift_en:1; /* Enable 2-byte shift on a stride. */
	unsigned int :6; /* Remaining bits. */
	volatile uint32_t *rq_db;
	volatile uint32_t *cq_db;
	uint16_t consumed_strd; /* Number of consumed strides in WQE. */
	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
	uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */
	volatile struct mlx5_cqe (*cqes)[];
	struct rxq_zip zip; /* Compressed context. */
	struct rte_mbuf *(*elts)[];
	struct mlx5_mprq_buf *(*mprq_bufs)[];
	struct rte_mempool *mp;
	struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
	struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
	struct mlx5_rxq_stats stats;
	uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
	struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
	void *cq_uar; /* CQ user access region. */
	uint32_t cqn; /* CQ number. */
	uint8_t cq_arm_sn; /* CQ arm seq number. */
	rte_spinlock_t *uar_lock_cq;
	/* CQ (UAR) access lock required for 32-bit implementations. */
	uint32_t tunnel; /* Tunnel information. */
} __rte_cache_aligned;

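/*
 * Note (added for clarity, not in the original header): strd_num_n and
 * strd_sz_n are stored as log2 values, so one Multi-Packet RQ buffer holds
 * (1 << strd_num_n) strides of (1 << strd_sz_n) bytes each, e.g.:
 *
 *	// mprq_buf_len is an illustrative local variable.
 *	size_t mprq_buf_len = (size_t)(1 << rxq->strd_num_n) *
 *			      (1 << rxq->strd_sz_n);
 */
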
/* Verbs Rx queue elements. */
struct mlx5_rxq_ibv {
	LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */
	struct ibv_cq *cq; /* Completion Queue. */
	struct ibv_wq *wq; /* Work Queue. */
	struct ibv_comp_channel *channel;
};

/* RX queue control descriptor. */
struct mlx5_rxq_ctrl {
	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
	struct priv *priv; /* Back pointer to private data. */
	struct mlx5_rxq_data rxq; /* Data path structure. */
	unsigned int socket; /* CPU socket ID for allocations. */
	unsigned int irq:1; /* Whether IRQ is enabled. */
	uint16_t idx; /* Queue index. */
	uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */
	uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */
};

/* Indirection table. */
struct mlx5_ind_table_ibv {
	LIST_ENTRY(mlx5_ind_table_ibv) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	uint32_t queues_n; /**< Number of queues in the list. */
	uint16_t queues[]; /**< Queue list. */
};

/* Hash Rx queue. */
struct mlx5_hrxq {
	LIST_ENTRY(mlx5_hrxq) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	struct mlx5_ind_table_ibv *ind_table; /* Indirection table. */
	struct ibv_qp *qp; /* Verbs queue pair. */
	uint64_t hash_fields; /* Verbs Hash fields. */
	uint32_t rss_key_len; /* Hash key length in bytes. */
	uint8_t rss_key[]; /* Hash key. */
};

/* TX queue descriptor. */
struct mlx5_txq_data {
	uint16_t elts_head; /* Current counter in (*elts)[]. */
	uint16_t elts_tail; /* Counter of first element awaiting completion. */
	uint16_t elts_comp; /* Counter since last completion request. */
	uint16_t mpw_comp; /* WQ index since last completion request. */
	uint16_t cq_ci; /* Consumer index for completion queue. */
	uint16_t cq_pi; /* Producer index for completion queue. */
	uint16_t wqe_ci; /* Consumer index for work queue. */
	uint16_t wqe_pi; /* Producer index for work queue. */
	uint16_t elts_n:4; /* (*elts)[] length (in log2). */
	uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
	uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
	uint16_t tso_en:1; /* When set hardware TSO is enabled. */
	uint16_t tunnel_en:1;
	/* When set, TX offloads for tunneled packets are supported. */
	uint16_t swp_en:1; /* Whether SW parser is enabled. */
	uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
	uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
	uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
	uint32_t qp_num_8s; /* QP number shifted by 8. */
	uint64_t offloads; /* Offloads for Tx Queue. */
	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
	volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
	volatile void *wqes; /* Work queue (use volatile to write into). */
	volatile uint32_t *qp_db; /* Work queue doorbell. */
	volatile uint32_t *cq_db; /* Completion queue doorbell. */
	volatile void *bf_reg; /* Blueflame register remapped. */
	struct rte_mbuf *(*elts)[]; /* TX elements. */
	struct mlx5_txq_stats stats; /* TX queue counters. */
	rte_spinlock_t *uar_lock;
	/* UAR access lock required for 32-bit implementations. */
} __rte_cache_aligned;

/* Verbs Tx queue elements. */
struct mlx5_txq_ibv {
	LIST_ENTRY(mlx5_txq_ibv) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	struct mlx5_txq_ctrl *txq_ctrl; /* Pointer to the control queue. */
	struct ibv_cq *cq; /* Completion Queue. */
	struct ibv_qp *qp; /* Queue Pair. */
};

/* TX queue control descriptor. */
struct mlx5_txq_ctrl {
	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	unsigned int socket; /* CPU socket ID for allocations. */
	unsigned int max_inline_data; /* Max inline data. */
	unsigned int max_tso_header; /* Max TSO header size. */
	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
	struct priv *priv; /* Back pointer to private data. */
	struct mlx5_txq_data txq; /* Data path structure. */
	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
	uint16_t idx; /* Queue index. */
};

extern uint8_t rss_hash_default_key[];

int mlx5_check_mprq_support(struct rte_eth_dev *dev);
int mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq);
int mlx5_mprq_enabled(struct rte_eth_dev *dev);
int mlx5_mprq_free_mp(struct rte_eth_dev *dev);
int mlx5_mprq_alloc_mp(struct rte_eth_dev *dev);
void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl);
int mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
			unsigned int socket, const struct rte_eth_rxconf *conf,
			struct rte_mempool *mp);
void mlx5_rx_queue_release(void *dpdk_rxq);
int mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev);
void mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev);
int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
struct mlx5_rxq_ibv *mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
struct mlx5_rxq_ibv *mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv);
int mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv);
struct mlx5_rxq_ibv *mlx5_rxq_ibv_drop_new(struct rte_eth_dev *dev);
void mlx5_rxq_ibv_drop_release(struct rte_eth_dev *dev);
int mlx5_rxq_ibv_verify(struct rte_eth_dev *dev);
struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx,
				   uint16_t desc, unsigned int socket,
				   const struct rte_eth_rxconf *conf,
				   struct rte_mempool *mp);
struct mlx5_rxq_ctrl *mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_rxq_verify(struct rte_eth_dev *dev);
int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl);
int rxq_alloc_mprq_buf(struct mlx5_rxq_ctrl *rxq_ctrl);
struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_new(struct rte_eth_dev *dev,
						  const uint16_t *queues,
						  uint32_t queues_n);
struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_get(struct rte_eth_dev *dev,
						  const uint16_t *queues,
						  uint32_t queues_n);
int mlx5_ind_table_ibv_release(struct rte_eth_dev *dev,
			       struct mlx5_ind_table_ibv *ind_tbl);
int mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev);
struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_drop_new(struct rte_eth_dev *dev);
void mlx5_ind_table_ibv_drop_release(struct rte_eth_dev *dev);
struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev,
				const uint8_t *rss_key, uint32_t rss_key_len,
				uint64_t hash_fields,
				const uint16_t *queues, uint32_t queues_n,
				int tunnel __rte_unused);
struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
				const uint8_t *rss_key, uint32_t rss_key_len,
				uint64_t hash_fields,
				const uint16_t *queues, uint32_t queues_n);
int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq);
int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev);
struct mlx5_hrxq *mlx5_hrxq_drop_new(struct rte_eth_dev *dev);
void mlx5_hrxq_drop_release(struct rte_eth_dev *dev);
uint64_t mlx5_get_rx_port_offloads(void);
uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);

int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
			unsigned int socket, const struct rte_eth_txconf *conf);
void mlx5_tx_queue_release(void *dpdk_txq);
int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
int mlx5_txq_ibv_releasable(struct mlx5_txq_ibv *txq_ibv);
int mlx5_txq_ibv_verify(struct rte_eth_dev *dev);
struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
				   uint16_t desc, unsigned int socket,
				   const struct rte_eth_txconf *conf);
struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_verify(struct rte_eth_dev *dev);
void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);

extern uint32_t mlx5_ptype_table[];
extern uint8_t mlx5_cksum_table[];
extern uint8_t mlx5_swp_types_table[];

void mlx5_set_ptype_table(void);
void mlx5_set_cksum_table(void);
void mlx5_set_swp_types_table(void);
uint16_t mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
		       uint16_t pkts_n);
uint16_t mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts,
			   uint16_t pkts_n);
uint16_t mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
				  uint16_t pkts_n);
uint16_t mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts,
			    uint16_t pkts_n);
uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
void mlx5_mprq_buf_free_cb(void *addr, void *opaque);
void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf);
uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts,
			    uint16_t pkts_n);
uint16_t removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
			  uint16_t pkts_n);
uint16_t removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts,
			  uint16_t pkts_n);
int mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset);
int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);

/* Vectorized version of mlx5_rxtx.c */
int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev);
int mlx5_check_vec_tx_support(struct rte_eth_dev *dev);
int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);
int mlx5_check_vec_rx_support(struct rte_eth_dev *dev);
uint16_t mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
			       uint16_t pkts_n);
uint16_t mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
			   uint16_t pkts_n);
uint16_t mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts,
			   uint16_t pkts_n);

void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl);
uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);

/**
 * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
 * 64bit architectures.
 *
 * @param val
 *   Value to write in CPU endian format.
 * @param addr
 *   Address to write to.
 * @param lock
 *   Address of the lock to use for that UAR access.
 */
static __rte_always_inline void
__mlx5_uar_write64_relaxed(uint64_t val, volatile void *addr,
			   rte_spinlock_t *lock __rte_unused)
{
#ifdef RTE_ARCH_64
	rte_write64_relaxed(val, addr);
#else /* !RTE_ARCH_64 */
	rte_spinlock_lock(lock);
	rte_write32_relaxed(val, addr);
	rte_io_wmb();
	rte_write32_relaxed(val >> 32,
			    (volatile void *)((volatile char *)addr + 4));
	rte_spinlock_unlock(lock);
#endif
}

/**
 * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
 * 64bit architectures while guaranteeing the order of execution with the
 * code being executed.
 *
 * @param val
 *   Value to write in CPU endian format.
 * @param addr
 *   Address to write to.
 * @param lock
 *   Address of the lock to use for that UAR access.
 */
static __rte_always_inline void
__mlx5_uar_write64(uint64_t val, volatile void *addr, rte_spinlock_t *lock)
{
	rte_io_wmb();
	__mlx5_uar_write64_relaxed(val, addr, lock);
}

/* Assist macros, used instead of directly calling the functions they wrap. */
#ifdef RTE_ARCH_64
#define mlx5_uar_write64_relaxed(val, dst, lock) \
		__mlx5_uar_write64_relaxed(val, dst, NULL)
#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, NULL)
#else
#define mlx5_uar_write64_relaxed(val, dst, lock) \
		__mlx5_uar_write64_relaxed(val, dst, lock)
#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, lock)
#endif

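/*
 * Usage sketch (illustrative only): callers always pass the UAR lock and let
 * the assist macros above drop it on 64-bit builds, where the single 64-bit
 * store is already atomic. Mirroring mlx5_tx_dbrec_cond_wmb() below:
 *
 *	mlx5_uar_write64_relaxed(*(volatile uint64_t *)wqe,
 *				 (uint64_t *)((uintptr_t)txq->bf_reg),
 *				 txq->uar_lock);
 */
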
/**
 * Verify or set magic value in CQE.
 *
 * @param cqe
 *   Pointer to CQE.
 *
 * @return
 *   0 the first time the CQE is seen, non-zero afterwards.
 */
static inline int
check_cqe_seen(volatile struct mlx5_cqe *cqe)
{
	static const uint8_t magic[] = "seen";
	volatile uint8_t (*buf)[sizeof(cqe->rsvd1)] = &cqe->rsvd1;
	int ret = 1;
	unsigned int i;

	for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i)
		if (!ret || (*buf)[i] != magic[i]) {
			ret = 0;
			(*buf)[i] = magic[i];
		}
	return ret;
}

/**
 * Check whether CQE is valid.
 *
 * @param cqe
 *   Pointer to CQE.
 * @param cqes_n
 *   Size of completion queue.
 * @param ci
 *   Consumer index.
 *
 * @return
 *   0 on success, 1 on failure.
 */
static __rte_always_inline int
check_cqe(volatile struct mlx5_cqe *cqe,
	  unsigned int cqes_n, const uint16_t ci)
{
	uint16_t idx = ci & cqes_n;
	uint8_t op_own = cqe->op_own;
	uint8_t op_owner = MLX5_CQE_OWNER(op_own);
	uint8_t op_code = MLX5_CQE_OPCODE(op_own);

	if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
		return 1; /* No CQE. */
	if ((op_code == MLX5_CQE_RESP_ERR) ||
	    (op_code == MLX5_CQE_REQ_ERR)) {
		volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe;
		uint8_t syndrome = err_cqe->syndrome;

		if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
		    (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
			return 0;
		if (!check_cqe_seen(cqe)) {
			DRV_LOG(ERR,
				"unexpected CQE error %u (0x%02x) syndrome"
				" 0x%02x",
				op_code, op_code, syndrome);
			rte_hexdump(stderr, "MLX5 Error CQE:",
				    (const void *)((uintptr_t)err_cqe),
				    sizeof(*err_cqe));
		}
		return 1;
	} else if ((op_code != MLX5_CQE_RESP_SEND) &&
		   (op_code != MLX5_CQE_REQ)) {
		if (!check_cqe_seen(cqe)) {
			DRV_LOG(ERR, "unexpected CQE opcode %u (0x%02x)",
				op_code, op_code);
			rte_hexdump(stderr, "MLX5 CQE:",
				    (const void *)((uintptr_t)cqe),
				    sizeof(*cqe));
		}
		return 1;
	}
	return 0;
}

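/*
 * Polling sketch (illustrative only, not part of the PMD API): the data path
 * validates the CQE at the current consumer index before trusting its
 * contents, e.g.:
 *
 *	volatile struct mlx5_cqe *cqe = &(*txq->cqes)[cq_ci & cqe_cnt];
 *
 *	if (unlikely(check_cqe(cqe, cqe_n, cq_ci)))
 *		return;	// not yet owned by software, or an error CQE
 */
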
/**
 * Return the address of the WQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param ci
 *   WQE consumer index.
 *
 * @return
 *   WQE address.
 */
static inline uintptr_t *
tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)
{
	ci &= ((1 << txq->wqe_n) - 1);
	return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
}

/**
 * Manage TX completions.
 *
 * When sending a burst, mlx5_tx_burst() posts several WRs.
 *
 * @param txq
 *   Pointer to TX queue structure.
 */
static __rte_always_inline void
mlx5_tx_complete(struct mlx5_txq_data *txq)
{
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	const unsigned int cqe_n = 1 << txq->cqe_n;
	const unsigned int cqe_cnt = cqe_n - 1;
	uint16_t elts_free = txq->elts_tail;
	uint16_t elts_tail;
	uint16_t cq_ci = txq->cq_ci;
	volatile struct mlx5_cqe *cqe = NULL;
	volatile struct mlx5_wqe_ctrl *ctrl;
	struct rte_mbuf *m, *free[elts_n];
	struct rte_mempool *pool = NULL;
	unsigned int blk_n = 0;

	cqe = &(*txq->cqes)[cq_ci & cqe_cnt];
	if (unlikely(check_cqe(cqe, cqe_n, cq_ci)))
		return;
	if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
	    (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
		if (!check_cqe_seen(cqe)) {
			DRV_LOG(ERR, "unexpected error CQE, Tx stopped");
			rte_hexdump(stderr, "MLX5 TXQ:",
				    (const void *)((uintptr_t)txq->wqes),
				    ((1 << txq->wqe_n) * MLX5_WQE_SIZE));
		}
		return;
	}
	++cq_ci;
	txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
	ctrl = (volatile struct mlx5_wqe_ctrl *)
		tx_mlx5_wqe(txq, txq->wqe_pi);
	elts_tail = ctrl->ctrl3;
	assert((elts_tail & elts_m) < (1 << txq->wqe_n));
	/* Free all the mbufs between the old and new tail. */
	while (elts_free != elts_tail) {
		m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
		if (likely(m != NULL)) {
			if (likely(m->pool == pool)) {
				free[blk_n++] = m;
			} else {
				if (likely(pool != NULL))
					rte_mempool_put_bulk(pool,
							     (void *)free,
							     blk_n);
				free[0] = m;
				pool = m->pool;
				blk_n = 1;
			}
		}
	}
	if (blk_n)
		rte_mempool_put_bulk(pool, (void *)free, blk_n);
	/* Poison the freed elts slots to ease debugging. */
	elts_free = txq->elts_tail;
	while (elts_free != elts_tail) {
		memset(&(*txq->elts)[elts_free & elts_m],
		       0x66,
		       sizeof((*txq->elts)[elts_free & elts_m]));
		++elts_free;
	}
	txq->cq_ci = cq_ci;
	txq->elts_tail = elts_tail;
	/* Update the consumer index. */
	rte_compiler_barrier();
	*txq->cq_db = rte_cpu_to_be_32(cq_ci);
}

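/*
 * Usage note (added for clarity): the Tx burst routines in mlx5_rxtx.c
 * typically call mlx5_tx_complete(txq) before building new WQEs so that
 * completed mbufs are returned to their mempools and elts_tail catches up
 * with the hardware, e.g.:
 *
 *	mlx5_tx_complete(txq);	// reclaim finished Tx descriptors first
 */
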
/**
 * Query LKey from a packet buffer for Rx. No need to flush local caches for Rx
 * as mempool is pre-configured and static.
 *
 * @param rxq
 *   Pointer to Rx queue structure.
 * @param addr
 *   Address to search.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static __rte_always_inline uint32_t
mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr)
{
	struct mlx5_mr_ctrl *mr_ctrl = &rxq->mr_ctrl;
	uint32_t lkey;

	/* Linear search on MR cache array. */
	lkey = mlx5_mr_lookup_cache(mr_ctrl->cache, &mr_ctrl->mru,
				    MLX5_MR_CACHE_N, addr);
	if (likely(lkey != UINT32_MAX))
		return lkey;
	/* Take slower bottom-half (binary search) on miss. */
	return mlx5_rx_addr2mr_bh(rxq, addr);
}

#define mlx5_rx_mb2mr(rxq, mb) mlx5_rx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))

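/*
 * Illustrative sketch (not part of the original header): when an Rx WQE is
 * built for a newly allocated mbuf, the LKey is resolved through the helper
 * above, e.g.:
 *
 *	struct rte_mbuf *rep = rte_mbuf_raw_alloc(rxq->mp); // hypothetical
 *	uint32_t lkey = mlx5_rx_mb2mr(rxq, rep);
 */
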
/**
 * Query LKey from a packet buffer for Tx. If not found, add the mempool.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param addr
 *   Address to search.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static __rte_always_inline uint32_t
mlx5_tx_addr2mr(struct mlx5_txq_data *txq, uintptr_t addr)
{
	struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
	uint32_t lkey;

	/* Check generation bit to see if there's any change on existing MRs. */
	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
		mlx5_mr_flush_local_cache(mr_ctrl);
	/* Linear search on MR cache array. */
	lkey = mlx5_mr_lookup_cache(mr_ctrl->cache, &mr_ctrl->mru,
				    MLX5_MR_CACHE_N, addr);
	if (likely(lkey != UINT32_MAX))
		return lkey;
	/* Take slower bottom-half (binary search) on miss. */
	return mlx5_tx_addr2mr_bh(txq, addr);
}

#define mlx5_tx_mb2mr(txq, mb) mlx5_tx_addr2mr(txq, (uintptr_t)((mb)->buf_addr))

/**
 * Ring TX queue doorbell and flush the update if requested.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 * @param cond
 *   Request for write memory barrier after BlueFlame update.
 */
static __rte_always_inline void
mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
		       int cond)
{
	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
	volatile uint64_t *src = ((volatile uint64_t *)wqe);

	*txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
	/* Ensure ordering between DB record and BF copy. */
	rte_wmb();
	mlx5_uar_write64_relaxed(*src, dst, txq->uar_lock);
	if (cond)
		rte_wmb();
}

/**
 * Ring TX queue doorbell and flush the update by write memory barrier.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 */
static __rte_always_inline void
mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
	mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
}

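/*
 * Usage sketch (illustrative only): after the last WQE of a burst has been
 * written, the Tx path rings the doorbell with it, e.g.:
 *
 *	// last_wqe is a hypothetical pointer to the most recently built WQE.
 *	mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);
 */
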
/**
 * Convert mbuf to Verb SWP.
 *
 * TSO and VLAN offload states (TSO offloads enabled, VLAN offloads enabled)
 * are taken from the mbuf ol_flags.
 *
 * @param txq
 *   Pointer to the Tx queue.
 * @param buf
 *   Pointer to the mbuf.
 * @param offsets
 *   Pointer to the SWP header offsets.
 * @param swp_types
 *   Pointer to the SWP header types.
 */
static __rte_always_inline void
txq_mbuf_to_swp(struct mlx5_txq_data *txq, struct rte_mbuf *buf,
		uint8_t *offsets, uint8_t *swp_types)
{
	const uint64_t vlan = buf->ol_flags & PKT_TX_VLAN_PKT;
	const uint64_t tunnel = buf->ol_flags & PKT_TX_TUNNEL_MASK;
	const uint64_t tso = buf->ol_flags & PKT_TX_TCP_SEG;
	const uint64_t csum_flags = buf->ol_flags & PKT_TX_L4_MASK;
	const uint64_t inner_ip =
		buf->ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6);
	const uint64_t ol_flags_mask = PKT_TX_L4_MASK | PKT_TX_IPV6 |
				       PKT_TX_OUTER_IPV6;
	uint16_t idx;
	uint16_t off;

	if (likely(!txq->swp_en || (tunnel != PKT_TX_TUNNEL_UDP &&
				    tunnel != PKT_TX_TUNNEL_IP)))
		return;
	/*
	 * The index should have:
	 * bit[0:1] = PKT_TX_L4_MASK
	 * bit[4] = PKT_TX_IPV6
	 * bit[8] = PKT_TX_OUTER_IPV6
	 * bit[9] = PKT_TX_OUTER_UDP
	 */
	idx = (buf->ol_flags & ol_flags_mask) >> 52;
	if (tunnel == PKT_TX_TUNNEL_UDP)
		idx |= 1 << 9;
	*swp_types = mlx5_swp_types_table[idx];
	/*
	 * Set offsets for SW parser. Since ConnectX-5, SW parser just
	 * complements HW parser. SW parser starts to engage only if HW parser
	 * can't reach a header. For older devices, HW parser will not kick in
	 * if any of the SWP offsets is set. Therefore, all of the L3 offsets
	 * should be set regardless of HW offload.
	 */
	off = buf->outer_l2_len + (vlan ? sizeof(struct vlan_hdr) : 0);
	offsets[1] = off >> 1; /* Outer L3 offset. */
	off += buf->outer_l3_len;
	if (tunnel == PKT_TX_TUNNEL_UDP)
		offsets[0] = off >> 1; /* Outer L4 offset. */
	if (inner_ip) {
		off += buf->l2_len;
		offsets[3] = off >> 1; /* Inner L3 offset. */
		if (csum_flags == PKT_TX_TCP_CKSUM || tso ||
		    csum_flags == PKT_TX_UDP_CKSUM) {
			off += buf->l3_len;
			offsets[2] = off >> 1; /* Inner L4 offset. */
		}
	}
}

/**
 * Convert the Checksum offloads to Verbs.
 *
 * @param buf
 *   Pointer to the mbuf.
 *
 * @return
 *   Converted checksum flags.
 */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
	uint32_t idx;
	uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK);
	const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK |
				       PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM;

	/*
	 * The index should have:
	 * bit[0] = PKT_TX_TCP_SEG
	 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
	 * bit[4] = PKT_TX_IP_CKSUM
	 * bit[8] = PKT_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
	return mlx5_cksum_table[idx];
}

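/*
 * Worked example (added for illustration): a non-tunneled TCP packet with
 * PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM set contributes bit[4] (IP checksum,
 * flag bit 54 shifted right by 50) and bit[2] (TCP checksum, flag bit 52
 * shifted right by 50), while bit[9] stays clear, so idx = 0x14 and the
 * helper returns mlx5_cksum_table[0x14].
 */
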
/**
 * Count the number of contiguous single segment packets.
 *
 * @param pkts
 *   Pointer to array of packets.
 * @param pkts_n
 *   Number of packets.
 *
 * @return
 *   Number of contiguous single segment packets.
 */
static __rte_always_inline unsigned int
txq_count_contig_single_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
{
	unsigned int pos;

	/* Count the number of contiguous single segment packets. */
	for (pos = 0; pos < pkts_n; ++pos)
		if (NB_SEGS(pkts[pos]) > 1)
			break;
	return pos;
}

/**
 * Count the number of contiguous multi-segment packets.
 *
 * @param pkts
 *   Pointer to array of packets.
 * @param pkts_n
 *   Number of packets.
 *
 * @return
 *   Number of contiguous multi-segment packets.
 */
static __rte_always_inline unsigned int
txq_count_contig_multi_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
{
	unsigned int pos;

	/* Count the number of contiguous multi-segment packets. */
	for (pos = 0; pos < pkts_n; ++pos)
		if (NB_SEGS(pkts[pos]) == 1)
			break;
	return pos;
}

#endif /* RTE_PMD_MLX5_RXTX_H_ */