/* SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2016-2018 Solarflare Communications Inc.
 * All rights reserved.
 *
 * This software was jointly developed between OKTET Labs (under contract
 * for Solarflare) and Solarflare Communications, Inc.
 */

#include "sfc.h"
#include "sfc_debug.h"
#include "sfc_log.h"
#include "sfc_ev.h"
#include "sfc_tx.h"
#include "sfc_tweak.h"
#include "sfc_kvargs.h"
/*
 * Maximum number of TX queue flush attempts in case of
 * failure or flush timeout
 */
#define SFC_TX_QFLUSH_ATTEMPTS		(3)

/*
 * Time to wait between event queue polling attempts when waiting for TX
 * queue flush done or flush failed events
 */
#define SFC_TX_QFLUSH_POLL_WAIT_MS	(1)

/*
 * Maximum number of event queue polling attempts when waiting for TX queue
 * flush done or flush failed events; it defines TX queue flush attempt timeout
 * together with SFC_TX_QFLUSH_POLL_WAIT_MS
 */
#define SFC_TX_QFLUSH_POLL_ATTEMPTS	(2000)
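
/*
 * Worst-case TX queue flush time with the constants above:
 * SFC_TX_QFLUSH_ATTEMPTS * SFC_TX_QFLUSH_POLL_ATTEMPTS *
 * SFC_TX_QFLUSH_POLL_WAIT_MS = 3 * 2000 * 1 ms = 6 seconds,
 * matching the delay mentioned in sfc_tx_qstop().
 */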
uint64_t
sfc_tx_get_dev_offload_caps(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t caps = 0;

	if ((sa->priv.dp_tx->features & SFC_DP_TX_FEAT_VLAN_INSERT) &&
	    encp->enc_hw_tx_insert_vlan_enabled)
		caps |= DEV_TX_OFFLOAD_VLAN_INSERT;

	if (sa->priv.dp_tx->features & SFC_DP_TX_FEAT_MULTI_SEG)
		caps |= DEV_TX_OFFLOAD_MULTI_SEGS;
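
	/*
	 * Fast free releases transmitted mbufs directly back to their
	 * mempool without checking reference counters, so it is only
	 * safe when the datapath supports neither multiple mempools
	 * nor reference-counted mbufs; hence the inverted feature checks.
	 */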
	if ((~sa->priv.dp_tx->features & SFC_DP_TX_FEAT_MULTI_POOL) &&
	    (~sa->priv.dp_tx->features & SFC_DP_TX_FEAT_REFCNT))
		caps |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;

	return caps;
}
uint64_t
sfc_tx_get_queue_offload_caps(struct sfc_adapter *sa)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	uint64_t caps = 0;

	caps |= DEV_TX_OFFLOAD_IPV4_CKSUM;
	caps |= DEV_TX_OFFLOAD_UDP_CKSUM;
	caps |= DEV_TX_OFFLOAD_TCP_CKSUM;

	if (encp->enc_tunnel_encapsulations_supported)
		caps |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;

	if (sa->tso)
		caps |= DEV_TX_OFFLOAD_TCP_TSO;

	if (sa->tso_encap)
		caps |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
			 DEV_TX_OFFLOAD_GENEVE_TNL_TSO);

	return caps;
}
static int
sfc_tx_qcheck_conf(struct sfc_adapter *sa, unsigned int txq_max_fill_level,
		   const struct rte_eth_txconf *tx_conf,
		   uint64_t offloads)
{
	int rc = 0;

	if (tx_conf->tx_rs_thresh != 0) {
		sfc_err(sa, "RS bit in transmit descriptor is not supported");
		rc = EINVAL;
	}

	if (tx_conf->tx_free_thresh > txq_max_fill_level) {
		sfc_err(sa,
			"TxQ free threshold too large: %u vs maximum %u",
			tx_conf->tx_free_thresh, txq_max_fill_level);
		rc = EINVAL;
	}

	if (tx_conf->tx_thresh.pthresh != 0 ||
	    tx_conf->tx_thresh.hthresh != 0 ||
	    tx_conf->tx_thresh.wthresh != 0) {
		sfc_warn(sa,
			"prefetch/host/writeback thresholds are not supported");
	}

	/* We either perform both TCP and UDP offload, or no offload at all */
	if (((offloads & DEV_TX_OFFLOAD_TCP_CKSUM) == 0) !=
	    ((offloads & DEV_TX_OFFLOAD_UDP_CKSUM) == 0)) {
		sfc_err(sa, "TCP and UDP offloads can't be set independently");
		rc = EINVAL;
	}

	return rc;
}
void
sfc_tx_qflush_done(struct sfc_txq_info *txq_info)
{
	txq_info->state |= SFC_TXQ_FLUSHED;
	txq_info->state &= ~SFC_TXQ_FLUSHING;
}
int
sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
	     uint16_t nb_tx_desc, unsigned int socket_id,
	     const struct rte_eth_txconf *tx_conf)
{
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	unsigned int txq_entries;
	unsigned int evq_entries;
	unsigned int txq_max_fill_level;
	struct sfc_txq_info *txq_info;
	struct sfc_evq *evq;
	struct sfc_txq *txq;
	int rc = 0;
	struct sfc_dp_tx_qcreate_info info;
	uint64_t offloads;
	struct sfc_dp_tx_hw_limits hw_limits;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	memset(&hw_limits, 0, sizeof(hw_limits));
	hw_limits.txq_max_entries = sa->txq_max_entries;
	hw_limits.txq_min_entries = sa->txq_min_entries;

	rc = sa->priv.dp_tx->qsize_up_rings(nb_tx_desc, &hw_limits,
					    &txq_entries, &evq_entries,
					    &txq_max_fill_level);
	if (rc != 0)
		goto fail_size_up_rings;
	SFC_ASSERT(txq_entries >= sa->txq_min_entries);
	SFC_ASSERT(txq_entries <= sa->txq_max_entries);
	SFC_ASSERT(txq_entries >= nb_tx_desc);
	SFC_ASSERT(txq_max_fill_level <= nb_tx_desc);
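
	/*
	 * The asserts above verify the ring-sizing contract with the
	 * datapath: qsize_up_rings() may round the ring sizes up to
	 * hardware-supported values, but must never shrink the ring
	 * below the requested descriptor count.
	 */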
	offloads = tx_conf->offloads |
		sa->eth_dev->data->dev_conf.txmode.offloads;
	rc = sfc_tx_qcheck_conf(sa, txq_max_fill_level, tx_conf, offloads);
	if (rc != 0)
		goto fail_bad_conf;

	SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count);
	txq_info = &sfc_sa2shared(sa)->txq_info[sw_index];

	txq_info->entries = txq_entries;

	rc = sfc_ev_qinit(sa, SFC_EVQ_TYPE_TX, sw_index,
			  evq_entries, socket_id, &evq);
	if (rc != 0)
		goto fail_ev_qinit;

	txq = &sa->txq_ctrl[sw_index];
	txq->hw_index = sw_index;
	txq->evq = evq;
	txq_info->free_thresh =
		(tx_conf->tx_free_thresh) ? tx_conf->tx_free_thresh :
		SFC_TX_DEFAULT_FREE_THRESH;
	txq_info->offloads = offloads;

	rc = sfc_dma_alloc(sa, "txq", sw_index,
			   efx_txq_size(sa->nic, txq_info->entries),
			   socket_id, &txq->mem);
	if (rc != 0)
		goto fail_dma_alloc;
	memset(&info, 0, sizeof(info));
	info.max_fill_level = txq_max_fill_level;
	info.free_thresh = txq_info->free_thresh;
	info.offloads = offloads;
	info.txq_entries = txq_info->entries;
	info.dma_desc_size_max = encp->enc_tx_dma_desc_size_max;
	info.txq_hw_ring = txq->mem.esm_base;
	info.evq_entries = evq_entries;
	info.evq_hw_ring = evq->mem.esm_base;
	info.hw_index = txq->hw_index;
	info.mem_bar = sa->mem_bar.esb_base;
	info.vi_window_shift = encp->enc_vi_window_shift;
	info.tso_tcp_header_offset_limit =
		encp->enc_tx_tso_tcp_header_offset_limit;
	rc = sa->priv.dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
				     &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
				     socket_id, &info, &txq_info->dp);
	if (rc != 0)
		goto fail_dp_tx_qinit;

	evq->dp_txq = txq_info->dp;

	txq_info->state = SFC_TXQ_INITIALIZED;

	txq_info->deferred_start = (tx_conf->tx_deferred_start != 0);

	return 0;

fail_dp_tx_qinit:
	sfc_dma_free(sa, &txq->mem);

fail_dma_alloc:
	sfc_ev_qfini(evq);

fail_ev_qinit:
	txq_info->entries = 0;

fail_bad_conf:
fail_size_up_rings:
	sfc_log_init(sa, "failed (TxQ = %u, rc = %d)", sw_index, rc);
	return rc;
}
void
sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sfc_sa2shared(sa)->txq_count);
	sa->eth_dev->data->tx_queues[sw_index] = NULL;

	txq_info = &sfc_sa2shared(sa)->txq_info[sw_index];

	SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED);

	sa->priv.dp_tx->qdestroy(txq_info->dp);
	txq_info->dp = NULL;

	txq_info->state &= ~SFC_TXQ_INITIALIZED;
	txq_info->entries = 0;

	txq = &sa->txq_ctrl[sw_index];

	sfc_dma_free(sa, &txq->mem);

	sfc_ev_qfini(txq->evq);
	txq->evq = NULL;
}
static int
sfc_tx_qinit_info(struct sfc_adapter *sa, unsigned int sw_index)
{
	sfc_log_init(sa, "TxQ = %u", sw_index);

	return 0;
}
static int
sfc_tx_check_mode(struct sfc_adapter *sa, const struct rte_eth_txmode *txmode)
{
	int rc = 0;

	switch (txmode->mq_mode) {
	case ETH_MQ_TX_NONE:
		break;
	default:
		sfc_err(sa, "Tx multi-queue mode %u not supported",
			txmode->mq_mode);
		rc = EINVAL;
	}

	/*
	 * These features are claimed to be i40e-specific,
	 * but it does make sense to double-check their absence
	 */
	if (txmode->hw_vlan_reject_tagged) {
		sfc_err(sa, "Rejecting tagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_reject_untagged) {
		sfc_err(sa, "Rejecting untagged packets not supported");
		rc = EINVAL;
	}

	if (txmode->hw_vlan_insert_pvid) {
		sfc_err(sa, "Port-based VLAN insertion not supported");
		rc = EINVAL;
	}

	return rc;
}
/**
 * Destroy excess queues that are no longer needed after reconfiguration
 * or complete close.
 */
static void
sfc_tx_fini_queues(struct sfc_adapter *sa, unsigned int nb_tx_queues)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	int sw_index;

	SFC_ASSERT(nb_tx_queues <= sas->txq_count);
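
	/*
	 * The index is signed so that the countdown below terminates
	 * correctly even when all queues are finalized (nb_tx_queues == 0).
	 */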
	sw_index = sas->txq_count;
	while (--sw_index >= (int)nb_tx_queues) {
		if (sas->txq_info[sw_index].state & SFC_TXQ_INITIALIZED)
			sfc_tx_qfini(sa, sw_index);
	}

	sas->txq_count = nb_tx_queues;
}
int
sfc_tx_configure(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	const struct rte_eth_conf *dev_conf = &sa->eth_dev->data->dev_conf;
	const unsigned int nb_tx_queues = sa->eth_dev->data->nb_tx_queues;
	int rc = 0;

	sfc_log_init(sa, "nb_tx_queues=%u (old %u)",
		     nb_tx_queues, sas->txq_count);
	/*
	 * The datapath implementation assumes absence of boundary
	 * limits on Tx DMA descriptors. Addition of these checks on
	 * datapath would simply make the datapath slower.
	 */
	if (encp->enc_tx_dma_desc_boundary != 0) {
		rc = ENOTSUP;
		goto fail_tx_dma_desc_boundary;
	}

	rc = sfc_tx_check_mode(sa, &dev_conf->txmode);
	if (rc != 0)
		goto fail_check_mode;
	if (nb_tx_queues == sas->txq_count)
		goto done;

	if (sas->txq_info == NULL) {
		sas->txq_info = rte_calloc_socket("sfc-txqs", nb_tx_queues,
						  sizeof(sas->txq_info[0]), 0,
						  sa->socket_id);
		if (sas->txq_info == NULL)
			goto fail_txqs_alloc;

		/*
		 * Allocate primary process only TxQ control from heap
		 * since it should not be shared.
		 */
		rc = ENOMEM;
		sa->txq_ctrl = calloc(nb_tx_queues, sizeof(sa->txq_ctrl[0]));
		if (sa->txq_ctrl == NULL)
			goto fail_txqs_ctrl_alloc;
	} else {
		struct sfc_txq_info *new_txq_info;
		struct sfc_txq *new_txq_ctrl;
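
		/*
		 * Reconfiguration path: drop the queues that no longer
		 * fit, resize both the shared and the primary-process
		 * arrays, and zero any newly added tail entries below.
		 */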
		if (nb_tx_queues < sas->txq_count)
			sfc_tx_fini_queues(sa, nb_tx_queues);

		new_txq_info =
			rte_realloc(sas->txq_info,
				    nb_tx_queues * sizeof(sas->txq_info[0]), 0);
		if (new_txq_info == NULL && nb_tx_queues > 0)
			goto fail_txqs_realloc;

		new_txq_ctrl = realloc(sa->txq_ctrl,
				       nb_tx_queues * sizeof(sa->txq_ctrl[0]));
		if (new_txq_ctrl == NULL && nb_tx_queues > 0)
			goto fail_txqs_ctrl_realloc;

		sas->txq_info = new_txq_info;
		sa->txq_ctrl = new_txq_ctrl;
		if (nb_tx_queues > sas->txq_count) {
			memset(&sas->txq_info[sas->txq_count], 0,
			       (nb_tx_queues - sas->txq_count) *
			       sizeof(sas->txq_info[0]));
			memset(&sa->txq_ctrl[sas->txq_count], 0,
			       (nb_tx_queues - sas->txq_count) *
			       sizeof(sa->txq_ctrl[0]));
		}
	}

	while (sas->txq_count < nb_tx_queues) {
		rc = sfc_tx_qinit_info(sa, sas->txq_count);
		if (rc != 0)
			goto fail_tx_qinit_info;

		sas->txq_count++;
	}

done:
	return 0;
fail_tx_qinit_info:
fail_txqs_ctrl_realloc:
fail_txqs_realloc:
fail_txqs_ctrl_alloc:
fail_txqs_alloc:
	sfc_tx_close(sa);

fail_check_mode:
fail_tx_dma_desc_boundary:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}
void
sfc_tx_close(struct sfc_adapter *sa)
{
	sfc_tx_fini_queues(sa, 0);

	free(sa->txq_ctrl);
	sa->txq_ctrl = NULL;

	rte_free(sfc_sa2shared(sa)->txq_info);
	sfc_sa2shared(sa)->txq_info = NULL;
}
int
sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	uint64_t offloads_supported = sfc_tx_get_dev_offload_caps(sa) |
				      sfc_tx_get_queue_offload_caps(sa);
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	struct sfc_evq *evq;
	uint16_t flags = 0;
	unsigned int desc_index;
	int rc = 0;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sas->txq_count);
	txq_info = &sas->txq_info[sw_index];

	SFC_ASSERT(txq_info->state == SFC_TXQ_INITIALIZED);

	txq = &sa->txq_ctrl[sw_index];
	evq = txq->evq;

	rc = sfc_ev_qstart(evq, sfc_evq_index_by_txq_sw_index(sa, sw_index));
	if (rc != 0)
		goto fail_ev_qstart;
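
	/*
	 * Translate the per-queue DPDK Tx offloads into libefx TxQ
	 * creation flags; inner checksum flags are selected when tunnel
	 * encapsulation offloads are involved.
	 */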
	if (txq_info->offloads & DEV_TX_OFFLOAD_IPV4_CKSUM)
		flags |= EFX_TXQ_CKSUM_IPV4;

	if (txq_info->offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
		flags |= EFX_TXQ_CKSUM_INNER_IPV4;

	if ((txq_info->offloads & DEV_TX_OFFLOAD_TCP_CKSUM) ||
	    (txq_info->offloads & DEV_TX_OFFLOAD_UDP_CKSUM)) {
		flags |= EFX_TXQ_CKSUM_TCPUDP;

		if (offloads_supported & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)
			flags |= EFX_TXQ_CKSUM_INNER_TCPUDP;
	}

	if (txq_info->offloads & (DEV_TX_OFFLOAD_TCP_TSO |
				  DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
				  DEV_TX_OFFLOAD_GENEVE_TNL_TSO))
		flags |= EFX_TXQ_FATSOV2;
	rc = efx_tx_qcreate(sa->nic, txq->hw_index, 0, &txq->mem,
			    txq_info->entries, 0 /* not used on EF10 */,
			    flags, evq->common,
			    &txq->common, &desc_index);
	if (rc != 0) {
		if (sa->tso && (rc == ENOSPC))
			sfc_err(sa, "ran out of TSO contexts");

		goto fail_tx_qcreate;
	}

	efx_tx_qenable(txq->common);

	txq_info->state |= SFC_TXQ_STARTED;

	rc = sa->priv.dp_tx->qstart(txq_info->dp, evq->read_ptr, desc_index);
	if (rc != 0)
		goto fail_dp_qstart;

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STARTED;

	return 0;

fail_dp_qstart:
	txq_info->state = SFC_TXQ_INITIALIZED;
	efx_tx_qdestroy(txq->common);

fail_tx_qcreate:
	sfc_ev_qstop(evq);

fail_ev_qstart:
	return rc;
}
void
sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	struct rte_eth_dev_data *dev_data;
	struct sfc_txq_info *txq_info;
	struct sfc_txq *txq;
	unsigned int retry_count;
	unsigned int wait_count;
	int rc;

	sfc_log_init(sa, "TxQ = %u", sw_index);

	SFC_ASSERT(sw_index < sas->txq_count);
	txq_info = &sas->txq_info[sw_index];

	if (txq_info->state == SFC_TXQ_INITIALIZED)
		return;

	SFC_ASSERT(txq_info->state & SFC_TXQ_STARTED);

	txq = &sa->txq_ctrl[sw_index];
	sa->priv.dp_tx->qstop(txq_info->dp, &txq->evq->read_ptr);
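
	/*
	 * The datapath queue stop hands the EvQ read pointer back to the
	 * generic driver so that the flush polling below can continue
	 * from where the datapath stopped (the libefx datapath shares
	 * the EvQ and simply ignores it).
	 */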
	/*
	 * Retry TX queue flushing in case of flush failed or
	 * timeout; in the worst case it can delay for 6 seconds
	 */
	for (retry_count = 0;
	     ((txq_info->state & SFC_TXQ_FLUSHED) == 0) &&
	     (retry_count < SFC_TX_QFLUSH_ATTEMPTS);
	     ++retry_count) {
		rc = efx_tx_qflush(txq->common);
		if (rc != 0) {
			txq_info->state |= (rc == EALREADY) ?
				SFC_TXQ_FLUSHED : SFC_TXQ_FLUSH_FAILED;
			break;
		}

		/*
		 * Wait for TX queue flush done or flush failed event at least
		 * SFC_TX_QFLUSH_POLL_WAIT_MS milliseconds and not more
		 * than 2 seconds (SFC_TX_QFLUSH_POLL_WAIT_MS multiplied
		 * by SFC_TX_QFLUSH_POLL_ATTEMPTS)
		 */
		wait_count = 0;
		do {
			rte_delay_ms(SFC_TX_QFLUSH_POLL_WAIT_MS);
			sfc_ev_qpoll(txq->evq);
		} while ((txq_info->state & SFC_TXQ_FLUSHING) &&
			 wait_count++ < SFC_TX_QFLUSH_POLL_ATTEMPTS);
		if (txq_info->state & SFC_TXQ_FLUSHING)
			sfc_err(sa, "TxQ %u flush timed out", sw_index);

		if (txq_info->state & SFC_TXQ_FLUSHED)
			sfc_notice(sa, "TxQ %u flushed", sw_index);
	}

	sa->priv.dp_tx->qreap(txq_info->dp);

	txq_info->state = SFC_TXQ_INITIALIZED;

	efx_tx_qdestroy(txq->common);

	sfc_ev_qstop(txq->evq);

	/*
	 * It seems to be used by DPDK for debug purposes only ('rte_ether')
	 */
	dev_data = sa->eth_dev->data;
	dev_data->tx_queue_state[sw_index] = RTE_ETH_QUEUE_STATE_STOPPED;
}
int
sfc_tx_start(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(sa->nic);
	unsigned int sw_index;
	int rc = 0;

	sfc_log_init(sa, "txq_count = %u", sas->txq_count);
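
	/*
	 * FATSOv2 availability is re-read from the NIC configuration on
	 * each start since firmware features may not survive, e.g., an
	 * MC reboot or firmware change (hence the 'restored' wording in
	 * the warnings below).
	 */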
	if (sa->tso) {
		if (!encp->enc_fw_assisted_tso_v2_enabled) {
			sfc_warn(sa, "TSO support was unable to be restored");
			sa->tso = B_FALSE;
			sa->tso_encap = B_FALSE;
		}
	}

	if (sa->tso_encap && !encp->enc_fw_assisted_tso_v2_encap_enabled) {
		sfc_warn(sa, "Encapsulated TSO support was unable to be restored");
		sa->tso_encap = B_FALSE;
	}

	rc = efx_tx_init(sa->nic);
	if (rc != 0)
		goto fail_efx_tx_init;

	for (sw_index = 0; sw_index < sas->txq_count; ++sw_index) {
		if (sas->txq_info[sw_index].state == SFC_TXQ_INITIALIZED &&
		    (!(sas->txq_info[sw_index].deferred_start) ||
		     sas->txq_info[sw_index].deferred_started)) {
			rc = sfc_tx_qstart(sa, sw_index);
			if (rc != 0)
				goto fail_tx_qstart;
		}
	}

	return 0;

fail_tx_qstart:
	while (sw_index-- > 0)
		sfc_tx_qstop(sa, sw_index);

	efx_tx_fini(sa->nic);

fail_efx_tx_init:
	sfc_log_init(sa, "failed (rc = %d)", rc);
	return rc;
}
void
sfc_tx_stop(struct sfc_adapter *sa)
{
	struct sfc_adapter_shared * const sas = sfc_sa2shared(sa);
	unsigned int sw_index;

	sfc_log_init(sa, "txq_count = %u", sas->txq_count);

	sw_index = sas->txq_count;
	while (sw_index-- > 0) {
		if (sas->txq_info[sw_index].state & SFC_TXQ_STARTED)
			sfc_tx_qstop(sa, sw_index);
	}

	efx_tx_fini(sa->nic);
}
static void
sfc_efx_tx_reap(struct sfc_efx_txq *txq)
{
	unsigned int completed;

	sfc_ev_qpoll(txq->evq);
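
	/*
	 * 'completed' and 'pending' are free-running counters; masking
	 * with 'ptr_mask' (ring size minus one, the ring size being a
	 * power of two) maps them onto ring slots.
	 */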
	for (completed = txq->completed;
	     completed != txq->pending; completed++) {
		struct sfc_efx_tx_sw_desc *txd;

		txd = &txq->sw_ring[completed & txq->ptr_mask];

		if (txd->mbuf != NULL) {
			rte_pktmbuf_free(txd->mbuf);
			txd->mbuf = NULL;
		}
	}

	txq->completed = completed;
}
/*
 * The function is used to insert or update VLAN tag;
 * the firmware has state of the firmware tag to insert per TxQ
 * (controlled by option descriptors), hence, if the tag of the
 * packet to be sent is different from one remembered by the firmware,
 * the function will update it
 */
static unsigned int
sfc_efx_tx_maybe_insert_tag(struct sfc_efx_txq *txq, struct rte_mbuf *m,
			    efx_desc_t **pend)
{
	uint16_t this_tag = ((m->ol_flags & PKT_TX_VLAN_PKT) ?
			     m->vlan_tci : 0);

	if (this_tag == txq->hw_vlan_tci)
		return 0;

	/*
	 * The expression inside SFC_ASSERT() is not desired to be checked in
	 * a non-debug build because it might be too expensive on the data path
	 */
	SFC_ASSERT(efx_nic_cfg_get(txq->evq->sa->nic)->enc_hw_tx_insert_vlan_enabled);

	efx_tx_qdesc_vlantci_create(txq->common, rte_cpu_to_be_16(this_tag),
				    *pend);
	(*pend)++;
	txq->hw_vlan_tci = this_tag;

	return 1;
}
static uint16_t
sfc_efx_prepare_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
		     uint16_t nb_pkts)
{
	struct sfc_dp_txq *dp_txq = tx_queue;
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->evq->sa->nic);
	uint16_t i;

	for (i = 0; i < nb_pkts; i++) {
		int ret;

		/*
		 * EFX Tx datapath may require extra VLAN descriptor if VLAN
		 * insertion offload is requested regardless the offload
		 * requested/supported.
		 */
		ret = sfc_dp_tx_prepare_pkt(tx_pkts[i],
				encp->enc_tx_tso_tcp_header_offset_limit,
				txq->max_fill_level, EFX_TX_FATSOV2_OPT_NDESCS,
				1);
		if (unlikely(ret != 0)) {
			rte_errno = ret;
			break;
		}
	}

	return i;
}
static uint16_t
sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
	struct sfc_dp_txq *dp_txq = (struct sfc_dp_txq *)tx_queue;
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int added = txq->added;
	unsigned int pushed = added;
	unsigned int pkts_sent = 0;
	efx_desc_t *pend = &txq->pend_desc[0];
	const unsigned int hard_max_fill = txq->max_fill_level;
	const unsigned int soft_max_fill = hard_max_fill - txq->free_thresh;
	unsigned int fill_level = added - txq->completed;
	boolean_t reap_done;
	int rc __rte_unused;
	struct rte_mbuf **pktp;
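
	/*
	 * 'hard_max_fill' is the absolute ring capacity limit, while
	 * 'soft_max_fill' keeps 'free_thresh' descriptors in reserve so
	 * that reaping normally stays off the fast path.
	 */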
	if (unlikely((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) == 0))
		goto done;

	/*
	 * If insufficient space for a single packet is present,
	 * we should reap; otherwise, we shouldn't do that all the time
	 * to avoid latency increase
	 */
	reap_done = (fill_level > soft_max_fill);

	if (reap_done) {
		sfc_efx_tx_reap(txq);
		/*
		 * Recalculate fill level since 'txq->completed'
		 * might have changed on reap
		 */
		fill_level = added - txq->completed;
	}
	for (pkts_sent = 0, pktp = &tx_pkts[0];
	     (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
	     pkts_sent++, pktp++) {
		uint16_t hw_vlan_tci_prev = txq->hw_vlan_tci;
		struct rte_mbuf *m_seg = *pktp;
		size_t pkt_len = m_seg->pkt_len;
		unsigned int pkt_descs = 0;
		size_t in_off = 0;

		/*
		 * Here VLAN TCI is expected to be zero in case if no
		 * DEV_TX_OFFLOAD_VLAN_INSERT capability is advertised;
		 * if the calling app ignores the absence of
		 * DEV_TX_OFFLOAD_VLAN_INSERT and pushes VLAN TCI, then
		 * TX_ERROR will occur
		 */
		pkt_descs += sfc_efx_tx_maybe_insert_tag(txq, m_seg, &pend);

		if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
			/*
			 * We expect correct 'pkt->l[2, 3, 4]_len' values
			 * to be set correctly by the caller
			 */
			if (sfc_efx_tso_do(txq, added, &m_seg, &in_off, &pend,
					   &pkt_descs, &pkt_len) != 0) {
				/* We may have reached this place if packet
				 * header linearization is needed but the
				 * header length is greater than
				 * SFC_TSOH_STD_LEN
				 *
				 * We will deceive RTE saying that we have sent
				 * the packet, but we will actually drop it.
				 * Hence, we should revert 'pend' to the
				 * previous state (in case we have added
				 * VLAN descriptor) and start processing
				 * another one packet. But the original
				 * mbuf shouldn't be orphaned
				 */
				pend -= pkt_descs;
				txq->hw_vlan_tci = hw_vlan_tci_prev;

				rte_pktmbuf_free(*pktp);

				continue;
			}

			/*
			 * We've only added 2 FATSOv2 option descriptors
			 * and 1 descriptor for the linearized packet header.
			 * The outstanding work will be done in the same manner
			 * as for the usual non-TSO path
			 */
		}
		for (; m_seg != NULL; m_seg = m_seg->next) {
			efsys_dma_addr_t next_frag;
			size_t seg_len;

			seg_len = m_seg->data_len;
			next_frag = rte_mbuf_data_iova(m_seg);

			/*
			 * If we've started TSO transaction few steps earlier,
			 * we'll skip packet header using an offset in the
			 * current segment (which has been set to the
			 * first one containing payload)
			 */
			seg_len -= in_off;
			next_frag += in_off;
			in_off = 0;

			do {
				efsys_dma_addr_t frag_addr = next_frag;
				size_t frag_len;

				/*
				 * It is assumed here that there is no
				 * limitation on address boundary
				 * crossing by DMA descriptor.
				 */
				frag_len = MIN(seg_len, txq->dma_desc_size_max);
				next_frag += frag_len;
				seg_len -= frag_len;
				pkt_len -= frag_len;

				efx_tx_qdesc_dma_create(txq->common,
							frag_addr, frag_len,
							(pkt_len == 0),
							pend++);

				pkt_descs++;
			} while (seg_len != 0);
		}

		added += pkt_descs;
		fill_level += pkt_descs;
		if (unlikely(fill_level > hard_max_fill)) {
			/*
			 * Our estimation for maximum number of descriptors
			 * required to send a packet seems to be wrong.
			 * Try to reap (if we haven't yet).
			 */
			if (!reap_done) {
				sfc_efx_tx_reap(txq);
				reap_done = B_TRUE;
				fill_level = added - txq->completed;
				if (fill_level > hard_max_fill) {
					pend -= pkt_descs;
					txq->hw_vlan_tci = hw_vlan_tci_prev;
					break;
				}
			} else {
				pend -= pkt_descs;
				txq->hw_vlan_tci = hw_vlan_tci_prev;
				break;
			}
		}

		/* Assign mbuf to the last used desc */
		txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
	}
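
	/*
	 * Post all pending descriptors in one batch and ring the doorbell
	 * (qpush) at most once per burst, only if descriptors were
	 * actually added to the ring.
	 */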
	if (likely(pkts_sent > 0)) {
		rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
				       pend - &txq->pend_desc[0],
				       txq->completed, &txq->added);
		SFC_ASSERT(rc == 0);

		if (likely(pushed != txq->added))
			efx_tx_qpush(txq->common, txq->added, pushed);
	}
#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
	if (!reap_done)
		sfc_efx_tx_reap(txq);
#endif

done:
	return pkts_sent;
}
const struct sfc_dp_tx *
sfc_dp_tx_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter_priv *sap;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sap = sfc_adapter_priv_by_eth_dev(eth_dev);

	return sap->dp_tx;
}
struct sfc_txq_info *
sfc_txq_info_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter_shared *sas;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sas = sfc_adapter_shared_by_eth_dev(eth_dev);

	SFC_ASSERT(dpq->queue_id < sas->txq_count);
	return &sas->txq_info[dpq->queue_id];
}
struct sfc_txq *
sfc_txq_by_dp_txq(const struct sfc_dp_txq *dp_txq)
{
	const struct sfc_dp_queue *dpq = &dp_txq->dpq;
	struct rte_eth_dev *eth_dev;
	struct sfc_adapter *sa;

	SFC_ASSERT(rte_eth_dev_is_valid_port(dpq->port_id));
	eth_dev = &rte_eth_devices[dpq->port_id];

	sa = sfc_adapter_by_eth_dev(eth_dev);

	SFC_ASSERT(dpq->queue_id < sfc_sa2shared(sa)->txq_count);
	return &sa->txq_ctrl[dpq->queue_id];
}
static sfc_dp_tx_qsize_up_rings_t sfc_efx_tx_qsize_up_rings;
static int
sfc_efx_tx_qsize_up_rings(uint16_t nb_tx_desc,
			  __rte_unused struct sfc_dp_tx_hw_limits *limits,
			  unsigned int *txq_entries,
			  unsigned int *evq_entries,
			  unsigned int *txq_max_fill_level)
{
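	/*
	 * EFX_TXQ_LIMIT() keeps some ring entries unused (assumption:
	 * libefx reserves them so the queue is never driven completely
	 * full), hence the fill level limit is below the ring size.
	 */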
	*txq_entries = nb_tx_desc;
	*evq_entries = nb_tx_desc;
	*txq_max_fill_level = EFX_TXQ_LIMIT(*txq_entries);
	return 0;
}
static sfc_dp_tx_qcreate_t sfc_efx_tx_qcreate;
static int
sfc_efx_tx_qcreate(uint16_t port_id, uint16_t queue_id,
		   const struct rte_pci_addr *pci_addr,
		   int socket_id,
		   const struct sfc_dp_tx_qcreate_info *info,
		   struct sfc_dp_txq **dp_txqp)
{
	struct sfc_efx_txq *txq;
	struct sfc_txq *ctrl_txq;
	int rc;

	rc = ENOMEM;
	txq = rte_zmalloc_socket("sfc-efx-txq", sizeof(*txq),
				 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq == NULL)
		goto fail_txq_alloc;

	sfc_dp_queue_init(&txq->dp.dpq, port_id, queue_id, pci_addr);
	rc = ENOMEM;
	txq->pend_desc = rte_calloc_socket("sfc-efx-txq-pend-desc",
					   EFX_TXQ_LIMIT(info->txq_entries),
					   sizeof(*txq->pend_desc), 0,
					   socket_id);
	if (txq->pend_desc == NULL)
		goto fail_pend_desc_alloc;

	rc = ENOMEM;
	txq->sw_ring = rte_calloc_socket("sfc-efx-txq-sw_ring",
					 info->txq_entries,
					 sizeof(*txq->sw_ring),
					 RTE_CACHE_LINE_SIZE, socket_id);
	if (txq->sw_ring == NULL)
		goto fail_sw_ring_alloc;

	ctrl_txq = sfc_txq_by_dp_txq(&txq->dp);
	if (ctrl_txq->evq->sa->tso) {
		rc = sfc_efx_tso_alloc_tsoh_objs(txq->sw_ring,
						 info->txq_entries, socket_id);
		if (rc != 0)
			goto fail_alloc_tsoh_objs;
	}

	txq->evq = ctrl_txq->evq;
	txq->ptr_mask = info->txq_entries - 1;
	txq->max_fill_level = info->max_fill_level;
	txq->free_thresh = info->free_thresh;
	txq->dma_desc_size_max = info->dma_desc_size_max;

	*dp_txqp = &txq->dp;
	return 0;
fail_alloc_tsoh_objs:
	rte_free(txq->sw_ring);

fail_sw_ring_alloc:
	rte_free(txq->pend_desc);

fail_pend_desc_alloc:
	rte_free(txq);

fail_txq_alloc:
	return rc;
}
static sfc_dp_tx_qdestroy_t sfc_efx_tx_qdestroy;
static void
sfc_efx_tx_qdestroy(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	sfc_efx_tso_free_tsoh_objs(txq->sw_ring, txq->ptr_mask + 1);
	rte_free(txq->sw_ring);
	rte_free(txq->pend_desc);
	rte_free(txq);
}
static sfc_dp_tx_qstart_t sfc_efx_tx_qstart;
static int
sfc_efx_tx_qstart(struct sfc_dp_txq *dp_txq,
		  __rte_unused unsigned int evq_read_ptr,
		  unsigned int txq_desc_index)
{
	/* libefx-based datapath is specific to libefx-based PMD */
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	struct sfc_txq *ctrl_txq = sfc_txq_by_dp_txq(dp_txq);

	txq->common = ctrl_txq->common;

	txq->pending = txq->completed = txq->added = txq_desc_index;
	txq->hw_vlan_tci = 0;

	txq->flags |= (SFC_EFX_TXQ_FLAG_STARTED | SFC_EFX_TXQ_FLAG_RUNNING);

	return 0;
}
static sfc_dp_tx_qstop_t sfc_efx_tx_qstop;
static void
sfc_efx_tx_qstop(struct sfc_dp_txq *dp_txq,
		 __rte_unused unsigned int *evq_read_ptr)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	txq->flags &= ~SFC_EFX_TXQ_FLAG_RUNNING;
}
static sfc_dp_tx_qreap_t sfc_efx_tx_qreap;
static void
sfc_efx_tx_qreap(struct sfc_dp_txq *dp_txq)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);
	unsigned int txds;

	sfc_efx_tx_reap(txq);

	for (txds = 0; txds <= txq->ptr_mask; txds++) {
		if (txq->sw_ring[txds].mbuf != NULL) {
			rte_pktmbuf_free(txq->sw_ring[txds].mbuf);
			txq->sw_ring[txds].mbuf = NULL;
		}
	}

	txq->flags &= ~SFC_EFX_TXQ_FLAG_STARTED;
}
static sfc_dp_tx_qdesc_status_t sfc_efx_tx_qdesc_status;
static int
sfc_efx_tx_qdesc_status(struct sfc_dp_txq *dp_txq, uint16_t offset)
{
	struct sfc_efx_txq *txq = sfc_efx_txq_by_dp_txq(dp_txq);

	if (unlikely(offset > txq->ptr_mask))
		return -EINVAL;

	if (unlikely(offset >= txq->max_fill_level))
		return RTE_ETH_TX_DESC_UNAVAIL;

	/*
	 * Poll EvQ to derive up-to-date 'txq->pending' figure;
	 * it is required for the queue to be running, but the
	 * check is omitted because API design assumes that it
	 * is the duty of the caller to satisfy all conditions
	 */
	SFC_ASSERT((txq->flags & SFC_EFX_TXQ_FLAG_RUNNING) ==
		   SFC_EFX_TXQ_FLAG_RUNNING);
	sfc_ev_qpoll(txq->evq);

	/*
	 * Ring tail is 'txq->pending', and although descriptors
	 * between 'txq->completed' and 'txq->pending' are still
	 * in use by the driver, they should be reported as DONE
	 */
	if (unlikely(offset < (txq->added - txq->pending)))
		return RTE_ETH_TX_DESC_FULL;

	/*
	 * There is no separate return value for unused descriptors;
	 * the latter will be reported as DONE because genuine DONE
	 * descriptors will be freed anyway in SW on the next burst
	 */
	return RTE_ETH_TX_DESC_DONE;
}
struct sfc_dp_tx sfc_efx_tx = {
	.dp = {
		.name		= SFC_KVARG_DATAPATH_EFX,
		.type		= SFC_DP_TX,
		.hw_fw_caps	= 0,
	},
	.features		= SFC_DP_TX_FEAT_VLAN_INSERT |
				  SFC_DP_TX_FEAT_TSO |
				  SFC_DP_TX_FEAT_MULTI_POOL |
				  SFC_DP_TX_FEAT_REFCNT |
				  SFC_DP_TX_FEAT_MULTI_SEG,
	.qsize_up_rings		= sfc_efx_tx_qsize_up_rings,
	.qcreate		= sfc_efx_tx_qcreate,
	.qdestroy		= sfc_efx_tx_qdestroy,
	.qstart			= sfc_efx_tx_qstart,
	.qstop			= sfc_efx_tx_qstop,
	.qreap			= sfc_efx_tx_qreap,
	.qdesc_status		= sfc_efx_tx_qdesc_status,
	.pkt_prepare		= sfc_efx_prepare_pkts,
	.pkt_burst		= sfc_efx_xmit_pkts,
};
,