2 * Copyright (c) 2014, 2015 Netronome Systems, Inc.
5 * Small portions derived from code Copyright(c) 2010-2015 Intel Corporation.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
35 * vim:shiftwidth=8:noexpandtab
37 * @file dpdk/pmd/nfp_net.c
39 * Netronome vNIC DPDK Poll-Mode Driver: Main entry point
44 #include <rte_byteorder.h>
45 #include <rte_common.h>
47 #include <rte_debug.h>
48 #include <rte_ethdev.h>
50 #include <rte_ether.h>
51 #include <rte_malloc.h>
52 #include <rte_memzone.h>
53 #include <rte_mempool.h>
54 #include <rte_version.h>
55 #include <rte_string_fns.h>
56 #include <rte_alarm.h>
57 #include <rte_spinlock.h>
59 #include "nfp_net_pmd.h"
60 #include "nfp_net_logs.h"
61 #include "nfp_net_ctrl.h"
64 static void nfp_net_close(struct rte_eth_dev
*dev
);
65 static int nfp_net_configure(struct rte_eth_dev
*dev
);
66 static void nfp_net_dev_interrupt_handler(struct rte_intr_handle
*handle
,
68 static void nfp_net_dev_interrupt_delayed_handler(void *param
);
69 static int nfp_net_dev_mtu_set(struct rte_eth_dev
*dev
, uint16_t mtu
);
70 static void nfp_net_infos_get(struct rte_eth_dev
*dev
,
71 struct rte_eth_dev_info
*dev_info
);
72 static int nfp_net_init(struct rte_eth_dev
*eth_dev
);
73 static int nfp_net_link_update(struct rte_eth_dev
*dev
, int wait_to_complete
);
74 static void nfp_net_promisc_enable(struct rte_eth_dev
*dev
);
75 static void nfp_net_promisc_disable(struct rte_eth_dev
*dev
);
76 static int nfp_net_rx_fill_freelist(struct nfp_net_rxq
*rxq
);
77 static uint32_t nfp_net_rx_queue_count(struct rte_eth_dev
*dev
,
79 static uint16_t nfp_net_recv_pkts(void *rx_queue
, struct rte_mbuf
**rx_pkts
,
81 static void nfp_net_rx_queue_release(void *rxq
);
82 static int nfp_net_rx_queue_setup(struct rte_eth_dev
*dev
, uint16_t queue_idx
,
83 uint16_t nb_desc
, unsigned int socket_id
,
84 const struct rte_eth_rxconf
*rx_conf
,
85 struct rte_mempool
*mp
);
86 static int nfp_net_tx_free_bufs(struct nfp_net_txq
*txq
);
87 static void nfp_net_tx_queue_release(void *txq
);
88 static int nfp_net_tx_queue_setup(struct rte_eth_dev
*dev
, uint16_t queue_idx
,
89 uint16_t nb_desc
, unsigned int socket_id
,
90 const struct rte_eth_txconf
*tx_conf
);
91 static int nfp_net_start(struct rte_eth_dev
*dev
);
92 static void nfp_net_stats_get(struct rte_eth_dev
*dev
,
93 struct rte_eth_stats
*stats
);
94 static void nfp_net_stats_reset(struct rte_eth_dev
*dev
);
95 static void nfp_net_stop(struct rte_eth_dev
*dev
);
96 static uint16_t nfp_net_xmit_pkts(void *tx_queue
, struct rte_mbuf
**tx_pkts
,
100 * The offset of the queue controller queues in the PCIe Target. These
101 * happen to be at the same offset on the NFP6000 and the NFP3200 so
102 * we use a single macro here.
104 #define NFP_PCIE_QUEUE(_q) (0x80000 + (0x800 * ((_q) & 0xff)))
106 /* Maximum value which can be added to a queue with one transaction */
107 #define NFP_QCP_MAX_ADD 0x7f
109 #define RTE_MBUF_DMA_ADDR_DEFAULT(mb) \
110 (uint64_t)((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
112 /* nfp_qcp_ptr - Read or Write Pointer of a queue */
114 NFP_QCP_READ_PTR
= 0,
119 * nfp_qcp_ptr_add - Add the value to the selected pointer of a queue
120 * @q: Base address for queue structure
121 * @ptr: Add to the Read or Write pointer
122 * @val: Value to add to the queue pointer
124 * If @val is greater than @NFP_QCP_MAX_ADD multiple writes are performed.
127 nfp_qcp_ptr_add(uint8_t *q
, enum nfp_qcp_ptr ptr
, uint32_t val
)
131 if (ptr
== NFP_QCP_READ_PTR
)
132 off
= NFP_QCP_QUEUE_ADD_RPTR
;
134 off
= NFP_QCP_QUEUE_ADD_WPTR
;
136 while (val
> NFP_QCP_MAX_ADD
) {
137 nn_writel(rte_cpu_to_le_32(NFP_QCP_MAX_ADD
), q
+ off
);
138 val
-= NFP_QCP_MAX_ADD
;
141 nn_writel(rte_cpu_to_le_32(val
), q
+ off
);
145 * nfp_qcp_read - Read the current Read/Write pointer value for a queue
146 * @q: Base address for queue structure
147 * @ptr: Read or Write pointer
149 static inline uint32_t
150 nfp_qcp_read(uint8_t *q
, enum nfp_qcp_ptr ptr
)
155 if (ptr
== NFP_QCP_READ_PTR
)
156 off
= NFP_QCP_QUEUE_STS_LO
;
158 off
= NFP_QCP_QUEUE_STS_HI
;
160 val
= rte_cpu_to_le_32(nn_readl(q
+ off
));
162 if (ptr
== NFP_QCP_READ_PTR
)
163 return val
& NFP_QCP_QUEUE_STS_LO_READPTR_mask
;
165 return val
& NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask
;
169 * Functions to read/write from/to Config BAR
170 * Performs any endian conversion necessary.
172 static inline uint8_t
173 nn_cfg_readb(struct nfp_net_hw
*hw
, int off
)
175 return nn_readb(hw
->ctrl_bar
+ off
);
179 nn_cfg_writeb(struct nfp_net_hw
*hw
, int off
, uint8_t val
)
181 nn_writeb(val
, hw
->ctrl_bar
+ off
);
184 static inline uint32_t
185 nn_cfg_readl(struct nfp_net_hw
*hw
, int off
)
187 return rte_le_to_cpu_32(nn_readl(hw
->ctrl_bar
+ off
));
191 nn_cfg_writel(struct nfp_net_hw
*hw
, int off
, uint32_t val
)
193 nn_writel(rte_cpu_to_le_32(val
), hw
->ctrl_bar
+ off
);
196 static inline uint64_t
197 nn_cfg_readq(struct nfp_net_hw
*hw
, int off
)
199 return rte_le_to_cpu_64(nn_readq(hw
->ctrl_bar
+ off
));
203 nn_cfg_writeq(struct nfp_net_hw
*hw
, int off
, uint64_t val
)
205 nn_writeq(rte_cpu_to_le_64(val
), hw
->ctrl_bar
+ off
);
208 /* Creating memzone for hardware rings. */
209 static const struct rte_memzone
*
210 ring_dma_zone_reserve(struct rte_eth_dev
*dev
, const char *ring_name
,
211 uint16_t queue_id
, uint32_t ring_size
, int socket_id
)
213 char z_name
[RTE_MEMZONE_NAMESIZE
];
214 const struct rte_memzone
*mz
;
216 snprintf(z_name
, sizeof(z_name
), "%s_%s_%d_%d",
217 dev
->driver
->pci_drv
.driver
.name
,
218 ring_name
, dev
->data
->port_id
, queue_id
);
220 mz
= rte_memzone_lookup(z_name
);
224 return rte_memzone_reserve_aligned(z_name
, ring_size
, socket_id
, 0,
229 * Atomically reads link status information from global structure rte_eth_dev.
232 * - Pointer to the structure rte_eth_dev to read from.
233 * - Pointer to the buffer to be saved with the link status.
236 * - On success, zero.
237 * - On failure, negative value.
240 nfp_net_dev_atomic_read_link_status(struct rte_eth_dev
*dev
,
241 struct rte_eth_link
*link
)
243 struct rte_eth_link
*dst
= link
;
244 struct rte_eth_link
*src
= &dev
->data
->dev_link
;
246 if (rte_atomic64_cmpset((uint64_t *)dst
, *(uint64_t *)dst
,
247 *(uint64_t *)src
) == 0)
254 * Atomically writes the link status information into global
255 * structure rte_eth_dev.
258 * - Pointer to the structure rte_eth_dev to read from.
259 * - Pointer to the buffer to be saved with the link status.
262 * - On success, zero.
263 * - On failure, negative value.
266 nfp_net_dev_atomic_write_link_status(struct rte_eth_dev
*dev
,
267 struct rte_eth_link
*link
)
269 struct rte_eth_link
*dst
= &dev
->data
->dev_link
;
270 struct rte_eth_link
*src
= link
;
272 if (rte_atomic64_cmpset((uint64_t *)dst
, *(uint64_t *)dst
,
273 *(uint64_t *)src
) == 0)
280 nfp_net_rx_queue_release_mbufs(struct nfp_net_rxq
*rxq
)
284 if (rxq
->rxbufs
== NULL
)
287 for (i
= 0; i
< rxq
->rx_count
; i
++) {
288 if (rxq
->rxbufs
[i
].mbuf
) {
289 rte_pktmbuf_free_seg(rxq
->rxbufs
[i
].mbuf
);
290 rxq
->rxbufs
[i
].mbuf
= NULL
;
296 nfp_net_rx_queue_release(void *rx_queue
)
298 struct nfp_net_rxq
*rxq
= rx_queue
;
301 nfp_net_rx_queue_release_mbufs(rxq
);
302 rte_free(rxq
->rxbufs
);
308 nfp_net_reset_rx_queue(struct nfp_net_rxq
*rxq
)
310 nfp_net_rx_queue_release_mbufs(rxq
);
317 nfp_net_tx_queue_release_mbufs(struct nfp_net_txq
*txq
)
321 if (txq
->txbufs
== NULL
)
324 for (i
= 0; i
< txq
->tx_count
; i
++) {
325 if (txq
->txbufs
[i
].mbuf
) {
326 rte_pktmbuf_free(txq
->txbufs
[i
].mbuf
);
327 txq
->txbufs
[i
].mbuf
= NULL
;
333 nfp_net_tx_queue_release(void *tx_queue
)
335 struct nfp_net_txq
*txq
= tx_queue
;
338 nfp_net_tx_queue_release_mbufs(txq
);
339 rte_free(txq
->txbufs
);
345 nfp_net_reset_tx_queue(struct nfp_net_txq
*txq
)
347 nfp_net_tx_queue_release_mbufs(txq
);
355 __nfp_net_reconfig(struct nfp_net_hw
*hw
, uint32_t update
)
359 struct timespec wait
;
361 PMD_DRV_LOG(DEBUG
, "Writing to the configuration queue (%p)...\n",
364 if (hw
->qcp_cfg
== NULL
)
365 rte_panic("Bad configuration queue pointer\n");
367 nfp_qcp_ptr_add(hw
->qcp_cfg
, NFP_QCP_WRITE_PTR
, 1);
370 wait
.tv_nsec
= 1000000;
372 PMD_DRV_LOG(DEBUG
, "Polling for update ack...\n");
374 /* Poll update field, waiting for NFP to ack the config */
375 for (cnt
= 0; ; cnt
++) {
376 new = nn_cfg_readl(hw
, NFP_NET_CFG_UPDATE
);
379 if (new & NFP_NET_CFG_UPDATE_ERR
) {
380 PMD_INIT_LOG(ERR
, "Reconfig error: 0x%08x\n", new);
383 if (cnt
>= NFP_NET_POLL_TIMEOUT
) {
384 PMD_INIT_LOG(ERR
, "Reconfig timeout for 0x%08x after"
385 " %dms\n", update
, cnt
);
386 rte_panic("Exiting\n");
388 nanosleep(&wait
, 0); /* waiting for a 1ms */
390 PMD_DRV_LOG(DEBUG
, "Ack DONE\n");
395 * Reconfigure the NIC
396 * @nn: device to reconfigure
397 * @ctrl: The value for the ctrl field in the BAR config
398 * @update: The value for the update field in the BAR config
400 * Write the update word to the BAR and ping the reconfig queue. Then poll
401 * until the firmware has acknowledged the update by zeroing the update word.
404 nfp_net_reconfig(struct nfp_net_hw
*hw
, uint32_t ctrl
, uint32_t update
)
408 PMD_DRV_LOG(DEBUG
, "nfp_net_reconfig: ctrl=%08x update=%08x\n",
411 rte_spinlock_lock(&hw
->reconfig_lock
);
413 nn_cfg_writel(hw
, NFP_NET_CFG_CTRL
, ctrl
);
414 nn_cfg_writel(hw
, NFP_NET_CFG_UPDATE
, update
);
418 err
= __nfp_net_reconfig(hw
, update
);
420 rte_spinlock_unlock(&hw
->reconfig_lock
);
426 * Reconfig errors imply situations where they can be handled.
427 * Otherwise, rte_panic is called inside __nfp_net_reconfig
429 PMD_INIT_LOG(ERR
, "Error nfp_net reconfig for ctrl: %x update: %x\n",
435 * Configure an Ethernet device. This function must be invoked first
436 * before any other function in the Ethernet API. This function can
437 * also be re-invoked when a device is in the stopped state.
440 nfp_net_configure(struct rte_eth_dev
*dev
)
442 struct rte_eth_conf
*dev_conf
;
443 struct rte_eth_rxmode
*rxmode
;
444 struct rte_eth_txmode
*txmode
;
445 uint32_t new_ctrl
= 0;
447 struct nfp_net_hw
*hw
;
449 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
452 * A DPDK app sends info about how many queues to use and how
453 * those queues need to be configured. This is used by the
454 * DPDK core and it makes sure no more queues than those
455 * advertised by the driver are requested. This function is
456 * called after that internal process
459 PMD_INIT_LOG(DEBUG
, "Configure\n");
461 dev_conf
= &dev
->data
->dev_conf
;
462 rxmode
= &dev_conf
->rxmode
;
463 txmode
= &dev_conf
->txmode
;
465 /* Checking TX mode */
466 if (txmode
->mq_mode
) {
467 PMD_INIT_LOG(INFO
, "TX mq_mode DCB and VMDq not supported\n");
471 /* Checking RX mode */
472 if (rxmode
->mq_mode
& ETH_MQ_RX_RSS
) {
473 if (hw
->cap
& NFP_NET_CFG_CTRL_RSS
) {
474 update
= NFP_NET_CFG_UPDATE_RSS
;
475 new_ctrl
= NFP_NET_CFG_CTRL_RSS
;
477 PMD_INIT_LOG(INFO
, "RSS not supported\n");
482 if (rxmode
->split_hdr_size
) {
483 PMD_INIT_LOG(INFO
, "rxmode does not support split header\n");
487 if (rxmode
->hw_ip_checksum
) {
488 if (hw
->cap
& NFP_NET_CFG_CTRL_RXCSUM
) {
489 new_ctrl
|= NFP_NET_CFG_CTRL_RXCSUM
;
491 PMD_INIT_LOG(INFO
, "RXCSUM not supported\n");
496 if (rxmode
->hw_vlan_filter
) {
497 PMD_INIT_LOG(INFO
, "VLAN filter not supported\n");
501 if (rxmode
->hw_vlan_strip
) {
502 if (hw
->cap
& NFP_NET_CFG_CTRL_RXVLAN
) {
503 new_ctrl
|= NFP_NET_CFG_CTRL_RXVLAN
;
505 PMD_INIT_LOG(INFO
, "hw vlan strip not supported\n");
510 if (rxmode
->hw_vlan_extend
) {
511 PMD_INIT_LOG(INFO
, "VLAN extended not supported\n");
515 /* Supporting VLAN insertion by default */
516 if (hw
->cap
& NFP_NET_CFG_CTRL_TXVLAN
)
517 new_ctrl
|= NFP_NET_CFG_CTRL_TXVLAN
;
519 if (rxmode
->jumbo_frame
)
520 /* this is handled in rte_eth_dev_configure */
522 if (rxmode
->hw_strip_crc
) {
523 PMD_INIT_LOG(INFO
, "strip CRC not supported\n");
527 if (rxmode
->enable_scatter
) {
528 PMD_INIT_LOG(INFO
, "Scatter not supported\n");
535 update
|= NFP_NET_CFG_UPDATE_GEN
;
537 nn_cfg_writel(hw
, NFP_NET_CFG_CTRL
, new_ctrl
);
538 if (nfp_net_reconfig(hw
, new_ctrl
, update
) < 0)
547 nfp_net_enable_queues(struct rte_eth_dev
*dev
)
549 struct nfp_net_hw
*hw
;
550 uint64_t enabled_queues
= 0;
553 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
555 /* Enabling the required TX queues in the device */
556 for (i
= 0; i
< dev
->data
->nb_tx_queues
; i
++)
557 enabled_queues
|= (1 << i
);
559 nn_cfg_writeq(hw
, NFP_NET_CFG_TXRS_ENABLE
, enabled_queues
);
563 /* Enabling the required RX queues in the device */
564 for (i
= 0; i
< dev
->data
->nb_rx_queues
; i
++)
565 enabled_queues
|= (1 << i
);
567 nn_cfg_writeq(hw
, NFP_NET_CFG_RXRS_ENABLE
, enabled_queues
);
571 nfp_net_disable_queues(struct rte_eth_dev
*dev
)
573 struct nfp_net_hw
*hw
;
574 uint32_t new_ctrl
, update
= 0;
576 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
578 nn_cfg_writeq(hw
, NFP_NET_CFG_TXRS_ENABLE
, 0);
579 nn_cfg_writeq(hw
, NFP_NET_CFG_RXRS_ENABLE
, 0);
581 new_ctrl
= hw
->ctrl
& ~NFP_NET_CFG_CTRL_ENABLE
;
582 update
= NFP_NET_CFG_UPDATE_GEN
| NFP_NET_CFG_UPDATE_RING
|
583 NFP_NET_CFG_UPDATE_MSIX
;
585 if (hw
->cap
& NFP_NET_CFG_CTRL_RINGCFG
)
586 new_ctrl
&= ~NFP_NET_CFG_CTRL_RINGCFG
;
588 /* If an error when reconfig we avoid to change hw state */
589 if (nfp_net_reconfig(hw
, new_ctrl
, update
) < 0)
596 nfp_net_rx_freelist_setup(struct rte_eth_dev
*dev
)
600 for (i
= 0; i
< dev
->data
->nb_rx_queues
; i
++) {
601 if (nfp_net_rx_fill_freelist(dev
->data
->rx_queues
[i
]) < 0)
608 nfp_net_params_setup(struct nfp_net_hw
*hw
)
610 nn_cfg_writel(hw
, NFP_NET_CFG_MTU
, hw
->mtu
);
611 nn_cfg_writel(hw
, NFP_NET_CFG_FLBUFSZ
, hw
->flbufsz
);
615 nfp_net_cfg_queue_setup(struct nfp_net_hw
*hw
)
617 hw
->qcp_cfg
= hw
->tx_bar
+ NFP_QCP_QUEUE_ADDR_SZ
;
620 static void nfp_net_read_mac(struct nfp_net_hw
*hw
)
624 tmp
= rte_be_to_cpu_32(nn_cfg_readl(hw
, NFP_NET_CFG_MACADDR
));
625 memcpy(&hw
->mac_addr
[0], &tmp
, sizeof(struct ether_addr
));
627 tmp
= rte_be_to_cpu_32(nn_cfg_readl(hw
, NFP_NET_CFG_MACADDR
+ 4));
628 memcpy(&hw
->mac_addr
[4], &tmp
, 2);
632 nfp_net_start(struct rte_eth_dev
*dev
)
634 uint32_t new_ctrl
, update
= 0;
635 struct nfp_net_hw
*hw
;
638 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
640 PMD_INIT_LOG(DEBUG
, "Start\n");
642 /* Disabling queues just in case... */
643 nfp_net_disable_queues(dev
);
645 /* Writing configuration parameters in the device */
646 nfp_net_params_setup(hw
);
648 /* Enabling the required queues in the device */
649 nfp_net_enable_queues(dev
);
652 new_ctrl
= hw
->ctrl
| NFP_NET_CFG_CTRL_ENABLE
| NFP_NET_CFG_UPDATE_MSIX
;
653 update
= NFP_NET_CFG_UPDATE_GEN
| NFP_NET_CFG_UPDATE_RING
;
655 if (hw
->cap
& NFP_NET_CFG_CTRL_RINGCFG
)
656 new_ctrl
|= NFP_NET_CFG_CTRL_RINGCFG
;
658 nn_cfg_writel(hw
, NFP_NET_CFG_CTRL
, new_ctrl
);
659 if (nfp_net_reconfig(hw
, new_ctrl
, update
) < 0)
663 * Allocating rte mbuffs for configured rx queues.
664 * This requires queues being enabled before
666 if (nfp_net_rx_freelist_setup(dev
) < 0) {
677 * An error returned by this function should mean the app
678 * exiting and then the system releasing all the memory
679 * allocated even memory coming from hugepages.
681 * The device could be enabled at this point with some queues
682 * ready for getting packets. This is true if the call to
683 * nfp_net_rx_freelist_setup() succeeds for some queues but
684 * fails for subsequent queues.
686 * This should make the app exiting but better if we tell the
689 nfp_net_disable_queues(dev
);
694 /* Stop device: disable rx and tx functions to allow for reconfiguring. */
696 nfp_net_stop(struct rte_eth_dev
*dev
)
700 PMD_INIT_LOG(DEBUG
, "Stop\n");
702 nfp_net_disable_queues(dev
);
705 for (i
= 0; i
< dev
->data
->nb_tx_queues
; i
++) {
706 nfp_net_reset_tx_queue(
707 (struct nfp_net_txq
*)dev
->data
->tx_queues
[i
]);
710 for (i
= 0; i
< dev
->data
->nb_rx_queues
; i
++) {
711 nfp_net_reset_rx_queue(
712 (struct nfp_net_rxq
*)dev
->data
->rx_queues
[i
]);
716 /* Reset and stop device. The device can not be restarted. */
718 nfp_net_close(struct rte_eth_dev
*dev
)
720 struct nfp_net_hw
*hw
;
722 PMD_INIT_LOG(DEBUG
, "Close\n");
724 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
727 * We assume that the DPDK application is stopping all the
728 * threads/queues before calling the device close function.
733 rte_intr_disable(&dev
->pci_dev
->intr_handle
);
734 nn_cfg_writeb(hw
, NFP_NET_CFG_LSC
, 0xff);
736 /* unregister callback func from eal lib */
737 rte_intr_callback_unregister(&dev
->pci_dev
->intr_handle
,
738 nfp_net_dev_interrupt_handler
,
742 * The ixgbe PMD driver disables the pcie master on the
743 * device. The i40e does not...
748 nfp_net_promisc_enable(struct rte_eth_dev
*dev
)
750 uint32_t new_ctrl
, update
= 0;
751 struct nfp_net_hw
*hw
;
753 PMD_DRV_LOG(DEBUG
, "Promiscuous mode enable\n");
755 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
757 if (!(hw
->cap
& NFP_NET_CFG_CTRL_PROMISC
)) {
758 PMD_INIT_LOG(INFO
, "Promiscuous mode not supported\n");
762 if (hw
->ctrl
& NFP_NET_CFG_CTRL_PROMISC
) {
763 PMD_DRV_LOG(INFO
, "Promiscuous mode already enabled\n");
767 new_ctrl
= hw
->ctrl
| NFP_NET_CFG_CTRL_PROMISC
;
768 update
= NFP_NET_CFG_UPDATE_GEN
;
771 * DPDK sets promiscuous mode on just after this call assuming
772 * it can not fail ...
774 if (nfp_net_reconfig(hw
, new_ctrl
, update
) < 0)
781 nfp_net_promisc_disable(struct rte_eth_dev
*dev
)
783 uint32_t new_ctrl
, update
= 0;
784 struct nfp_net_hw
*hw
;
786 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
788 if ((hw
->ctrl
& NFP_NET_CFG_CTRL_PROMISC
) == 0) {
789 PMD_DRV_LOG(INFO
, "Promiscuous mode already disabled\n");
793 new_ctrl
= hw
->ctrl
& ~NFP_NET_CFG_CTRL_PROMISC
;
794 update
= NFP_NET_CFG_UPDATE_GEN
;
797 * DPDK sets promiscuous mode off just before this call
798 * assuming it can not fail ...
800 if (nfp_net_reconfig(hw
, new_ctrl
, update
) < 0)
807 * return 0 means link status changed, -1 means not changed
809 * Wait to complete is needed as it can take up to 9 seconds to get the Link
813 nfp_net_link_update(struct rte_eth_dev
*dev
, __rte_unused
int wait_to_complete
)
815 struct nfp_net_hw
*hw
;
816 struct rte_eth_link link
, old
;
817 uint32_t nn_link_status
;
819 PMD_DRV_LOG(DEBUG
, "Link update\n");
821 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
823 memset(&old
, 0, sizeof(old
));
824 nfp_net_dev_atomic_read_link_status(dev
, &old
);
826 nn_link_status
= nn_cfg_readl(hw
, NFP_NET_CFG_STS
);
828 memset(&link
, 0, sizeof(struct rte_eth_link
));
830 if (nn_link_status
& NFP_NET_CFG_STS_LINK
)
831 link
.link_status
= ETH_LINK_UP
;
833 link
.link_duplex
= ETH_LINK_FULL_DUPLEX
;
834 /* Other cards can limit the tx and rx rate per VF */
835 link
.link_speed
= ETH_SPEED_NUM_40G
;
837 if (old
.link_status
!= link
.link_status
) {
838 nfp_net_dev_atomic_write_link_status(dev
, &link
);
839 if (link
.link_status
)
840 PMD_DRV_LOG(INFO
, "NIC Link is Up\n");
842 PMD_DRV_LOG(INFO
, "NIC Link is Down\n");
850 nfp_net_stats_get(struct rte_eth_dev
*dev
, struct rte_eth_stats
*stats
)
853 struct nfp_net_hw
*hw
;
854 struct rte_eth_stats nfp_dev_stats
;
856 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
858 /* RTE_ETHDEV_QUEUE_STAT_CNTRS default value is 16 */
860 /* reading per RX ring stats */
861 for (i
= 0; i
< dev
->data
->nb_rx_queues
; i
++) {
862 if (i
== RTE_ETHDEV_QUEUE_STAT_CNTRS
)
865 nfp_dev_stats
.q_ipackets
[i
] =
866 nn_cfg_readq(hw
, NFP_NET_CFG_RXR_STATS(i
));
868 nfp_dev_stats
.q_ipackets
[i
] -=
869 hw
->eth_stats_base
.q_ipackets
[i
];
871 nfp_dev_stats
.q_ibytes
[i
] =
872 nn_cfg_readq(hw
, NFP_NET_CFG_RXR_STATS(i
) + 0x8);
874 nfp_dev_stats
.q_ibytes
[i
] -=
875 hw
->eth_stats_base
.q_ibytes
[i
];
878 /* reading per TX ring stats */
879 for (i
= 0; i
< dev
->data
->nb_tx_queues
; i
++) {
880 if (i
== RTE_ETHDEV_QUEUE_STAT_CNTRS
)
883 nfp_dev_stats
.q_opackets
[i
] =
884 nn_cfg_readq(hw
, NFP_NET_CFG_TXR_STATS(i
));
886 nfp_dev_stats
.q_opackets
[i
] -=
887 hw
->eth_stats_base
.q_opackets
[i
];
889 nfp_dev_stats
.q_obytes
[i
] =
890 nn_cfg_readq(hw
, NFP_NET_CFG_TXR_STATS(i
) + 0x8);
892 nfp_dev_stats
.q_obytes
[i
] -=
893 hw
->eth_stats_base
.q_obytes
[i
];
896 nfp_dev_stats
.ipackets
=
897 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_RX_FRAMES
);
899 nfp_dev_stats
.ipackets
-= hw
->eth_stats_base
.ipackets
;
901 nfp_dev_stats
.ibytes
=
902 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_RX_OCTETS
);
904 nfp_dev_stats
.ibytes
-= hw
->eth_stats_base
.ibytes
;
906 nfp_dev_stats
.opackets
=
907 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_TX_FRAMES
);
909 nfp_dev_stats
.opackets
-= hw
->eth_stats_base
.opackets
;
911 nfp_dev_stats
.obytes
=
912 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_TX_OCTETS
);
914 nfp_dev_stats
.obytes
-= hw
->eth_stats_base
.obytes
;
916 /* reading general device stats */
917 nfp_dev_stats
.ierrors
=
918 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_RX_ERRORS
);
920 nfp_dev_stats
.ierrors
-= hw
->eth_stats_base
.ierrors
;
922 nfp_dev_stats
.oerrors
=
923 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_TX_ERRORS
);
925 nfp_dev_stats
.oerrors
-= hw
->eth_stats_base
.oerrors
;
927 /* RX ring mbuf allocation failures */
928 nfp_dev_stats
.rx_nombuf
= dev
->data
->rx_mbuf_alloc_failed
;
930 nfp_dev_stats
.imissed
=
931 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_RX_DISCARDS
);
933 nfp_dev_stats
.imissed
-= hw
->eth_stats_base
.imissed
;
936 memcpy(stats
, &nfp_dev_stats
, sizeof(*stats
));
940 nfp_net_stats_reset(struct rte_eth_dev
*dev
)
943 struct nfp_net_hw
*hw
;
945 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
948 * hw->eth_stats_base records the per counter starting point.
952 /* reading per RX ring stats */
953 for (i
= 0; i
< dev
->data
->nb_rx_queues
; i
++) {
954 if (i
== RTE_ETHDEV_QUEUE_STAT_CNTRS
)
957 hw
->eth_stats_base
.q_ipackets
[i
] =
958 nn_cfg_readq(hw
, NFP_NET_CFG_RXR_STATS(i
));
960 hw
->eth_stats_base
.q_ibytes
[i
] =
961 nn_cfg_readq(hw
, NFP_NET_CFG_RXR_STATS(i
) + 0x8);
964 /* reading per TX ring stats */
965 for (i
= 0; i
< dev
->data
->nb_tx_queues
; i
++) {
966 if (i
== RTE_ETHDEV_QUEUE_STAT_CNTRS
)
969 hw
->eth_stats_base
.q_opackets
[i
] =
970 nn_cfg_readq(hw
, NFP_NET_CFG_TXR_STATS(i
));
972 hw
->eth_stats_base
.q_obytes
[i
] =
973 nn_cfg_readq(hw
, NFP_NET_CFG_TXR_STATS(i
) + 0x8);
976 hw
->eth_stats_base
.ipackets
=
977 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_RX_FRAMES
);
979 hw
->eth_stats_base
.ibytes
=
980 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_RX_OCTETS
);
982 hw
->eth_stats_base
.opackets
=
983 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_TX_FRAMES
);
985 hw
->eth_stats_base
.obytes
=
986 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_TX_OCTETS
);
988 /* reading general device stats */
989 hw
->eth_stats_base
.ierrors
=
990 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_RX_ERRORS
);
992 hw
->eth_stats_base
.oerrors
=
993 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_TX_ERRORS
);
995 /* RX ring mbuf allocation failures */
996 dev
->data
->rx_mbuf_alloc_failed
= 0;
998 hw
->eth_stats_base
.imissed
=
999 nn_cfg_readq(hw
, NFP_NET_CFG_STATS_RX_DISCARDS
);
1003 nfp_net_infos_get(struct rte_eth_dev
*dev
, struct rte_eth_dev_info
*dev_info
)
1005 struct nfp_net_hw
*hw
;
1007 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
1009 dev_info
->driver_name
= dev
->driver
->pci_drv
.driver
.name
;
1010 dev_info
->max_rx_queues
= (uint16_t)hw
->max_rx_queues
;
1011 dev_info
->max_tx_queues
= (uint16_t)hw
->max_tx_queues
;
1012 dev_info
->min_rx_bufsize
= ETHER_MIN_MTU
;
1013 dev_info
->max_rx_pktlen
= hw
->mtu
;
1014 /* Next should change when PF support is implemented */
1015 dev_info
->max_mac_addrs
= 1;
1017 if (hw
->cap
& NFP_NET_CFG_CTRL_RXVLAN
)
1018 dev_info
->rx_offload_capa
= DEV_RX_OFFLOAD_VLAN_STRIP
;
1020 if (hw
->cap
& NFP_NET_CFG_CTRL_RXCSUM
)
1021 dev_info
->rx_offload_capa
|= DEV_RX_OFFLOAD_IPV4_CKSUM
|
1022 DEV_RX_OFFLOAD_UDP_CKSUM
|
1023 DEV_RX_OFFLOAD_TCP_CKSUM
;
1025 if (hw
->cap
& NFP_NET_CFG_CTRL_TXVLAN
)
1026 dev_info
->tx_offload_capa
= DEV_TX_OFFLOAD_VLAN_INSERT
;
1028 if (hw
->cap
& NFP_NET_CFG_CTRL_TXCSUM
)
1029 dev_info
->tx_offload_capa
|= DEV_TX_OFFLOAD_IPV4_CKSUM
|
1030 DEV_RX_OFFLOAD_UDP_CKSUM
|
1031 DEV_RX_OFFLOAD_TCP_CKSUM
;
1033 dev_info
->default_rxconf
= (struct rte_eth_rxconf
) {
1035 .pthresh
= DEFAULT_RX_PTHRESH
,
1036 .hthresh
= DEFAULT_RX_HTHRESH
,
1037 .wthresh
= DEFAULT_RX_WTHRESH
,
1039 .rx_free_thresh
= DEFAULT_RX_FREE_THRESH
,
1043 dev_info
->default_txconf
= (struct rte_eth_txconf
) {
1045 .pthresh
= DEFAULT_TX_PTHRESH
,
1046 .hthresh
= DEFAULT_TX_HTHRESH
,
1047 .wthresh
= DEFAULT_TX_WTHRESH
,
1049 .tx_free_thresh
= DEFAULT_TX_FREE_THRESH
,
1050 .tx_rs_thresh
= DEFAULT_TX_RSBIT_THRESH
,
1051 .txq_flags
= ETH_TXQ_FLAGS_NOMULTSEGS
|
1052 ETH_TXQ_FLAGS_NOOFFLOADS
,
1055 dev_info
->reta_size
= NFP_NET_CFG_RSS_ITBL_SZ
;
1056 dev_info
->hash_key_size
= NFP_NET_CFG_RSS_KEY_SZ
;
1058 dev_info
->speed_capa
= ETH_LINK_SPEED_40G
| ETH_LINK_SPEED_100G
;
1061 static const uint32_t *
1062 nfp_net_supported_ptypes_get(struct rte_eth_dev
*dev
)
1064 static const uint32_t ptypes
[] = {
1065 /* refers to nfp_net_set_hash() */
1066 RTE_PTYPE_INNER_L3_IPV4
,
1067 RTE_PTYPE_INNER_L3_IPV6
,
1068 RTE_PTYPE_INNER_L3_IPV6_EXT
,
1069 RTE_PTYPE_INNER_L4_MASK
,
1073 if (dev
->rx_pkt_burst
== nfp_net_recv_pkts
)
1079 nfp_net_rx_queue_count(struct rte_eth_dev
*dev
, uint16_t queue_idx
)
1081 struct nfp_net_rxq
*rxq
;
1082 struct nfp_net_rx_desc
*rxds
;
1086 rxq
= (struct nfp_net_rxq
*)dev
->data
->rx_queues
[queue_idx
];
1089 PMD_INIT_LOG(ERR
, "Bad queue: %u\n", queue_idx
);
1093 idx
= rxq
->rd_p
% rxq
->rx_count
;
1094 rxds
= &rxq
->rxds
[idx
];
1099 * Other PMDs are just checking the DD bit in intervals of 4
1100 * descriptors and counting all four if the first has the DD
1101 * bit on. Of course, this is not accurate but can be good for
1102 * perfomance. But ideally that should be done in descriptors
1103 * chunks belonging to the same cache line
1106 while (count
< rxq
->rx_count
) {
1107 rxds
= &rxq
->rxds
[idx
];
1108 if ((rxds
->rxd
.meta_len_dd
& PCIE_DESC_RX_DD
) == 0)
1115 if ((idx
) == rxq
->rx_count
)
1123 nfp_net_dev_link_status_print(struct rte_eth_dev
*dev
)
1125 struct rte_eth_link link
;
1127 memset(&link
, 0, sizeof(link
));
1128 nfp_net_dev_atomic_read_link_status(dev
, &link
);
1129 if (link
.link_status
)
1130 RTE_LOG(INFO
, PMD
, "Port %d: Link Up - speed %u Mbps - %s\n",
1131 (int)(dev
->data
->port_id
), (unsigned)link
.link_speed
,
1132 link
.link_duplex
== ETH_LINK_FULL_DUPLEX
1133 ? "full-duplex" : "half-duplex");
1135 RTE_LOG(INFO
, PMD
, " Port %d: Link Down\n",
1136 (int)(dev
->data
->port_id
));
1138 RTE_LOG(INFO
, PMD
, "PCI Address: %04d:%02d:%02d:%d\n",
1139 dev
->pci_dev
->addr
.domain
, dev
->pci_dev
->addr
.bus
,
1140 dev
->pci_dev
->addr
.devid
, dev
->pci_dev
->addr
.function
);
1143 /* Interrupt configuration and handling */
1146 * nfp_net_irq_unmask - Unmask an interrupt
1148 * If MSI-X auto-masking is enabled clear the mask bit, otherwise
1149 * clear the ICR for the entry.
1152 nfp_net_irq_unmask(struct rte_eth_dev
*dev
)
1154 struct nfp_net_hw
*hw
;
1156 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
1158 if (hw
->ctrl
& NFP_NET_CFG_CTRL_MSIXAUTO
) {
1159 /* If MSI-X auto-masking is used, clear the entry */
1161 rte_intr_enable(&dev
->pci_dev
->intr_handle
);
1163 /* Make sure all updates are written before un-masking */
1165 nn_cfg_writeb(hw
, NFP_NET_CFG_ICR(NFP_NET_IRQ_LSC_IDX
),
1166 NFP_NET_CFG_ICR_UNMASKED
);
1171 nfp_net_dev_interrupt_handler(__rte_unused
struct rte_intr_handle
*handle
,
1175 struct rte_eth_link link
;
1176 struct rte_eth_dev
*dev
= (struct rte_eth_dev
*)param
;
1178 PMD_DRV_LOG(DEBUG
, "We got a LSC interrupt!!!\n");
1180 /* get the link status */
1181 memset(&link
, 0, sizeof(link
));
1182 nfp_net_dev_atomic_read_link_status(dev
, &link
);
1184 nfp_net_link_update(dev
, 0);
1187 if (!link
.link_status
) {
1188 /* handle it 1 sec later, wait it being stable */
1189 timeout
= NFP_NET_LINK_UP_CHECK_TIMEOUT
;
1190 /* likely to down */
1192 /* handle it 4 sec later, wait it being stable */
1193 timeout
= NFP_NET_LINK_DOWN_CHECK_TIMEOUT
;
1196 if (rte_eal_alarm_set(timeout
* 1000,
1197 nfp_net_dev_interrupt_delayed_handler
,
1199 RTE_LOG(ERR
, PMD
, "Error setting alarm");
1201 nfp_net_irq_unmask(dev
);
1206 * Interrupt handler which shall be registered for alarm callback for delayed
1207 * handling specific interrupt to wait for the stable nic state. As the NIC
1208 * interrupt state is not stable for nfp after link is just down, it needs
1209 * to wait 4 seconds to get the stable status.
1211 * @param handle Pointer to interrupt handle.
1212 * @param param The address of parameter (struct rte_eth_dev *)
1217 nfp_net_dev_interrupt_delayed_handler(void *param
)
1219 struct rte_eth_dev
*dev
= (struct rte_eth_dev
*)param
;
1221 nfp_net_link_update(dev
, 0);
1222 _rte_eth_dev_callback_process(dev
, RTE_ETH_EVENT_INTR_LSC
, NULL
);
1224 nfp_net_dev_link_status_print(dev
);
1227 nfp_net_irq_unmask(dev
);
1231 nfp_net_dev_mtu_set(struct rte_eth_dev
*dev
, uint16_t mtu
)
1233 struct nfp_net_hw
*hw
;
1235 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
1237 /* check that mtu is within the allowed range */
1238 if ((mtu
< ETHER_MIN_MTU
) || ((uint32_t)mtu
> hw
->max_mtu
))
1241 /* switch to jumbo mode if needed */
1242 if ((uint32_t)mtu
> ETHER_MAX_LEN
)
1243 dev
->data
->dev_conf
.rxmode
.jumbo_frame
= 1;
1245 dev
->data
->dev_conf
.rxmode
.jumbo_frame
= 0;
1247 /* update max frame size */
1248 dev
->data
->dev_conf
.rxmode
.max_rx_pkt_len
= (uint32_t)mtu
;
1250 /* writing to configuration space */
1251 nn_cfg_writel(hw
, NFP_NET_CFG_MTU
, (uint32_t)mtu
);
1259 nfp_net_rx_queue_setup(struct rte_eth_dev
*dev
,
1260 uint16_t queue_idx
, uint16_t nb_desc
,
1261 unsigned int socket_id
,
1262 const struct rte_eth_rxconf
*rx_conf
,
1263 struct rte_mempool
*mp
)
1265 const struct rte_memzone
*tz
;
1266 struct nfp_net_rxq
*rxq
;
1267 struct nfp_net_hw
*hw
;
1269 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
1271 PMD_INIT_FUNC_TRACE();
1273 /* Validating number of descriptors */
1274 if (((nb_desc
* sizeof(struct nfp_net_rx_desc
)) % 128) != 0 ||
1275 (nb_desc
> NFP_NET_MAX_RX_DESC
) ||
1276 (nb_desc
< NFP_NET_MIN_RX_DESC
)) {
1277 RTE_LOG(ERR
, PMD
, "Wrong nb_desc value\n");
1282 * Free memory prior to re-allocation if needed. This is the case after
1283 * calling nfp_net_stop
1285 if (dev
->data
->rx_queues
[queue_idx
]) {
1286 nfp_net_rx_queue_release(dev
->data
->rx_queues
[queue_idx
]);
1287 dev
->data
->rx_queues
[queue_idx
] = NULL
;
1290 /* Allocating rx queue data structure */
1291 rxq
= rte_zmalloc_socket("ethdev RX queue", sizeof(struct nfp_net_rxq
),
1292 RTE_CACHE_LINE_SIZE
, socket_id
);
1296 /* Hw queues mapping based on firmware confifguration */
1297 rxq
->qidx
= queue_idx
;
1298 rxq
->fl_qcidx
= queue_idx
* hw
->stride_rx
;
1299 rxq
->rx_qcidx
= rxq
->fl_qcidx
+ (hw
->stride_rx
- 1);
1300 rxq
->qcp_fl
= hw
->rx_bar
+ NFP_QCP_QUEUE_OFF(rxq
->fl_qcidx
);
1301 rxq
->qcp_rx
= hw
->rx_bar
+ NFP_QCP_QUEUE_OFF(rxq
->rx_qcidx
);
1304 * Tracking mbuf size for detecting a potential mbuf overflow due to
1308 rxq
->mbuf_size
= rxq
->mem_pool
->elt_size
;
1309 rxq
->mbuf_size
-= (sizeof(struct rte_mbuf
) + RTE_PKTMBUF_HEADROOM
);
1310 hw
->flbufsz
= rxq
->mbuf_size
;
1312 rxq
->rx_count
= nb_desc
;
1313 rxq
->port_id
= dev
->data
->port_id
;
1314 rxq
->rx_free_thresh
= rx_conf
->rx_free_thresh
;
1315 rxq
->crc_len
= (uint8_t) ((dev
->data
->dev_conf
.rxmode
.hw_strip_crc
) ? 0
1317 rxq
->drop_en
= rx_conf
->rx_drop_en
;
1320 * Allocate RX ring hardware descriptors. A memzone large enough to
1321 * handle the maximum ring size is allocated in order to allow for
1322 * resizing in later calls to the queue setup function.
1324 tz
= ring_dma_zone_reserve(dev
, "rx_ring", queue_idx
,
1325 sizeof(struct nfp_net_rx_desc
) *
1326 NFP_NET_MAX_RX_DESC
, socket_id
);
1329 RTE_LOG(ERR
, PMD
, "Error allocatig rx dma\n");
1330 nfp_net_rx_queue_release(rxq
);
1334 /* Saving physical and virtual addresses for the RX ring */
1335 rxq
->dma
= (uint64_t)tz
->phys_addr
;
1336 rxq
->rxds
= (struct nfp_net_rx_desc
*)tz
->addr
;
1338 /* mbuf pointers array for referencing mbufs linked to RX descriptors */
1339 rxq
->rxbufs
= rte_zmalloc_socket("rxq->rxbufs",
1340 sizeof(*rxq
->rxbufs
) * nb_desc
,
1341 RTE_CACHE_LINE_SIZE
, socket_id
);
1342 if (rxq
->rxbufs
== NULL
) {
1343 nfp_net_rx_queue_release(rxq
);
1347 PMD_RX_LOG(DEBUG
, "rxbufs=%p hw_ring=%p dma_addr=0x%" PRIx64
"\n",
1348 rxq
->rxbufs
, rxq
->rxds
, (unsigned long int)rxq
->dma
);
1350 nfp_net_reset_rx_queue(rxq
);
1352 dev
->data
->rx_queues
[queue_idx
] = rxq
;
1356 * Telling the HW about the physical address of the RX ring and number
1357 * of descriptors in log2 format
1359 nn_cfg_writeq(hw
, NFP_NET_CFG_RXR_ADDR(queue_idx
), rxq
->dma
);
1360 nn_cfg_writeb(hw
, NFP_NET_CFG_RXR_SZ(queue_idx
), log2(nb_desc
));
1366 nfp_net_rx_fill_freelist(struct nfp_net_rxq
*rxq
)
1368 struct nfp_net_rx_buff
*rxe
= rxq
->rxbufs
;
1372 PMD_RX_LOG(DEBUG
, "nfp_net_rx_fill_freelist for %u descriptors\n",
1375 for (i
= 0; i
< rxq
->rx_count
; i
++) {
1376 struct nfp_net_rx_desc
*rxd
;
1377 struct rte_mbuf
*mbuf
= rte_pktmbuf_alloc(rxq
->mem_pool
);
1380 RTE_LOG(ERR
, PMD
, "RX mbuf alloc failed queue_id=%u\n",
1381 (unsigned)rxq
->qidx
);
1385 dma_addr
= rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(mbuf
));
1387 rxd
= &rxq
->rxds
[i
];
1389 rxd
->fld
.dma_addr_hi
= (dma_addr
>> 32) & 0xff;
1390 rxd
->fld
.dma_addr_lo
= dma_addr
& 0xffffffff;
1392 PMD_RX_LOG(DEBUG
, "[%d]: %" PRIx64
"\n", i
, dma_addr
);
1397 /* Make sure all writes are flushed before telling the hardware */
1400 /* Not advertising the whole ring as the firmware gets confused if so */
1401 PMD_RX_LOG(DEBUG
, "Increment FL write pointer in %u\n",
1404 nfp_qcp_ptr_add(rxq
->qcp_fl
, NFP_QCP_WRITE_PTR
, rxq
->rx_count
- 1);
1410 nfp_net_tx_queue_setup(struct rte_eth_dev
*dev
, uint16_t queue_idx
,
1411 uint16_t nb_desc
, unsigned int socket_id
,
1412 const struct rte_eth_txconf
*tx_conf
)
1414 const struct rte_memzone
*tz
;
1415 struct nfp_net_txq
*txq
;
1416 uint16_t tx_free_thresh
;
1417 struct nfp_net_hw
*hw
;
1419 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
1421 PMD_INIT_FUNC_TRACE();
1423 /* Validating number of descriptors */
1424 if (((nb_desc
* sizeof(struct nfp_net_tx_desc
)) % 128) != 0 ||
1425 (nb_desc
> NFP_NET_MAX_TX_DESC
) ||
1426 (nb_desc
< NFP_NET_MIN_TX_DESC
)) {
1427 RTE_LOG(ERR
, PMD
, "Wrong nb_desc value\n");
1431 tx_free_thresh
= (uint16_t)((tx_conf
->tx_free_thresh
) ?
1432 tx_conf
->tx_free_thresh
:
1433 DEFAULT_TX_FREE_THRESH
);
1435 if (tx_free_thresh
> (nb_desc
)) {
1437 "tx_free_thresh must be less than the number of TX "
1438 "descriptors. (tx_free_thresh=%u port=%d "
1439 "queue=%d)\n", (unsigned int)tx_free_thresh
,
1440 (int)dev
->data
->port_id
, (int)queue_idx
);
1445 * Free memory prior to re-allocation if needed. This is the case after
1446 * calling nfp_net_stop
1448 if (dev
->data
->tx_queues
[queue_idx
]) {
1449 PMD_TX_LOG(DEBUG
, "Freeing memory prior to re-allocation %d\n",
1451 nfp_net_tx_queue_release(dev
->data
->tx_queues
[queue_idx
]);
1452 dev
->data
->tx_queues
[queue_idx
] = NULL
;
1455 /* Allocating tx queue data structure */
1456 txq
= rte_zmalloc_socket("ethdev TX queue", sizeof(struct nfp_net_txq
),
1457 RTE_CACHE_LINE_SIZE
, socket_id
);
1459 RTE_LOG(ERR
, PMD
, "Error allocating tx dma\n");
1464 * Allocate TX ring hardware descriptors. A memzone large enough to
1465 * handle the maximum ring size is allocated in order to allow for
1466 * resizing in later calls to the queue setup function.
1468 tz
= ring_dma_zone_reserve(dev
, "tx_ring", queue_idx
,
1469 sizeof(struct nfp_net_tx_desc
) *
1470 NFP_NET_MAX_TX_DESC
, socket_id
);
1472 RTE_LOG(ERR
, PMD
, "Error allocating tx dma\n");
1473 nfp_net_tx_queue_release(txq
);
1477 txq
->tx_count
= nb_desc
;
1479 txq
->tx_free_thresh
= tx_free_thresh
;
1480 txq
->tx_pthresh
= tx_conf
->tx_thresh
.pthresh
;
1481 txq
->tx_hthresh
= tx_conf
->tx_thresh
.hthresh
;
1482 txq
->tx_wthresh
= tx_conf
->tx_thresh
.wthresh
;
1484 /* queue mapping based on firmware configuration */
1485 txq
->qidx
= queue_idx
;
1486 txq
->tx_qcidx
= queue_idx
* hw
->stride_tx
;
1487 txq
->qcp_q
= hw
->tx_bar
+ NFP_QCP_QUEUE_OFF(txq
->tx_qcidx
);
1489 txq
->port_id
= dev
->data
->port_id
;
1490 txq
->txq_flags
= tx_conf
->txq_flags
;
1492 /* Saving physical and virtual addresses for the TX ring */
1493 txq
->dma
= (uint64_t)tz
->phys_addr
;
1494 txq
->txds
= (struct nfp_net_tx_desc
*)tz
->addr
;
1496 /* mbuf pointers array for referencing mbufs linked to TX descriptors */
1497 txq
->txbufs
= rte_zmalloc_socket("txq->txbufs",
1498 sizeof(*txq
->txbufs
) * nb_desc
,
1499 RTE_CACHE_LINE_SIZE
, socket_id
);
1500 if (txq
->txbufs
== NULL
) {
1501 nfp_net_tx_queue_release(txq
);
1504 PMD_TX_LOG(DEBUG
, "txbufs=%p hw_ring=%p dma_addr=0x%" PRIx64
"\n",
1505 txq
->txbufs
, txq
->txds
, (unsigned long int)txq
->dma
);
1507 nfp_net_reset_tx_queue(txq
);
1509 dev
->data
->tx_queues
[queue_idx
] = txq
;
1513 * Telling the HW about the physical address of the TX ring and number
1514 * of descriptors in log2 format
1516 nn_cfg_writeq(hw
, NFP_NET_CFG_TXR_ADDR(queue_idx
), txq
->dma
);
1517 nn_cfg_writeb(hw
, NFP_NET_CFG_TXR_SZ(queue_idx
), log2(nb_desc
));
1522 /* nfp_net_tx_cksum - Set TX CSUM offload flags in TX descriptor */
1524 nfp_net_tx_cksum(struct nfp_net_txq
*txq
, struct nfp_net_tx_desc
*txd
,
1525 struct rte_mbuf
*mb
)
1528 struct nfp_net_hw
*hw
= txq
->hw
;
1530 if (!(hw
->cap
& NFP_NET_CFG_CTRL_TXCSUM
))
1533 ol_flags
= mb
->ol_flags
;
1535 /* IPv6 does not need checksum */
1536 if (ol_flags
& PKT_TX_IP_CKSUM
)
1537 txd
->flags
|= PCIE_DESC_TX_IP4_CSUM
;
1539 switch (ol_flags
& PKT_TX_L4_MASK
) {
1540 case PKT_TX_UDP_CKSUM
:
1541 txd
->flags
|= PCIE_DESC_TX_UDP_CSUM
;
1543 case PKT_TX_TCP_CKSUM
:
1544 txd
->flags
|= PCIE_DESC_TX_TCP_CSUM
;
1548 if (ol_flags
& (PKT_TX_IP_CKSUM
| PKT_TX_L4_MASK
))
1549 txd
->flags
|= PCIE_DESC_TX_CSUM
;
1552 /* nfp_net_rx_cksum - set mbuf checksum flags based on RX descriptor flags */
1554 nfp_net_rx_cksum(struct nfp_net_rxq
*rxq
, struct nfp_net_rx_desc
*rxd
,
1555 struct rte_mbuf
*mb
)
1557 struct nfp_net_hw
*hw
= rxq
->hw
;
1559 if (!(hw
->ctrl
& NFP_NET_CFG_CTRL_RXCSUM
))
1562 /* If IPv4 and IP checksum error, fail */
1563 if ((rxd
->rxd
.flags
& PCIE_DESC_RX_IP4_CSUM
) &&
1564 !(rxd
->rxd
.flags
& PCIE_DESC_RX_IP4_CSUM_OK
))
1565 mb
->ol_flags
|= PKT_RX_IP_CKSUM_BAD
;
1567 /* If neither UDP nor TCP return */
1568 if (!(rxd
->rxd
.flags
& PCIE_DESC_RX_TCP_CSUM
) &&
1569 !(rxd
->rxd
.flags
& PCIE_DESC_RX_UDP_CSUM
))
1572 if ((rxd
->rxd
.flags
& PCIE_DESC_RX_TCP_CSUM
) &&
1573 !(rxd
->rxd
.flags
& PCIE_DESC_RX_TCP_CSUM_OK
))
1574 mb
->ol_flags
|= PKT_RX_L4_CKSUM_BAD
;
1576 if ((rxd
->rxd
.flags
& PCIE_DESC_RX_UDP_CSUM
) &&
1577 !(rxd
->rxd
.flags
& PCIE_DESC_RX_UDP_CSUM_OK
))
1578 mb
->ol_flags
|= PKT_RX_L4_CKSUM_BAD
;
1581 #define NFP_HASH_OFFSET ((uint8_t *)mbuf->buf_addr + mbuf->data_off - 4)
1582 #define NFP_HASH_TYPE_OFFSET ((uint8_t *)mbuf->buf_addr + mbuf->data_off - 8)
1585 * nfp_net_set_hash - Set mbuf hash data
1587 * The RSS hash and hash-type are pre-pended to the packet data.
1588 * Extract and decode it and set the mbuf fields.
1591 nfp_net_set_hash(struct nfp_net_rxq
*rxq
, struct nfp_net_rx_desc
*rxd
,
1592 struct rte_mbuf
*mbuf
)
1596 struct nfp_net_hw
*hw
= rxq
->hw
;
1598 if (!(hw
->ctrl
& NFP_NET_CFG_CTRL_RSS
))
1601 if (!(rxd
->rxd
.flags
& PCIE_DESC_RX_RSS
))
1604 hash
= rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_OFFSET
);
1605 hash_type
= rte_be_to_cpu_32(*(uint32_t *)NFP_HASH_TYPE_OFFSET
);
1608 * hash type is sharing the same word with input port info
1613 mbuf
->hash
.rss
= hash
;
1614 mbuf
->ol_flags
|= PKT_RX_RSS_HASH
;
1616 switch (hash_type
) {
1617 case NFP_NET_RSS_IPV4
:
1618 mbuf
->packet_type
|= RTE_PTYPE_INNER_L3_IPV4
;
1620 case NFP_NET_RSS_IPV6
:
1621 mbuf
->packet_type
|= RTE_PTYPE_INNER_L3_IPV6
;
1623 case NFP_NET_RSS_IPV6_EX
:
1624 mbuf
->packet_type
|= RTE_PTYPE_INNER_L3_IPV6_EXT
;
1627 mbuf
->packet_type
|= RTE_PTYPE_INNER_L4_MASK
;
1631 /* nfp_net_check_port - Set mbuf in_port field */
1633 nfp_net_check_port(struct nfp_net_rx_desc
*rxd
, struct rte_mbuf
*mbuf
)
1637 if (!(rxd
->rxd
.flags
& PCIE_DESC_RX_INGRESS_PORT
)) {
1642 port
= rte_be_to_cpu_32(*(uint32_t *)((uint8_t *)mbuf
->buf_addr
+
1643 mbuf
->data_off
- 8));
1646 * hash type is sharing the same word with input port info
1650 port
= (uint8_t)(port
>> 8);
1655 nfp_net_mbuf_alloc_failed(struct nfp_net_rxq
*rxq
)
1657 rte_eth_devices
[rxq
->port_id
].data
->rx_mbuf_alloc_failed
++;
1660 #define NFP_DESC_META_LEN(d) (d->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK)
1665 * There are some decissions to take:
1666 * 1) How to check DD RX descriptors bit
1667 * 2) How and when to allocate new mbufs
1669 * Current implementation checks just one single DD bit each loop. As each
1670 * descriptor is 8 bytes, it is likely a good idea to check descriptors in
1671 * a single cache line instead. Tests with this change have not shown any
1672 * performance improvement but it requires further investigation. For example,
1673 * depending on which descriptor is next, the number of descriptors could be
1674 * less than 8 for just checking those in the same cache line. This implies
1675 * extra work which could be counterproductive by itself. Indeed, last firmware
1676 * changes are just doing this: writing several descriptors with the DD bit
1677 * for saving PCIe bandwidth and DMA operations from the NFP.
1679 * Mbuf allocation is done when a new packet is received. Then the descriptor
1680 * is automatically linked with the new mbuf and the old one is given to the
1681 * user. The main drawback with this design is mbuf allocation is heavier than
1682 * using bulk allocations allowed by DPDK with rte_mempool_get_bulk. From the
1683 * cache point of view it does not seem allocating the mbuf early on as we are
1684 * doing now have any benefit at all. Again, tests with this change have not
1685 * shown any improvement. Also, rte_mempool_get_bulk returns all or nothing
1686 * so looking at the implications of this type of allocation should be studied
1691 nfp_net_recv_pkts(void *rx_queue
, struct rte_mbuf
**rx_pkts
, uint16_t nb_pkts
)
1693 struct nfp_net_rxq
*rxq
;
1694 struct nfp_net_rx_desc
*rxds
;
1695 struct nfp_net_rx_buff
*rxb
;
1696 struct nfp_net_hw
*hw
;
1697 struct rte_mbuf
*mb
;
1698 struct rte_mbuf
*new_mb
;
1705 if (unlikely(rxq
== NULL
)) {
1707 * DPDK just checks the queue is lower than max queues
1708 * enabled. But the queue needs to be configured
1710 RTE_LOG(ERR
, PMD
, "RX Bad queue\n");
1718 while (avail
< nb_pkts
) {
1719 idx
= rxq
->rd_p
% rxq
->rx_count
;
1721 rxb
= &rxq
->rxbufs
[idx
];
1722 if (unlikely(rxb
== NULL
)) {
1723 RTE_LOG(ERR
, PMD
, "rxb does not exist!\n");
1728 * Memory barrier to ensure that we won't do other
1729 * reads before the DD bit.
1733 rxds
= &rxq
->rxds
[idx
];
1734 if ((rxds
->rxd
.meta_len_dd
& PCIE_DESC_RX_DD
) == 0)
1738 * We got a packet. Let's alloc a new mbuff for refilling the
1739 * free descriptor ring as soon as possible
1741 new_mb
= rte_pktmbuf_alloc(rxq
->mem_pool
);
1742 if (unlikely(new_mb
== NULL
)) {
1743 RTE_LOG(DEBUG
, PMD
, "RX mbuf alloc failed port_id=%u "
1744 "queue_id=%u\n", (unsigned)rxq
->port_id
,
1745 (unsigned)rxq
->qidx
);
1746 nfp_net_mbuf_alloc_failed(rxq
);
1753 * Grab the mbuff and refill the descriptor with the
1754 * previously allocated mbuff
1759 PMD_RX_LOG(DEBUG
, "Packet len: %u, mbuf_size: %u\n",
1760 rxds
->rxd
.data_len
, rxq
->mbuf_size
);
1762 /* Size of this segment */
1763 mb
->data_len
= rxds
->rxd
.data_len
- NFP_DESC_META_LEN(rxds
);
1764 /* Size of the whole packet. We just support 1 segment */
1765 mb
->pkt_len
= rxds
->rxd
.data_len
- NFP_DESC_META_LEN(rxds
);
1767 if (unlikely((mb
->data_len
+ hw
->rx_offset
) >
1770 * This should not happen and the user has the
1771 * responsibility of avoiding it. But we have
1772 * to give some info about the error
1775 "mbuf overflow likely due to the RX offset.\n"
1776 "\t\tYour mbuf size should have extra space for"
1777 " RX offset=%u bytes.\n"
1778 "\t\tCurrently you just have %u bytes available"
1779 " but the received packet is %u bytes long",
1781 rxq
->mbuf_size
- hw
->rx_offset
,
1786 /* Filling the received mbuff with packet info */
1788 mb
->data_off
= RTE_PKTMBUF_HEADROOM
+ hw
->rx_offset
;
1790 mb
->data_off
= RTE_PKTMBUF_HEADROOM
+
1791 NFP_DESC_META_LEN(rxds
);
1793 /* No scatter mode supported */
1797 /* Checking the RSS flag */
1798 nfp_net_set_hash(rxq
, rxds
, mb
);
1800 /* Checking the checksum flag */
1801 nfp_net_rx_cksum(rxq
, rxds
, mb
);
1803 /* Checking the port flag */
1804 nfp_net_check_port(rxds
, mb
);
1806 if ((rxds
->rxd
.flags
& PCIE_DESC_RX_VLAN
) &&
1807 (hw
->ctrl
& NFP_NET_CFG_CTRL_RXVLAN
)) {
1808 mb
->vlan_tci
= rte_cpu_to_le_32(rxds
->rxd
.vlan
);
1809 mb
->ol_flags
|= PKT_RX_VLAN_PKT
| PKT_RX_VLAN_STRIPPED
;
1812 /* Adding the mbuff to the mbuff array passed by the app */
1813 rx_pkts
[avail
++] = mb
;
1815 /* Now resetting and updating the descriptor */
1818 dma_addr
= rte_cpu_to_le_64(RTE_MBUF_DMA_ADDR_DEFAULT(new_mb
));
1820 rxds
->fld
.dma_addr_hi
= (dma_addr
>> 32) & 0xff;
1821 rxds
->fld
.dma_addr_lo
= dma_addr
& 0xffffffff;
1829 PMD_RX_LOG(DEBUG
, "RX port_id=%u queue_id=%u, %d packets received\n",
1830 (unsigned)rxq
->port_id
, (unsigned)rxq
->qidx
, nb_hold
);
1832 nb_hold
+= rxq
->nb_rx_hold
;
1835 * FL descriptors needs to be written before incrementing the
1836 * FL queue WR pointer
1839 if (nb_hold
> rxq
->rx_free_thresh
) {
1840 PMD_RX_LOG(DEBUG
, "port=%u queue=%u nb_hold=%u avail=%u\n",
1841 (unsigned)rxq
->port_id
, (unsigned)rxq
->qidx
,
1842 (unsigned)nb_hold
, (unsigned)avail
);
1843 nfp_qcp_ptr_add(rxq
->qcp_fl
, NFP_QCP_WRITE_PTR
, nb_hold
);
1846 rxq
->nb_rx_hold
= nb_hold
;
1852 * nfp_net_tx_free_bufs - Check for descriptors with a complete
1854 * @txq: TX queue to work with
1855 * Returns number of descriptors freed
1858 nfp_net_tx_free_bufs(struct nfp_net_txq
*txq
)
1863 PMD_TX_LOG(DEBUG
, "queue %u. Check for descriptor with a complete"
1864 " status\n", txq
->qidx
);
1866 /* Work out how many packets have been sent */
1867 qcp_rd_p
= nfp_qcp_read(txq
->qcp_q
, NFP_QCP_READ_PTR
);
1869 if (qcp_rd_p
== txq
->qcp_rd_p
) {
1870 PMD_TX_LOG(DEBUG
, "queue %u: It seems harrier is not sending "
1871 "packets (%u, %u)\n", txq
->qidx
,
1872 qcp_rd_p
, txq
->qcp_rd_p
);
1876 if (qcp_rd_p
> txq
->qcp_rd_p
)
1877 todo
= qcp_rd_p
- txq
->qcp_rd_p
;
1879 todo
= qcp_rd_p
+ txq
->tx_count
- txq
->qcp_rd_p
;
1881 PMD_TX_LOG(DEBUG
, "qcp_rd_p %u, txq->qcp_rd_p: %u, qcp->rd_p: %u\n",
1882 qcp_rd_p
, txq
->qcp_rd_p
, txq
->rd_p
);
1887 txq
->qcp_rd_p
+= todo
;
1888 txq
->qcp_rd_p
%= txq
->tx_count
;
1894 /* Leaving always free descriptors for avoiding wrapping confusion */
1895 #define NFP_FREE_TX_DESC(t) (t->tx_count - (t->wr_p - t->rd_p) - 8)
1898 * nfp_net_txq_full - Check if the TX queue free descriptors
1899 * is below tx_free_threshold
1901 * @txq: TX queue to check
1903 * This function uses the host copy* of read/write pointers
1906 int nfp_net_txq_full(struct nfp_net_txq
*txq
)
1908 return NFP_FREE_TX_DESC(txq
) < txq
->tx_free_thresh
;
1912 nfp_net_xmit_pkts(void *tx_queue
, struct rte_mbuf
**tx_pkts
, uint16_t nb_pkts
)
1914 struct nfp_net_txq
*txq
;
1915 struct nfp_net_hw
*hw
;
1916 struct nfp_net_tx_desc
*txds
;
1917 struct rte_mbuf
*pkt
;
1919 int pkt_size
, dma_size
;
1920 uint16_t free_descs
, issued_descs
;
1921 struct rte_mbuf
**lmbuf
;
1926 txds
= &txq
->txds
[txq
->tail
];
1928 PMD_TX_LOG(DEBUG
, "working for queue %u at pos %d and %u packets\n",
1929 txq
->qidx
, txq
->tail
, nb_pkts
);
1931 if ((NFP_FREE_TX_DESC(txq
) < nb_pkts
) || (nfp_net_txq_full(txq
)))
1932 nfp_net_tx_free_bufs(txq
);
1934 free_descs
= (uint16_t)NFP_FREE_TX_DESC(txq
);
1935 if (unlikely(free_descs
== 0))
1942 PMD_TX_LOG(DEBUG
, "queue: %u. Sending %u packets\n",
1943 txq
->qidx
, nb_pkts
);
1944 /* Sending packets */
1945 while ((i
< nb_pkts
) && free_descs
) {
1946 /* Grabbing the mbuf linked to the current descriptor */
1947 lmbuf
= &txq
->txbufs
[txq
->tail
].mbuf
;
1948 /* Warming the cache for releasing the mbuf later on */
1949 RTE_MBUF_PREFETCH_TO_FREE(*lmbuf
);
1951 pkt
= *(tx_pkts
+ i
);
1953 if (unlikely((pkt
->nb_segs
> 1) &&
1954 !(hw
->cap
& NFP_NET_CFG_CTRL_GATHER
))) {
1955 PMD_INIT_LOG(INFO
, "NFP_NET_CFG_CTRL_GATHER not set\n");
1956 rte_panic("Multisegment packet unsupported\n");
1959 /* Checking if we have enough descriptors */
1960 if (unlikely(pkt
->nb_segs
> free_descs
))
1964 * Checksum and VLAN flags just in the first descriptor for a
1965 * multisegment packet
1967 nfp_net_tx_cksum(txq
, txds
, pkt
);
1969 if ((pkt
->ol_flags
& PKT_TX_VLAN_PKT
) &&
1970 (hw
->cap
& NFP_NET_CFG_CTRL_TXVLAN
)) {
1971 txds
->flags
|= PCIE_DESC_TX_VLAN
;
1972 txds
->vlan
= pkt
->vlan_tci
;
1975 if (pkt
->ol_flags
& PKT_TX_TCP_SEG
)
1976 rte_panic("TSO is not supported\n");
1979 * mbuf data_len is the data in one segment and pkt_len data
1980 * in the whole packet. When the packet is just one segment,
1981 * then data_len = pkt_len
1983 pkt_size
= pkt
->pkt_len
;
1985 /* Releasing mbuf which was prefetched above */
1987 rte_pktmbuf_free(*lmbuf
);
1989 * Linking mbuf with descriptor for being released
1990 * next time descriptor is used
1995 dma_size
= pkt
->data_len
;
1996 dma_addr
= rte_mbuf_data_dma_addr(pkt
);
1997 PMD_TX_LOG(DEBUG
, "Working with mbuf at dma address:"
1998 "%" PRIx64
"\n", dma_addr
);
2000 /* Filling descriptors fields */
2001 txds
->dma_len
= dma_size
;
2002 txds
->data_len
= pkt
->pkt_len
;
2003 txds
->dma_addr_hi
= (dma_addr
>> 32) & 0xff;
2004 txds
->dma_addr_lo
= (dma_addr
& 0xffffffff);
2005 ASSERT(free_descs
> 0);
2010 if (unlikely(txq
->tail
== txq
->tx_count
)) /* wrapping?*/
2013 pkt_size
-= dma_size
;
2016 txds
->offset_eop
|= PCIE_DESC_TX_EOP
;
2018 txds
->offset_eop
&= PCIE_DESC_TX_OFFSET_MASK
;
2021 /* Referencing next free TX descriptor */
2022 txds
= &txq
->txds
[txq
->tail
];
2029 /* Increment write pointers. Force memory write before we let HW know */
2031 nfp_qcp_ptr_add(txq
->qcp_q
, NFP_QCP_WRITE_PTR
, issued_descs
);
2037 nfp_net_vlan_offload_set(struct rte_eth_dev
*dev
, int mask
)
2039 uint32_t new_ctrl
, update
;
2040 struct nfp_net_hw
*hw
;
2042 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
2045 if ((mask
& ETH_VLAN_FILTER_OFFLOAD
) ||
2046 (mask
& ETH_VLAN_FILTER_OFFLOAD
))
2047 RTE_LOG(INFO
, PMD
, "Not support for ETH_VLAN_FILTER_OFFLOAD or"
2048 " ETH_VLAN_FILTER_EXTEND");
2050 /* Enable vlan strip if it is not configured yet */
2051 if ((mask
& ETH_VLAN_STRIP_OFFLOAD
) &&
2052 !(hw
->ctrl
& NFP_NET_CFG_CTRL_RXVLAN
))
2053 new_ctrl
= hw
->ctrl
| NFP_NET_CFG_CTRL_RXVLAN
;
2055 /* Disable vlan strip just if it is configured */
2056 if (!(mask
& ETH_VLAN_STRIP_OFFLOAD
) &&
2057 (hw
->ctrl
& NFP_NET_CFG_CTRL_RXVLAN
))
2058 new_ctrl
= hw
->ctrl
& ~NFP_NET_CFG_CTRL_RXVLAN
;
2063 update
= NFP_NET_CFG_UPDATE_GEN
;
2065 if (nfp_net_reconfig(hw
, new_ctrl
, update
) < 0)
2068 hw
->ctrl
= new_ctrl
;
2071 /* Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device */
2073 nfp_net_reta_update(struct rte_eth_dev
*dev
,
2074 struct rte_eth_rss_reta_entry64
*reta_conf
,
2077 uint32_t reta
, mask
;
2081 struct nfp_net_hw
*hw
=
2082 NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
2084 if (!(hw
->ctrl
& NFP_NET_CFG_CTRL_RSS
))
2087 if (reta_size
!= NFP_NET_CFG_RSS_ITBL_SZ
) {
2088 RTE_LOG(ERR
, PMD
, "The size of hash lookup table configured "
2089 "(%d) doesn't match the number hardware can supported "
2090 "(%d)\n", reta_size
, NFP_NET_CFG_RSS_ITBL_SZ
);
2095 * Update Redirection Table. There are 128 8bit-entries which can be
2096 * manage as 32 32bit-entries
2098 for (i
= 0; i
< reta_size
; i
+= 4) {
2099 /* Handling 4 RSS entries per loop */
2100 idx
= i
/ RTE_RETA_GROUP_SIZE
;
2101 shift
= i
% RTE_RETA_GROUP_SIZE
;
2102 mask
= (uint8_t)((reta_conf
[idx
].mask
>> shift
) & 0xF);
2108 /* If all 4 entries were set, don't need read RETA register */
2110 reta
= nn_cfg_readl(hw
, NFP_NET_CFG_RSS_ITBL
+ i
);
2112 for (j
= 0; j
< 4; j
++) {
2113 if (!(mask
& (0x1 << j
)))
2116 /* Clearing the entry bits */
2117 reta
&= ~(0xFF << (8 * j
));
2118 reta
|= reta_conf
[idx
].reta
[shift
+ j
] << (8 * j
);
2120 nn_cfg_writel(hw
, NFP_NET_CFG_RSS_ITBL
+ shift
, reta
);
2123 update
= NFP_NET_CFG_UPDATE_RSS
;
2125 if (nfp_net_reconfig(hw
, hw
->ctrl
, update
) < 0)
2131 /* Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device. */
2133 nfp_net_reta_query(struct rte_eth_dev
*dev
,
2134 struct rte_eth_rss_reta_entry64
*reta_conf
,
2140 struct nfp_net_hw
*hw
;
2142 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
2144 if (!(hw
->ctrl
& NFP_NET_CFG_CTRL_RSS
))
2147 if (reta_size
!= NFP_NET_CFG_RSS_ITBL_SZ
) {
2148 RTE_LOG(ERR
, PMD
, "The size of hash lookup table configured "
2149 "(%d) doesn't match the number hardware can supported "
2150 "(%d)\n", reta_size
, NFP_NET_CFG_RSS_ITBL_SZ
);
2155 * Reading Redirection Table. There are 128 8bit-entries which can be
2156 * manage as 32 32bit-entries
2158 for (i
= 0; i
< reta_size
; i
+= 4) {
2159 /* Handling 4 RSS entries per loop */
2160 idx
= i
/ RTE_RETA_GROUP_SIZE
;
2161 shift
= i
% RTE_RETA_GROUP_SIZE
;
2162 mask
= (uint8_t)((reta_conf
[idx
].mask
>> shift
) & 0xF);
2167 reta
= nn_cfg_readl(hw
, NFP_NET_CFG_RSS_ITBL
+ shift
);
2168 for (j
= 0; j
< 4; j
++) {
2169 if (!(mask
& (0x1 << j
)))
2171 reta_conf
->reta
[shift
+ j
] =
2172 (uint8_t)((reta
>> (8 * j
)) & 0xF);
2179 nfp_net_rss_hash_update(struct rte_eth_dev
*dev
,
2180 struct rte_eth_rss_conf
*rss_conf
)
2183 uint32_t cfg_rss_ctrl
= 0;
2187 struct nfp_net_hw
*hw
;
2189 hw
= NFP_NET_DEV_PRIVATE_TO_HW(dev
->data
->dev_private
);
2191 rss_hf
= rss_conf
->rss_hf
;
2193 /* Checking if RSS is enabled */
2194 if (!(hw
->ctrl
& NFP_NET_CFG_CTRL_RSS
)) {
2195 if (rss_hf
!= 0) { /* Enable RSS? */
2196 RTE_LOG(ERR
, PMD
, "RSS unsupported\n");
2199 return 0; /* Nothing to do */
2202 if (rss_conf
->rss_key_len
> NFP_NET_CFG_RSS_KEY_SZ
) {
2203 RTE_LOG(ERR
, PMD
, "hash key too long\n");
2207 if (rss_hf
& ETH_RSS_IPV4
)
2208 cfg_rss_ctrl
|= NFP_NET_CFG_RSS_IPV4
|
2209 NFP_NET_CFG_RSS_IPV4_TCP
|
2210 NFP_NET_CFG_RSS_IPV4_UDP
;
2212 if (rss_hf
& ETH_RSS_IPV6
)
2213 cfg_rss_ctrl
|= NFP_NET_CFG_RSS_IPV6
|
2214 NFP_NET_CFG_RSS_IPV6_TCP
|
2215 NFP_NET_CFG_RSS_IPV6_UDP
;
2217 /* configuring where to apply the RSS hash */
2218 nn_cfg_writel(hw
, NFP_NET_CFG_RSS_CTRL
, cfg_rss_ctrl
);
2220 /* Writing the key byte a byte */
2221 for (i
= 0; i
< rss_conf
->rss_key_len
; i
++) {
2222 memcpy(&key
, &rss_conf
->rss_key
[i
], 1);
2223 nn_cfg_writeb(hw
, NFP_NET_CFG_RSS_KEY
+ i
, key
);
2226 /* Writing the key size */
2227 nn_cfg_writeb(hw
, NFP_NET_CFG_RSS_KEY_SZ
, rss_conf
->rss_key_len
);
2229 update
= NFP_NET_CFG_UPDATE_RSS
;
2231 if (nfp_net_reconfig(hw
, hw
->ctrl
, update
) < 0)
static int
nfp_net_rss_hash_conf_get(struct rte_eth_dev *dev,
			  struct rte_eth_rss_conf *rss_conf)
{
	uint64_t rss_hf;
	uint32_t cfg_rss_ctrl;
	uint8_t key;
	int i;
	struct nfp_net_hw *hw;

	hw = NFP_NET_DEV_PRIVATE_TO_HW(dev->data->dev_private);

	if (!(hw->ctrl & NFP_NET_CFG_CTRL_RSS))
		return -EINVAL;

	rss_hf = rss_conf->rss_hf;
	cfg_rss_ctrl = nn_cfg_readl(hw, NFP_NET_CFG_RSS_CTRL);

	if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV4)
		rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP;

	if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV4_TCP)
		rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP;

	if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV6_TCP)
		rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP;

	if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV4_UDP)
		rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP;

	if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV6_UDP)
		rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP;

	if (cfg_rss_ctrl & NFP_NET_CFG_RSS_IPV6)
		rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_NONFRAG_IPV6_UDP;

	/* Propagating the hash types read from hardware back to the caller */
	rss_conf->rss_hf = rss_hf;

	/* Reading the key size */
	rss_conf->rss_key_len = nn_cfg_readl(hw, NFP_NET_CFG_RSS_KEY_SZ);

	/* Reading the key byte by byte */
	for (i = 0; i < rss_conf->rss_key_len; i++) {
		key = nn_cfg_readb(hw, NFP_NET_CFG_RSS_KEY + i);
		memcpy(&rss_conf->rss_key[i], &key, 1);
	}

	return 0;
}
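
/*
 * Example (illustrative sketch, not part of the driver): reading the active
 * RSS configuration back. The caller must provide a key buffer of at least
 * NFP_NET_CFG_RSS_KEY_SZ bytes, or the key copy loop above will overrun it.
 * "port_id" is an assumption.
 *
 *	uint8_t rss_key[NFP_NET_CFG_RSS_KEY_SZ];
 *	struct rte_eth_rss_conf rss_conf = { .rss_key = rss_key };
 *
 *	rte_eth_dev_rss_hash_conf_get(port_id, &rss_conf);
 */
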
/* Initialise and register driver with DPDK Application */
static const struct eth_dev_ops nfp_net_eth_dev_ops = {
	.dev_configure		= nfp_net_configure,
	.dev_start		= nfp_net_start,
	.dev_stop		= nfp_net_stop,
	.dev_close		= nfp_net_close,
	.promiscuous_enable	= nfp_net_promisc_enable,
	.promiscuous_disable	= nfp_net_promisc_disable,
	.link_update		= nfp_net_link_update,
	.stats_get		= nfp_net_stats_get,
	.stats_reset		= nfp_net_stats_reset,
	.dev_infos_get		= nfp_net_infos_get,
	.dev_supported_ptypes_get = nfp_net_supported_ptypes_get,
	.mtu_set		= nfp_net_dev_mtu_set,
	.vlan_offload_set	= nfp_net_vlan_offload_set,
	.reta_update		= nfp_net_reta_update,
	.reta_query		= nfp_net_reta_query,
	.rss_hash_update	= nfp_net_rss_hash_update,
	.rss_hash_conf_get	= nfp_net_rss_hash_conf_get,
	.rx_queue_setup		= nfp_net_rx_queue_setup,
	.rx_queue_release	= nfp_net_rx_queue_release,
	.rx_queue_count		= nfp_net_rx_queue_count,
	.tx_queue_setup		= nfp_net_tx_queue_setup,
	.tx_queue_release	= nfp_net_tx_queue_release,
};
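
/*
 * Note: generic ethdev calls dispatch through this table. For instance,
 * rte_eth_dev_configure() lands in nfp_net_configure() via .dev_configure,
 * and the RSS handlers above are reached through .reta_update, .reta_query,
 * .rss_hash_update and .rss_hash_conf_get.
 */
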
static int
nfp_net_init(struct rte_eth_dev *eth_dev)
{
	struct rte_pci_device *pci_dev;
	struct nfp_net_hw *hw;

	uint32_t tx_bar_off, rx_bar_off;
	uint32_t start_q;
	int stride = 4;

	PMD_INIT_FUNC_TRACE();

	hw = NFP_NET_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);

	eth_dev->dev_ops = &nfp_net_eth_dev_ops;
	eth_dev->rx_pkt_burst = &nfp_net_recv_pkts;
	eth_dev->tx_pkt_burst = &nfp_net_xmit_pkts;

	/* For secondary processes, the primary has done all the work */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;

	pci_dev = eth_dev->pci_dev;
	rte_eth_copy_pci_info(eth_dev, pci_dev);

	hw->device_id = pci_dev->id.device_id;
	hw->vendor_id = pci_dev->id.vendor_id;
	hw->subsystem_device_id = pci_dev->id.subsystem_device_id;
	hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;

	PMD_INIT_LOG(DEBUG, "nfp_net: device (%u:%u) %u:%u:%u:%u\n",
		     pci_dev->id.vendor_id, pci_dev->id.device_id,
		     pci_dev->addr.domain, pci_dev->addr.bus,
		     pci_dev->addr.devid, pci_dev->addr.function);

	hw->ctrl_bar = (uint8_t *)pci_dev->mem_resource[0].addr;
	if (hw->ctrl_bar == NULL) {
		RTE_LOG(ERR, PMD,
			"hw->ctrl_bar is NULL. BAR0 not configured\n");
		return -ENODEV;
	}
	hw->max_rx_queues = nn_cfg_readl(hw, NFP_NET_CFG_MAX_RXRINGS);
	hw->max_tx_queues = nn_cfg_readl(hw, NFP_NET_CFG_MAX_TXRINGS);

	/* Work out where in the BAR the queues start. */
	switch (pci_dev->id.device_id) {
	case PCI_DEVICE_ID_NFP6000_VF_NIC:
		start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_TXQ);
		tx_bar_off = NFP_PCIE_QUEUE(start_q);
		start_q = nn_cfg_readl(hw, NFP_NET_CFG_START_RXQ);
		rx_bar_off = NFP_PCIE_QUEUE(start_q);
		break;
	default:
		RTE_LOG(ERR, PMD, "nfp_net: no device ID matching\n");
		return -ENODEV;
	}
	PMD_INIT_LOG(DEBUG, "tx_bar_off: 0x%08x\n", tx_bar_off);
	PMD_INIT_LOG(DEBUG, "rx_bar_off: 0x%08x\n", rx_bar_off);

	hw->tx_bar = (uint8_t *)pci_dev->mem_resource[2].addr + tx_bar_off;
	hw->rx_bar = (uint8_t *)pci_dev->mem_resource[2].addr + rx_bar_off;

	PMD_INIT_LOG(DEBUG, "ctrl_bar: %p, tx_bar: %p, rx_bar: %p\n",
		     hw->ctrl_bar, hw->tx_bar, hw->rx_bar);

	nfp_net_cfg_queue_setup(hw);

	/* Get some of the read-only fields from the config BAR */
	hw->ver = nn_cfg_readl(hw, NFP_NET_CFG_VERSION);
	hw->cap = nn_cfg_readl(hw, NFP_NET_CFG_CAP);
	hw->max_mtu = nn_cfg_readl(hw, NFP_NET_CFG_MAX_MTU);
	hw->mtu = hw->max_mtu;

	if (NFD_CFG_MAJOR_VERSION_of(hw->ver) < 2)
		hw->rx_offset = NFP_NET_RX_OFFSET;
	else
		hw->rx_offset = nn_cfg_readl(hw, NFP_NET_CFG_RX_OFFSET_ADDR);

	PMD_INIT_LOG(INFO, "VER: %#x, Maximum supported MTU: %d\n",
		     hw->ver, hw->max_mtu);
	PMD_INIT_LOG(INFO, "CAP: %#x, %s%s%s%s%s%s%s%s%s\n", hw->cap,
		     hw->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
		     hw->cap & NFP_NET_CFG_CTRL_RXCSUM ? "RXCSUM " : "",
		     hw->cap & NFP_NET_CFG_CTRL_TXCSUM ? "TXCSUM " : "",
		     hw->cap & NFP_NET_CFG_CTRL_RXVLAN ? "RXVLAN " : "",
		     hw->cap & NFP_NET_CFG_CTRL_TXVLAN ? "TXVLAN " : "",
		     hw->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "",
		     hw->cap & NFP_NET_CFG_CTRL_GATHER ? "GATHER " : "",
		     hw->cap & NFP_NET_CFG_CTRL_LSO ? "TSO " : "",
		     hw->cap & NFP_NET_CFG_CTRL_RSS ? "RSS " : "");

	pci_dev = eth_dev->pci_dev;
	hw->ctrl = 0;

	hw->stride_rx = stride;
	hw->stride_tx = stride;

	PMD_INIT_LOG(INFO, "max_rx_queues: %u, max_tx_queues: %u\n",
		     hw->max_rx_queues, hw->max_tx_queues);

	/* Initializing spinlock for reconfigs */
	rte_spinlock_init(&hw->reconfig_lock);

	/* Allocating memory for mac addr */
	eth_dev->data->mac_addrs = rte_zmalloc("mac_addr", ETHER_ADDR_LEN, 0);
	if (eth_dev->data->mac_addrs == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate space for MAC address");
		return -ENOMEM;
	}

	nfp_net_read_mac(hw);

	if (!is_valid_assigned_ether_addr((struct ether_addr *)&hw->mac_addr))
		/* Using random mac addresses for VFs */
		eth_random_addr(&hw->mac_addr[0]);

	/* Copying mac address to DPDK eth_dev struct */
	ether_addr_copy((struct ether_addr *)hw->mac_addr,
			&eth_dev->data->mac_addrs[0]);

	PMD_INIT_LOG(INFO, "port %d VendorID=0x%x DeviceID=0x%x "
		     "mac=%02x:%02x:%02x:%02x:%02x:%02x",
		     eth_dev->data->port_id, pci_dev->id.vendor_id,
		     pci_dev->id.device_id,
		     hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2],
		     hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]);

	/* Registering LSC interrupt handler */
	rte_intr_callback_register(&pci_dev->intr_handle,
				   nfp_net_dev_interrupt_handler,
				   (void *)eth_dev);

	/* enable uio intr after callback register */
	rte_intr_enable(&pci_dev->intr_handle);

	/* Telling the firmware about the LSC interrupt entry */
	nn_cfg_writeb(hw, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);

	/* Recording current stats counters values */
	nfp_net_stats_reset(eth_dev);

	return 0;
}

static struct rte_pci_id pci_id_nfp_net_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
			       PCI_DEVICE_ID_NFP6000_PF_NIC)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_NETRONOME,
			       PCI_DEVICE_ID_NFP6000_VF_NIC)
	},
	{
		.vendor_id = 0,
	},
};

static struct eth_driver rte_nfp_net_pmd = {
	.pci_drv = {
		.id_table = pci_id_nfp_net_map,
		.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
			     RTE_PCI_DRV_DETACHABLE,
		.probe = rte_eth_dev_pci_probe,
		.remove = rte_eth_dev_pci_remove,
	},
	.eth_dev_init = nfp_net_init,
	.dev_private_size = sizeof(struct nfp_net_adapter),
};

RTE_PMD_REGISTER_PCI(net_nfp, rte_nfp_net_pmd.pci_drv);
RTE_PMD_REGISTER_PCI_TABLE(net_nfp, pci_id_nfp_net_map);
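
/*
 * Example (illustrative sketch): once registered, the PMD probes any bound
 * NFP6000 VF automatically at EAL init; e.g. with a device at 0000:01:08.0
 * (the address is an assumption):
 *
 *	testpmd -c 0x3 -n 4 -w 0000:01:08.0 -- --rxq=2 --txq=2
 */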

/*
 * Local variables:
 * c-file-style: "Linux"
 * indent-tabs-mode: t
 * End:
 */