/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2016 Cavium, Inc
 */

#include <assert.h>
#include <stdio.h>
#include <stdbool.h>
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <inttypes.h>

#include <netinet/in.h>
#include <sys/queue.h>
#include <rte_alarm.h>
#include <rte_branch_prediction.h>
#include <rte_byteorder.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_dev.h>
#include <rte_eal.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_pci.h>
#include <rte_interrupts.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memzone.h>
#include <rte_malloc.h>
#include <rte_random.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_tailq.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include "base/nicvf_plat.h"

#include "nicvf_ethdev.h"
#include "nicvf_rxtx.h"
#include "nicvf_svf.h"
#include "nicvf_logs.h"
int nicvf_logtype_mbox;
int nicvf_logtype_init;
int nicvf_logtype_driver;
static void nicvf_dev_stop(struct rte_eth_dev *dev);
static void nicvf_dev_stop_cleanup(struct rte_eth_dev *dev, bool cleanup);
static void nicvf_vf_stop(struct rte_eth_dev *dev, struct nicvf *nic,
			  bool cleanup);
static int nicvf_vlan_offload_config(struct rte_eth_dev *dev, int mask);
static int nicvf_vlan_offload_set(struct rte_eth_dev *dev, int mask);
RTE_INIT(nicvf_init_log)
{
	nicvf_logtype_mbox = rte_log_register("pmd.net.thunderx.mbox");
	if (nicvf_logtype_mbox >= 0)
		rte_log_set_level(nicvf_logtype_mbox, RTE_LOG_NOTICE);

	nicvf_logtype_init = rte_log_register("pmd.net.thunderx.init");
	if (nicvf_logtype_init >= 0)
		rte_log_set_level(nicvf_logtype_init, RTE_LOG_NOTICE);

	nicvf_logtype_driver = rte_log_register("pmd.net.thunderx.driver");
	if (nicvf_logtype_driver >= 0)
		rte_log_set_level(nicvf_logtype_driver, RTE_LOG_NOTICE);
}
static void
nicvf_link_status_update(struct nicvf *nic,
			 struct rte_eth_link *link)
{
	memset(link, 0, sizeof(*link));

	link->link_status = nic->link_up ? ETH_LINK_UP : ETH_LINK_DOWN;

	if (nic->duplex == NICVF_HALF_DUPLEX)
		link->link_duplex = ETH_LINK_HALF_DUPLEX;
	else if (nic->duplex == NICVF_FULL_DUPLEX)
		link->link_duplex = ETH_LINK_FULL_DUPLEX;
	link->link_speed = nic->speed;
	link->link_autoneg = ETH_LINK_AUTONEG;
}
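/*
 * Note: the VF has no dedicated link-state interrupt line; mailbox
 * interrupts are instead polled from an EAL alarm callback that re-arms
 * itself every NICVF_INTR_POLL_INTERVAL_MS.
 */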
static void
nicvf_interrupt(void *arg)
{
	struct rte_eth_dev *dev = arg;
	struct nicvf *nic = nicvf_pmd_priv(dev);
	struct rte_eth_link link;

	if (nicvf_reg_poll_interrupts(nic) == NIC_MBOX_MSG_BGX_LINK_CHANGE) {
		if (dev->data->dev_conf.intr_conf.lsc) {
			nicvf_link_status_update(nic, &link);
			rte_eth_linkstatus_set(dev, &link);

			_rte_eth_dev_callback_process(dev,
						      RTE_ETH_EVENT_INTR_LSC,
						      NULL);
		}
	}

	rte_eal_alarm_set(NICVF_INTR_POLL_INTERVAL_MS * 1000,
				nicvf_interrupt, dev);
}

static void
nicvf_vf_interrupt(void *arg)
{
	struct nicvf *nic = arg;

	nicvf_reg_poll_interrupts(nic);

	rte_eal_alarm_set(NICVF_INTR_POLL_INTERVAL_MS * 1000,
				nicvf_vf_interrupt, nic);
}

static int
nicvf_periodic_alarm_start(void (fn)(void *), void *arg)
{
	return rte_eal_alarm_set(NICVF_INTR_POLL_INTERVAL_MS * 1000, fn, arg);
}

static int
nicvf_periodic_alarm_stop(void (fn)(void *), void *arg)
{
	return rte_eal_alarm_cancel(fn, arg);
}
/*
 * Return 0 means link status changed, -1 means not changed
 */
static int
nicvf_dev_link_update(struct rte_eth_dev *dev, int wait_to_complete)
{
#define CHECK_INTERVAL 100  /* 100ms */
#define MAX_CHECK_TIME 90   /* 9s (90 * 100ms) in total */
	struct rte_eth_link link;
	struct nicvf *nic = nicvf_pmd_priv(dev);
	int i;

	PMD_INIT_FUNC_TRACE();

	if (wait_to_complete) {
		/* rte_eth_link_get() might need to wait up to 9 seconds */
		for (i = 0; i < MAX_CHECK_TIME; i++) {
			nicvf_link_status_update(nic, &link);
			if (link.link_status == ETH_LINK_UP)
				break;
			rte_delay_ms(CHECK_INTERVAL);
		}
	} else {
		nicvf_link_status_update(nic, &link);
	}

	return rte_eth_linkstatus_set(dev, &link);
}
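/*
 * Note: NIC_HW_L2_OVERHEAD is assumed here to cover the Ethernet header,
 * CRC and VLAN tags, so frame_size below is the on-wire frame length
 * implied by the requested MTU.
 */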
static int
nicvf_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint32_t buffsz, frame_size = mtu + NIC_HW_L2_OVERHEAD;
	size_t i;
	struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;

	PMD_INIT_FUNC_TRACE();

	if (frame_size > NIC_HW_MAX_FRS)
		return -EINVAL;

	if (frame_size < NIC_HW_MIN_FRS)
		return -EINVAL;

	buffsz = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;

	/*
	 * Refuse mtu that requires the support of scattered packets
	 * when this feature has not been enabled before.
	 */
	if (dev->data->dev_started && !dev->data->scattered_rx &&
		(frame_size + 2 * VLAN_TAG_SIZE > buffsz))
		return -EINVAL;

	/* check <seg size> * <max_seg>  >= max_frame */
	if (dev->data->scattered_rx &&
		(frame_size + 2 * VLAN_TAG_SIZE > buffsz * NIC_HW_MAX_SEGS))
		return -EINVAL;

	if (frame_size > RTE_ETHER_MAX_LEN)
		rxmode->offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
	else
		rxmode->offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;

	if (nicvf_mbox_update_hw_max_frs(nic, mtu))
		return -EINVAL;

	/* Update max_rx_pkt_len */
	rxmode->max_rx_pkt_len = mtu + RTE_ETHER_HDR_LEN;
	nic->mtu = mtu;

	for (i = 0; i < nic->sqs_count; i++)
		nic->snicvf[i]->mtu = mtu;

	return 0;
}
static int
nicvf_dev_get_regs(struct rte_eth_dev *dev, struct rte_dev_reg_info *regs)
{
	uint64_t *data = regs->data;
	struct nicvf *nic = nicvf_pmd_priv(dev);

	if (data == NULL) {
		regs->length = nicvf_reg_get_count();
		regs->width = THUNDERX_REG_BYTES;
		return 0;
	}

	/* Support only full register dump */
	if ((regs->length == 0) ||
		(regs->length == (uint32_t)nicvf_reg_get_count())) {
		regs->version = nic->vendor_id << 16 | nic->device_id;
		nicvf_reg_dump(nic, data);
		return 0;
	}
	return -ENOTSUP;
}
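/*
 * Per-queue stats are read from the primary VF and from each secondary
 * Qset VF, then folded into the single set of ethdev counters. Queue
 * indices beyond RTE_ETHDEV_QUEUE_STAT_CNTRS cannot be reported.
 */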
static int
nicvf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
	uint16_t qidx;
	struct nicvf_hw_rx_qstats rx_qstats;
	struct nicvf_hw_tx_qstats tx_qstats;
	struct nicvf_hw_stats port_stats;
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint16_t rx_start, rx_end;
	uint16_t tx_start, tx_end;
	size_t i;

	/* RX queue indices for the first VF */
	nicvf_rx_range(dev, nic, &rx_start, &rx_end);

	/* Reading per RX ring stats */
	for (qidx = rx_start; qidx <= rx_end; qidx++) {
		if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
			break;
		nicvf_hw_get_rx_qstats(nic, &rx_qstats, qidx);
		stats->q_ibytes[qidx] = rx_qstats.q_rx_bytes;
		stats->q_ipackets[qidx] = rx_qstats.q_rx_packets;
	}

	/* TX queue indices for the first VF */
	nicvf_tx_range(dev, nic, &tx_start, &tx_end);

	/* Reading per TX ring stats */
	for (qidx = tx_start; qidx <= tx_end; qidx++) {
		if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
			break;
		nicvf_hw_get_tx_qstats(nic, &tx_qstats, qidx);
		stats->q_obytes[qidx] = tx_qstats.q_tx_bytes;
		stats->q_opackets[qidx] = tx_qstats.q_tx_packets;
	}

	for (i = 0; i < nic->sqs_count; i++) {
		struct nicvf *snic = nic->snicvf[i];

		if (snic == NULL)
			break;

		/* RX queue indices for a secondary VF */
		nicvf_rx_range(dev, snic, &rx_start, &rx_end);

		/* Reading per RX ring stats */
		for (qidx = rx_start; qidx <= rx_end; qidx++) {
			if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
				break;
			nicvf_hw_get_rx_qstats(snic, &rx_qstats,
					       qidx % MAX_RCV_QUEUES_PER_QS);
			stats->q_ibytes[qidx] = rx_qstats.q_rx_bytes;
			stats->q_ipackets[qidx] = rx_qstats.q_rx_packets;
		}

		/* TX queue indices for a secondary VF */
		nicvf_tx_range(dev, snic, &tx_start, &tx_end);
		/* Reading per TX ring stats */
		for (qidx = tx_start; qidx <= tx_end; qidx++) {
			if (qidx >= RTE_ETHDEV_QUEUE_STAT_CNTRS)
				break;
			nicvf_hw_get_tx_qstats(snic, &tx_qstats,
					       qidx % MAX_SND_QUEUES_PER_QS);
			stats->q_obytes[qidx] = tx_qstats.q_tx_bytes;
			stats->q_opackets[qidx] = tx_qstats.q_tx_packets;
		}
	}

	nicvf_hw_get_stats(nic, &port_stats);
	stats->ibytes = port_stats.rx_bytes;
	stats->ipackets = port_stats.rx_ucast_frames;
	stats->ipackets += port_stats.rx_bcast_frames;
	stats->ipackets += port_stats.rx_mcast_frames;
	stats->ierrors = port_stats.rx_l2_errors;
	stats->imissed = port_stats.rx_drop_red;
	stats->imissed += port_stats.rx_drop_overrun;
	stats->imissed += port_stats.rx_drop_bcast;
	stats->imissed += port_stats.rx_drop_mcast;
	stats->imissed += port_stats.rx_drop_l3_bcast;
	stats->imissed += port_stats.rx_drop_l3_mcast;

	stats->obytes = port_stats.tx_bytes_ok;
	stats->opackets = port_stats.tx_ucast_frames_ok;
	stats->opackets += port_stats.tx_bcast_frames_ok;
	stats->opackets += port_stats.tx_mcast_frames_ok;
	stats->oerrors = port_stats.tx_drops;

	return 0;
}
static const uint32_t *
nicvf_dev_supported_ptypes_get(struct rte_eth_dev *dev)
{
	size_t copied;
	static uint32_t ptypes[32];
	struct nicvf *nic = nicvf_pmd_priv(dev);
	static const uint32_t ptypes_common[] = {
		RTE_PTYPE_L3_IPV4,
		RTE_PTYPE_L3_IPV4_EXT,
		RTE_PTYPE_L3_IPV6,
		RTE_PTYPE_L3_IPV6_EXT,
		RTE_PTYPE_L4_TCP,
		RTE_PTYPE_L4_UDP,
		RTE_PTYPE_L4_FRAG,
	};
	static const uint32_t ptypes_tunnel[] = {
		RTE_PTYPE_TUNNEL_GRE,
		RTE_PTYPE_TUNNEL_GENEVE,
		RTE_PTYPE_TUNNEL_VXLAN,
		RTE_PTYPE_TUNNEL_NVGRE,
	};
	static const uint32_t ptypes_end = RTE_PTYPE_UNKNOWN;

	copied = sizeof(ptypes_common);
	memcpy(ptypes, ptypes_common, copied);
	if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING) {
		memcpy((char *)ptypes + copied, ptypes_tunnel,
			sizeof(ptypes_tunnel));
		copied += sizeof(ptypes_tunnel);
	}

	memcpy((char *)ptypes + copied, &ptypes_end, sizeof(ptypes_end));

	/* All Ptypes are supported in all Rx functions. */
	return ptypes;
}
static int
nicvf_dev_stats_reset(struct rte_eth_dev *dev)
{
	int i;
	uint16_t rxqs = 0, txqs = 0;
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint16_t rx_start, rx_end;
	uint16_t tx_start, tx_end;
	int ret;

	/* Reset all primary nic counters */
	nicvf_rx_range(dev, nic, &rx_start, &rx_end);
	for (i = rx_start; i <= rx_end; i++)
		rxqs |= (0x3 << (i * 2));

	nicvf_tx_range(dev, nic, &tx_start, &tx_end);
	for (i = tx_start; i <= tx_end; i++)
		txqs |= (0x3 << (i * 2));

	ret = nicvf_mbox_reset_stat_counters(nic, 0x3FFF, 0x1F, rxqs, txqs);
	if (ret != 0)
		return ret;

	/* Reset secondary nic queue counters */
	for (i = 0; i < nic->sqs_count; i++) {
		struct nicvf *snic = nic->snicvf[i];

		if (snic == NULL)
			break;

		nicvf_rx_range(dev, snic, &rx_start, &rx_end);
		for (i = rx_start; i <= rx_end; i++)
			rxqs |= (0x3 << ((i % MAX_CMP_QUEUES_PER_QS) * 2));

		nicvf_tx_range(dev, snic, &tx_start, &tx_end);
		for (i = tx_start; i <= tx_end; i++)
			txqs |= (0x3 << ((i % MAX_SND_QUEUES_PER_QS) * 2));

		ret = nicvf_mbox_reset_stat_counters(snic, 0, 0, rxqs, txqs);
		if (ret != 0)
			return ret;
	}

	return 0;
}
/* Promiscuous mode enabled by default in LMAC to VF 1:1 map configuration */
static int
nicvf_dev_promisc_enable(struct rte_eth_dev *dev __rte_unused)
{
	return 0;
}
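/*
 * Helpers translating between the generic ethdev ETH_RSS_* flags and the
 * ThunderX hardware RSS enable bits. The HW has a single IP hash enable
 * bit, so IPV4 and IPV6 both map to the same RSS_IP_ENA flag.
 */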
static inline uint64_t
nicvf_rss_ethdev_to_nic(struct nicvf *nic, uint64_t ethdev_rss)
{
	uint64_t nic_rss = 0;

	if (ethdev_rss & ETH_RSS_IPV4)
		nic_rss |= RSS_IP_ENA;

	if (ethdev_rss & ETH_RSS_IPV6)
		nic_rss |= RSS_IP_ENA;

	if (ethdev_rss & ETH_RSS_NONFRAG_IPV4_UDP)
		nic_rss |= (RSS_IP_ENA | RSS_UDP_ENA);

	if (ethdev_rss & ETH_RSS_NONFRAG_IPV4_TCP)
		nic_rss |= (RSS_IP_ENA | RSS_TCP_ENA);

	if (ethdev_rss & ETH_RSS_NONFRAG_IPV6_UDP)
		nic_rss |= (RSS_IP_ENA | RSS_UDP_ENA);

	if (ethdev_rss & ETH_RSS_NONFRAG_IPV6_TCP)
		nic_rss |= (RSS_IP_ENA | RSS_TCP_ENA);

	if (ethdev_rss & ETH_RSS_PORT)
		nic_rss |= RSS_L2_EXTENDED_HASH_ENA;

	if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING) {
		if (ethdev_rss & ETH_RSS_VXLAN)
			nic_rss |= RSS_TUN_VXLAN_ENA;

		if (ethdev_rss & ETH_RSS_GENEVE)
			nic_rss |= RSS_TUN_GENEVE_ENA;

		if (ethdev_rss & ETH_RSS_NVGRE)
			nic_rss |= RSS_TUN_NVGRE_ENA;
	}

	return nic_rss;
}

static inline uint64_t
nicvf_rss_nic_to_ethdev(struct nicvf *nic, uint64_t nic_rss)
{
	uint64_t ethdev_rss = 0;

	if (nic_rss & RSS_IP_ENA)
		ethdev_rss |= (ETH_RSS_IPV4 | ETH_RSS_IPV6);

	if ((nic_rss & RSS_IP_ENA) && (nic_rss & RSS_TCP_ENA))
		ethdev_rss |= (ETH_RSS_NONFRAG_IPV4_TCP |
				ETH_RSS_NONFRAG_IPV6_TCP);

	if ((nic_rss & RSS_IP_ENA) && (nic_rss & RSS_UDP_ENA))
		ethdev_rss |= (ETH_RSS_NONFRAG_IPV4_UDP |
				ETH_RSS_NONFRAG_IPV6_UDP);

	if (nic_rss & RSS_L2_EXTENDED_HASH_ENA)
		ethdev_rss |= ETH_RSS_PORT;

	if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING) {
		if (nic_rss & RSS_TUN_VXLAN_ENA)
			ethdev_rss |= ETH_RSS_VXLAN;

		if (nic_rss & RSS_TUN_GENEVE_ENA)
			ethdev_rss |= ETH_RSS_GENEVE;

		if (nic_rss & RSS_TUN_NVGRE_ENA)
			ethdev_rss |= ETH_RSS_NVGRE;
	}

	return ethdev_rss;
}
static int
nicvf_dev_reta_query(struct rte_eth_dev *dev,
		     struct rte_eth_rss_reta_entry64 *reta_conf,
		     uint16_t reta_size)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint8_t tbl[NIC_MAX_RSS_IDR_TBL_SIZE];
	int ret, i, j;

	if (reta_size != NIC_MAX_RSS_IDR_TBL_SIZE) {
		PMD_DRV_LOG(ERR,
			    "The size of hash lookup table configured "
			    "(%u) doesn't match the number hardware can "
			    "support (%u)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE);
		return -EINVAL;
	}

	ret = nicvf_rss_reta_query(nic, tbl, NIC_MAX_RSS_IDR_TBL_SIZE);
	if (ret)
		return ret;

	/* Copy RETA table */
	for (i = 0; i < (NIC_MAX_RSS_IDR_TBL_SIZE / RTE_RETA_GROUP_SIZE); i++) {
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				reta_conf[i].reta[j] = tbl[j];
	}

	return 0;
}

static int
nicvf_dev_reta_update(struct rte_eth_dev *dev,
		      struct rte_eth_rss_reta_entry64 *reta_conf,
		      uint16_t reta_size)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint8_t tbl[NIC_MAX_RSS_IDR_TBL_SIZE];
	int ret, i, j;

	if (reta_size != NIC_MAX_RSS_IDR_TBL_SIZE) {
		PMD_DRV_LOG(ERR, "The size of hash lookup table configured "
			    "(%u) doesn't match the number hardware can "
			    "support (%u)", reta_size, NIC_MAX_RSS_IDR_TBL_SIZE);
		return -EINVAL;
	}

	ret = nicvf_rss_reta_query(nic, tbl, NIC_MAX_RSS_IDR_TBL_SIZE);
	if (ret)
		return ret;

	/* Copy RETA table */
	for (i = 0; i < (NIC_MAX_RSS_IDR_TBL_SIZE / RTE_RETA_GROUP_SIZE); i++) {
		for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
			if ((reta_conf[i].mask >> j) & 0x01)
				tbl[j] = reta_conf[i].reta[j];
	}

	return nicvf_rss_reta_update(nic, tbl, NIC_MAX_RSS_IDR_TBL_SIZE);
}
static int
nicvf_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
			    struct rte_eth_rss_conf *rss_conf)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);

	if (rss_conf->rss_key)
		nicvf_rss_get_key(nic, rss_conf->rss_key);

	rss_conf->rss_key_len = RSS_HASH_KEY_BYTE_SIZE;
	rss_conf->rss_hf = nicvf_rss_nic_to_ethdev(nic, nicvf_rss_get_cfg(nic));
	return 0;
}

static int
nicvf_dev_rss_hash_update(struct rte_eth_dev *dev,
			  struct rte_eth_rss_conf *rss_conf)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint64_t nic_rss;

	if (rss_conf->rss_key &&
		rss_conf->rss_key_len != RSS_HASH_KEY_BYTE_SIZE) {
		PMD_DRV_LOG(ERR, "Hash key size mismatch %u",
			    rss_conf->rss_key_len);
		return -EINVAL;
	}

	if (rss_conf->rss_key)
		nicvf_rss_set_key(nic, rss_conf->rss_key);

	nic_rss = nicvf_rss_ethdev_to_nic(nic, rss_conf->rss_hf);
	nicvf_rss_set_cfg(nic, nic_rss);
	return 0;
}
static int
nicvf_qset_cq_alloc(struct rte_eth_dev *dev, struct nicvf *nic,
		    struct nicvf_rxq *rxq, uint16_t qidx, uint32_t desc_cnt)
{
	const struct rte_memzone *rz;
	uint32_t ring_size = CMP_QUEUE_SZ_MAX * sizeof(union cq_entry_t);

	rz = rte_eth_dma_zone_reserve(dev, "cq_ring",
				      nicvf_netdev_qidx(nic, qidx), ring_size,
				      NICVF_CQ_BASE_ALIGN_BYTES, nic->node);
	if (rz == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate mem for cq hw ring");
		return -ENOMEM;
	}

	memset(rz->addr, 0, ring_size);

	rxq->phys = rz->iova;
	rxq->desc = rz->addr;
	rxq->qlen_mask = desc_cnt - 1;

	return 0;
}

static int
nicvf_qset_sq_alloc(struct rte_eth_dev *dev, struct nicvf *nic,
		    struct nicvf_txq *sq, uint16_t qidx, uint32_t desc_cnt)
{
	const struct rte_memzone *rz;
	uint32_t ring_size = SND_QUEUE_SZ_MAX * sizeof(union sq_entry_t);

	rz = rte_eth_dma_zone_reserve(dev, "sq",
				      nicvf_netdev_qidx(nic, qidx), ring_size,
				      NICVF_SQ_BASE_ALIGN_BYTES, nic->node);
	if (rz == NULL) {
		PMD_INIT_LOG(ERR, "Failed allocate mem for sq hw ring");
		return -ENOMEM;
	}

	memset(rz->addr, 0, ring_size);

	sq->phys = rz->iova;
	sq->desc = rz->addr;
	sq->qlen_mask = desc_cnt - 1;

	return 0;
}
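/*
 * A single receive buffer descriptor ring (RBDR) supplies packet buffers
 * to all Rx queues of the Qset, hence it is sized from the total Rx
 * descriptor count and allocated once per VF.
 */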
static int
nicvf_qset_rbdr_alloc(struct rte_eth_dev *dev, struct nicvf *nic,
		      uint32_t desc_cnt, uint32_t buffsz)
{
	struct nicvf_rbdr *rbdr;
	const struct rte_memzone *rz;
	uint32_t ring_size;

	assert(nic->rbdr == NULL);
	rbdr = rte_zmalloc_socket("rbdr", sizeof(struct nicvf_rbdr),
				  RTE_CACHE_LINE_SIZE, nic->node);
	if (rbdr == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate mem for rbdr");
		return -ENOMEM;
	}

	ring_size = sizeof(struct rbdr_entry_t) * RBDR_QUEUE_SZ_MAX;
	rz = rte_eth_dma_zone_reserve(dev, "rbdr",
				      nicvf_netdev_qidx(nic, 0), ring_size,
				      NICVF_RBDR_BASE_ALIGN_BYTES, nic->node);
	if (rz == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate mem for rbdr desc ring");
		rte_free(rbdr);
		return -ENOMEM;
	}

	memset(rz->addr, 0, ring_size);

	rbdr->phys = rz->iova;
	rbdr->tail = 0;
	rbdr->next_tail = 0;
	rbdr->desc = rz->addr;
	rbdr->buffsz = buffsz;
	rbdr->qlen_mask = desc_cnt - 1;
	rbdr->rbdr_status =
		nicvf_qset_base(nic, 0) + NIC_QSET_RBDR_0_1_STATUS0;
	rbdr->rbdr_door =
		nicvf_qset_base(nic, 0) + NIC_QSET_RBDR_0_1_DOOR;

	nic->rbdr = rbdr;
	return 0;
}
static void
nicvf_rbdr_release_mbuf(struct rte_eth_dev *dev, struct nicvf *nic,
			nicvf_iova_addr_t phy)
{
	uint16_t qidx;
	void *obj;
	struct nicvf_rxq *rxq;
	uint16_t rx_start, rx_end;

	/* Get queue ranges for this VF */
	nicvf_rx_range(dev, nic, &rx_start, &rx_end);

	for (qidx = rx_start; qidx <= rx_end; qidx++) {
		rxq = dev->data->rx_queues[qidx];
		if (rxq->precharge_cnt) {
			obj = (void *)nicvf_mbuff_phy2virt(phy,
							   rxq->mbuf_phys_off);
			rte_mempool_put(rxq->pool, obj);
			rxq->precharge_cnt--;
			break;
		}
	}
}

static inline void
nicvf_rbdr_release_mbufs(struct rte_eth_dev *dev, struct nicvf *nic)
{
	uint32_t qlen_mask, head;
	struct rbdr_entry_t *entry;
	struct nicvf_rbdr *rbdr = nic->rbdr;

	qlen_mask = rbdr->qlen_mask;
	head = rbdr->head;
	while (head != rbdr->tail) {
		entry = rbdr->desc + head;
		nicvf_rbdr_release_mbuf(dev, nic, entry->full_addr);
		head++;
		head = head & qlen_mask;
	}
}

static inline void
nicvf_tx_queue_release_mbufs(struct nicvf_txq *txq)
{
	uint32_t head;

	head = txq->head;
	while (head != txq->tail) {
		if (txq->txbuffs[head]) {
			rte_pktmbuf_free_seg(txq->txbuffs[head]);
			txq->txbuffs[head] = NULL;
		}
		head++;
		head = head & txq->qlen_mask;
	}
}

static void
nicvf_tx_queue_reset(struct nicvf_txq *txq)
{
	uint32_t txq_desc_cnt = txq->qlen_mask + 1;

	memset(txq->desc, 0, sizeof(union sq_entry_t) * txq_desc_cnt);
	memset(txq->txbuffs, 0, sizeof(struct rte_mbuf *) * txq_desc_cnt);
	txq->tail = 0;
	txq->head = 0;
	txq->xmit_bufs = 0;
}
static inline int
nicvf_vf_start_tx_queue(struct rte_eth_dev *dev, struct nicvf *nic,
			uint16_t qidx)
{
	struct nicvf_txq *txq;
	int ret;

	assert(qidx < MAX_SND_QUEUES_PER_QS);

	if (dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] ==
		RTE_ETH_QUEUE_STATE_STARTED)
		return 0;

	txq = dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)];
	txq->pool = NULL;
	ret = nicvf_qset_sq_config(nic, qidx, txq);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to configure sq VF%d %d %d",
			     nic->vf_id, qidx, ret);
		goto config_sq_error;
	}

	dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
		RTE_ETH_QUEUE_STATE_STARTED;
	return ret;

config_sq_error:
	nicvf_qset_sq_reclaim(nic, qidx);
	return ret;
}

static inline int
nicvf_vf_stop_tx_queue(struct rte_eth_dev *dev, struct nicvf *nic,
		       uint16_t qidx)
{
	struct nicvf_txq *txq;
	int ret;

	assert(qidx < MAX_SND_QUEUES_PER_QS);

	if (dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] ==
		RTE_ETH_QUEUE_STATE_STOPPED)
		return 0;

	ret = nicvf_qset_sq_reclaim(nic, qidx);
	if (ret)
		PMD_INIT_LOG(ERR, "Failed to reclaim sq VF%d %d %d",
			     nic->vf_id, qidx, ret);

	txq = dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)];
	nicvf_tx_queue_release_mbufs(txq);
	nicvf_tx_queue_reset(txq);

	dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
		RTE_ETH_QUEUE_STATE_STOPPED;
	return ret;
}
static inline int
nicvf_configure_cpi(struct rte_eth_dev *dev)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint16_t qidx, qcnt;
	int ret;

	/* Count started rx queues */
	for (qidx = qcnt = 0; qidx < dev->data->nb_rx_queues; qidx++)
		if (dev->data->rx_queue_state[qidx] ==
		    RTE_ETH_QUEUE_STATE_STARTED)
			qcnt++;

	nic->cpi_alg = CPI_ALG_NONE;
	ret = nicvf_mbox_config_cpi(nic, qcnt);
	if (ret)
		PMD_INIT_LOG(ERR, "Failed to configure CPI %d", ret);

	return ret;
}

static inline int
nicvf_configure_rss(struct rte_eth_dev *dev)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint64_t rsshf;
	int ret = -EINVAL;

	rsshf = nicvf_rss_ethdev_to_nic(nic,
			dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf);
	PMD_DRV_LOG(INFO, "mode=%d rx_queues=%d loopback=%d rsshf=0x%" PRIx64,
		    dev->data->dev_conf.rxmode.mq_mode,
		    dev->data->nb_rx_queues,
		    dev->data->dev_conf.lpbk_mode, rsshf);

	if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_NONE)
		ret = nicvf_rss_term(nic);
	else if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS)
		ret = nicvf_rss_config(nic, dev->data->nb_rx_queues, rsshf);
	if (ret)
		PMD_INIT_LOG(ERR, "Failed to configure RSS %d", ret);

	return ret;
}

static int
nicvf_configure_rss_reta(struct rte_eth_dev *dev)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	unsigned int idx, qmap_size;
	uint8_t qmap[RTE_MAX_QUEUES_PER_PORT];
	uint8_t default_reta[NIC_MAX_RSS_IDR_TBL_SIZE];

	if (nic->cpi_alg != CPI_ALG_NONE)
		return -EINVAL;

	/* Prepare queue map */
	for (idx = 0, qmap_size = 0; idx < dev->data->nb_rx_queues; idx++) {
		if (dev->data->rx_queue_state[idx] ==
		    RTE_ETH_QUEUE_STATE_STARTED)
			qmap[qmap_size++] = idx;
	}

	/* Update default RSS RETA */
	for (idx = 0; idx < NIC_MAX_RSS_IDR_TBL_SIZE; idx++)
		default_reta[idx] = qmap[idx % qmap_size];

	return nicvf_rss_reta_update(nic, default_reta,
				     NIC_MAX_RSS_IDR_TBL_SIZE);
}
static void
nicvf_dev_tx_queue_release(void *sq)
{
	struct nicvf_txq *txq;

	PMD_INIT_FUNC_TRACE();

	txq = (struct nicvf_txq *)sq;
	if (txq) {
		if (txq->txbuffs != NULL) {
			nicvf_tx_queue_release_mbufs(txq);
			rte_free(txq->txbuffs);
			txq->txbuffs = NULL;
		}
		rte_free(txq);
	}
}
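/*
 * Rx/Tx burst callbacks are resolved once, at configuration time, from
 * the negotiated offloads, so the fast path carries no runtime feature
 * checks.
 */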
static void
nicvf_set_tx_function(struct rte_eth_dev *dev)
{
	struct nicvf_txq *txq = NULL;
	size_t i;
	bool multiseg = false;

	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		txq = dev->data->tx_queues[i];
		if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) {
			multiseg = true;
			break;
		}
	}

	/* Use a simple Tx queue (no offloads, no multi segs) if possible */
	if (multiseg) {
		PMD_DRV_LOG(DEBUG, "Using multi-segment tx callback");
		dev->tx_pkt_burst = nicvf_xmit_pkts_multiseg;
	} else {
		PMD_DRV_LOG(DEBUG, "Using single-segment tx callback");
		dev->tx_pkt_burst = nicvf_xmit_pkts;
	}

	if (!txq)
		return;

	if (txq->pool_free == nicvf_single_pool_free_xmited_buffers)
		PMD_DRV_LOG(DEBUG, "Using single-mempool tx free method");
	else
		PMD_DRV_LOG(DEBUG, "Using multi-mempool tx free method");
}

static void
nicvf_set_rx_function(struct rte_eth_dev *dev)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);

	const eth_rx_burst_t rx_burst_func[2][2][2] = {
	/* [NORMAL/SCATTER] [CKSUM/NO_CKSUM] [VLAN_STRIP/NO_VLAN_STRIP] */
		[0][0][0] = nicvf_recv_pkts_no_offload,
		[0][0][1] = nicvf_recv_pkts_vlan_strip,
		[0][1][0] = nicvf_recv_pkts_cksum,
		[0][1][1] = nicvf_recv_pkts_cksum_vlan_strip,
		[1][0][0] = nicvf_recv_pkts_multiseg_no_offload,
		[1][0][1] = nicvf_recv_pkts_multiseg_vlan_strip,
		[1][1][0] = nicvf_recv_pkts_multiseg_cksum,
		[1][1][1] = nicvf_recv_pkts_multiseg_cksum_vlan_strip,
	};

	dev->rx_pkt_burst =
		rx_burst_func[dev->data->scattered_rx]
			[nic->offload_cksum][nic->vlan_strip];
}
static int
nicvf_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx,
			 uint16_t nb_desc, unsigned int socket_id,
			 const struct rte_eth_txconf *tx_conf)
{
	uint16_t tx_free_thresh;
	bool is_single_pool;
	struct nicvf_txq *txq;
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint64_t offloads;

	PMD_INIT_FUNC_TRACE();

	if (qidx >= MAX_SND_QUEUES_PER_QS)
		nic = nic->snicvf[qidx / MAX_SND_QUEUES_PER_QS - 1];

	qidx = qidx % MAX_SND_QUEUES_PER_QS;

	/* Socket id check */
	if (socket_id != (unsigned int)SOCKET_ID_ANY && socket_id != nic->node)
		PMD_DRV_LOG(WARNING, "socket_id expected %d, configured %d",
			    socket_id, nic->node);

	/* Tx deferred start is not supported */
	if (tx_conf->tx_deferred_start) {
		PMD_INIT_LOG(ERR, "Tx deferred start not supported");
		return -EINVAL;
	}

	/* Roundup nb_desc to available qsize and validate max number of desc */
	nb_desc = nicvf_qsize_sq_roundup(nb_desc);
	if (nb_desc == 0) {
		PMD_INIT_LOG(ERR, "Value of nb_desc beyond available sq qsize");
		return -EINVAL;
	}

	/* Validate tx_free_thresh */
	tx_free_thresh = (uint16_t)((tx_conf->tx_free_thresh) ?
				    tx_conf->tx_free_thresh :
				    NICVF_DEFAULT_TX_FREE_THRESH);

	if (tx_free_thresh > (nb_desc) ||
		tx_free_thresh > NICVF_MAX_TX_FREE_THRESH) {
		PMD_INIT_LOG(ERR,
			     "tx_free_thresh must be less than the number of TX "
			     "descriptors. (tx_free_thresh=%u port=%d "
			     "queue=%d)", (unsigned int)tx_free_thresh,
			     (int)dev->data->port_id, (int)qidx);
		return -EINVAL;
	}

	/* Free memory prior to re-allocation if needed. */
	if (dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)] != NULL) {
		PMD_TX_LOG(DEBUG, "Freeing memory prior to re-allocation %d",
			   nicvf_netdev_qidx(nic, qidx));
		nicvf_dev_tx_queue_release(
			dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)]);
		dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)] = NULL;
	}

	/* Allocating tx queue data structure */
	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct nicvf_txq),
				 RTE_CACHE_LINE_SIZE, nic->node);
	if (txq == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate txq=%d",
			     nicvf_netdev_qidx(nic, qidx));
		return -ENOMEM;
	}

	txq->nic = nic;
	txq->queue_id = qidx;
	txq->tx_free_thresh = tx_free_thresh;
	txq->sq_head = nicvf_qset_base(nic, qidx) + NIC_QSET_SQ_0_7_HEAD;
	txq->sq_door = nicvf_qset_base(nic, qidx) + NIC_QSET_SQ_0_7_DOOR;
	offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
	txq->offloads = offloads;

	is_single_pool = !!(offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE);

	/* Choose optimum free threshold value for multipool case */
	if (!is_single_pool) {
		txq->tx_free_thresh = (uint16_t)
			(tx_conf->tx_free_thresh == NICVF_DEFAULT_TX_FREE_THRESH ?
				NICVF_TX_FREE_MPOOL_THRESH :
				tx_conf->tx_free_thresh);
		txq->pool_free = nicvf_multi_pool_free_xmited_buffers;
	} else {
		txq->pool_free = nicvf_single_pool_free_xmited_buffers;
	}

	/* Allocate software ring */
	txq->txbuffs = rte_zmalloc_socket("txq->txbuffs",
					  nb_desc * sizeof(struct rte_mbuf *),
					  RTE_CACHE_LINE_SIZE, nic->node);

	if (txq->txbuffs == NULL) {
		nicvf_dev_tx_queue_release(txq);
		return -ENOMEM;
	}

	if (nicvf_qset_sq_alloc(dev, nic, txq, qidx, nb_desc)) {
		PMD_INIT_LOG(ERR, "Failed to allocate mem for sq %d", qidx);
		nicvf_dev_tx_queue_release(txq);
		return -ENOMEM;
	}

	nicvf_tx_queue_reset(txq);

	PMD_INIT_LOG(DEBUG, "[%d] txq=%p nb_desc=%d desc=%p"
			" phys=0x%" PRIx64 " offloads=0x%" PRIx64,
			nicvf_netdev_qidx(nic, qidx), txq, nb_desc, txq->desc,
			txq->phys, txq->offloads);

	dev->data->tx_queues[nicvf_netdev_qidx(nic, qidx)] = txq;
	dev->data->tx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
		RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}
static inline void
nicvf_rx_queue_release_mbufs(struct rte_eth_dev *dev, struct nicvf_rxq *rxq)
{
	uint32_t rxq_cnt;
	uint32_t nb_pkts, released_pkts = 0;
	uint32_t refill_cnt = 0;
	struct rte_mbuf *rx_pkts[NICVF_MAX_RX_FREE_THRESH];

	if (dev->rx_pkt_burst == NULL)
		return;

	while ((rxq_cnt = nicvf_dev_rx_queue_count(dev,
				nicvf_netdev_qidx(rxq->nic, rxq->queue_id)))) {
		nb_pkts = dev->rx_pkt_burst(rxq, rx_pkts,
					    NICVF_MAX_RX_FREE_THRESH);
		PMD_DRV_LOG(INFO, "nb_pkts=%d  rxq_cnt=%d", nb_pkts, rxq_cnt);
		while (nb_pkts) {
			rte_pktmbuf_free_seg(rx_pkts[--nb_pkts]);
			released_pkts++;
		}
	}

	refill_cnt += nicvf_dev_rbdr_refill(dev,
			nicvf_netdev_qidx(rxq->nic, rxq->queue_id));

	PMD_DRV_LOG(INFO, "free_cnt=%d  refill_cnt=%d",
		    released_pkts, refill_cnt);
}

static void
nicvf_rx_queue_reset(struct nicvf_rxq *rxq)
{
	rxq->head = 0;
	rxq->available_space = 0;
	rxq->recv_buffers = 0;
}

static inline int
nicvf_vf_start_rx_queue(struct rte_eth_dev *dev, struct nicvf *nic,
			uint16_t qidx)
{
	struct nicvf_rxq *rxq;
	int ret;

	assert(qidx < MAX_RCV_QUEUES_PER_QS);

	if (dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] ==
		RTE_ETH_QUEUE_STATE_STARTED)
		return 0;

	/* Update rbdr pointer to all rxq */
	rxq = dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)];
	rxq->shared_rbdr = nic->rbdr;

	ret = nicvf_qset_rq_config(nic, qidx, rxq);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to configure rq VF%d %d %d",
			     nic->vf_id, qidx, ret);
		goto config_rq_error;
	}
	ret = nicvf_qset_cq_config(nic, qidx, rxq);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to configure cq VF%d %d %d",
			     nic->vf_id, qidx, ret);
		goto config_cq_error;
	}

	dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
		RTE_ETH_QUEUE_STATE_STARTED;
	return 0;

config_cq_error:
	nicvf_qset_cq_reclaim(nic, qidx);
config_rq_error:
	nicvf_qset_rq_reclaim(nic, qidx);
	return ret;
}
static inline int
nicvf_vf_stop_rx_queue(struct rte_eth_dev *dev, struct nicvf *nic,
		       uint16_t qidx)
{
	struct nicvf_rxq *rxq;
	int ret, other_error;

	if (dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] ==
		RTE_ETH_QUEUE_STATE_STOPPED)
		return 0;

	ret = nicvf_qset_rq_reclaim(nic, qidx);
	if (ret)
		PMD_INIT_LOG(ERR, "Failed to reclaim rq VF%d %d %d",
			     nic->vf_id, qidx, ret);

	other_error = ret;
	rxq = dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)];
	nicvf_rx_queue_release_mbufs(dev, rxq);
	nicvf_rx_queue_reset(rxq);

	ret = nicvf_qset_cq_reclaim(nic, qidx);
	if (ret)
		PMD_INIT_LOG(ERR, "Failed to reclaim cq VF%d %d %d",
			     nic->vf_id, qidx, ret);

	other_error |= ret;
	dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
		RTE_ETH_QUEUE_STATE_STOPPED;
	return other_error;
}

static void
nicvf_dev_rx_queue_release(void *rx_queue)
{
	PMD_INIT_FUNC_TRACE();

	rte_free(rx_queue);
}

static int
nicvf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t qidx)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	int ret;

	if (qidx >= MAX_RCV_QUEUES_PER_QS)
		nic = nic->snicvf[(qidx / MAX_RCV_QUEUES_PER_QS - 1)];

	qidx = qidx % MAX_RCV_QUEUES_PER_QS;

	ret = nicvf_vf_start_rx_queue(dev, nic, qidx);
	if (ret)
		return ret;

	ret = nicvf_configure_cpi(dev);
	if (ret)
		return ret;

	return nicvf_configure_rss_reta(dev);
}

static int
nicvf_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t qidx)
{
	int ret;
	struct nicvf *nic = nicvf_pmd_priv(dev);

	if (qidx >= MAX_SND_QUEUES_PER_QS)
		nic = nic->snicvf[(qidx / MAX_SND_QUEUES_PER_QS - 1)];

	qidx = qidx % MAX_RCV_QUEUES_PER_QS;

	ret = nicvf_vf_stop_rx_queue(dev, nic, qidx);
	ret |= nicvf_configure_cpi(dev);
	ret |= nicvf_configure_rss_reta(dev);
	return ret;
}
static int
nicvf_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t qidx)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);

	if (qidx >= MAX_SND_QUEUES_PER_QS)
		nic = nic->snicvf[(qidx / MAX_SND_QUEUES_PER_QS - 1)];

	qidx = qidx % MAX_SND_QUEUES_PER_QS;

	return nicvf_vf_start_tx_queue(dev, nic, qidx);
}

static int
nicvf_dev_tx_queue_stop(struct rte_eth_dev *dev, uint16_t qidx)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);

	if (qidx >= MAX_SND_QUEUES_PER_QS)
		nic = nic->snicvf[(qidx / MAX_SND_QUEUES_PER_QS - 1)];

	qidx = qidx % MAX_SND_QUEUES_PER_QS;

	return nicvf_vf_stop_tx_queue(dev, nic, qidx);
}
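/*
 * Precompute the 8-byte rearm_data word (data_off, refcnt, nb_segs, port)
 * of a template mbuf; the Rx fast path can then rearm a descriptor with a
 * single 64-bit store of this value. The RTE_BUILD_BUG_ONs below pin the
 * mbuf layout this trick depends on.
 */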
static inline void
nicvf_rxq_mbuf_setup(struct nicvf_rxq *rxq)
{
	uintptr_t p;
	struct rte_mbuf mb_def;
	struct nicvf *nic = rxq->nic;

	RTE_BUILD_BUG_ON(sizeof(union mbuf_initializer) != 8);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) % 8 != 0);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, refcnt) -
				offsetof(struct rte_mbuf, data_off) != 2);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, nb_segs) -
				offsetof(struct rte_mbuf, data_off) != 4);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, port) -
				offsetof(struct rte_mbuf, data_off) != 6);
	RTE_BUILD_BUG_ON(offsetof(struct nicvf_rxq, rxq_fastpath_data_end) -
				offsetof(struct nicvf_rxq,
					rxq_fastpath_data_start) > 128);
	mb_def.nb_segs = 1;
	mb_def.data_off = RTE_PKTMBUF_HEADROOM + (nic->skip_bytes);
	mb_def.port = rxq->port_id;
	rte_mbuf_refcnt_set(&mb_def, 1);

	/* Prevent compiler reordering: rearm_data covers previous fields */
	rte_compiler_barrier();
	p = (uintptr_t)&mb_def.rearm_data;
	rxq->mbuf_initializer.value = *(uint64_t *)p;
}
static int
nicvf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t qidx,
			 uint16_t nb_desc, unsigned int socket_id,
			 const struct rte_eth_rxconf *rx_conf,
			 struct rte_mempool *mp)
{
	uint16_t rx_free_thresh;
	struct nicvf_rxq *rxq;
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint64_t offloads;
	uint32_t buffsz;
	struct rte_pktmbuf_pool_private *mbp_priv;

	PMD_INIT_FUNC_TRACE();

	/* First skip check */
	mbp_priv = rte_mempool_get_priv(mp);
	buffsz = mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM;
	if (buffsz < (uint32_t)(nic->skip_bytes)) {
		PMD_INIT_LOG(ERR, "First skip is more than configured buffer size");
		return -EINVAL;
	}

	if (qidx >= MAX_RCV_QUEUES_PER_QS)
		nic = nic->snicvf[qidx / MAX_RCV_QUEUES_PER_QS - 1];

	qidx = qidx % MAX_RCV_QUEUES_PER_QS;

	/* Socket id check */
	if (socket_id != (unsigned int)SOCKET_ID_ANY && socket_id != nic->node)
		PMD_DRV_LOG(WARNING, "socket_id expected %d, configured %d",
			    socket_id, nic->node);

	/* Mempool memory must be contiguous, so must be one memory segment */
	if (mp->nb_mem_chunks != 1) {
		PMD_INIT_LOG(ERR, "Non-contiguous mempool, add more huge pages");
		return -EINVAL;
	}

	/* Mempool memory must be physically contiguous */
	if (mp->flags & MEMPOOL_F_NO_IOVA_CONTIG) {
		PMD_INIT_LOG(ERR, "Mempool memory must be physically contiguous");
		return -EINVAL;
	}

	/* Rx deferred start is not supported */
	if (rx_conf->rx_deferred_start) {
		PMD_INIT_LOG(ERR, "Rx deferred start not supported");
		return -EINVAL;
	}

	/* Roundup nb_desc to available qsize and validate max number of desc */
	nb_desc = nicvf_qsize_cq_roundup(nb_desc);
	if (nb_desc == 0) {
		PMD_INIT_LOG(ERR, "Value nb_desc beyond available hw cq qsize");
		return -EINVAL;
	}

	/* Check rx_free_thresh upper bound */
	rx_free_thresh = (uint16_t)((rx_conf->rx_free_thresh) ?
				    rx_conf->rx_free_thresh :
				    NICVF_DEFAULT_RX_FREE_THRESH);
	if (rx_free_thresh > NICVF_MAX_RX_FREE_THRESH ||
		rx_free_thresh >= nb_desc * .75) {
		PMD_INIT_LOG(ERR, "rx_free_thresh greater than expected %d",
			     rx_free_thresh);
		return -EINVAL;
	}

	/* Free memory prior to re-allocation if needed */
	if (dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)] != NULL) {
		PMD_RX_LOG(DEBUG, "Freeing memory prior to re-allocation %d",
			   nicvf_netdev_qidx(nic, qidx));
		nicvf_dev_rx_queue_release(
			dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)]);
		dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)] = NULL;
	}

	/* Allocate rxq memory */
	rxq = rte_zmalloc_socket("ethdev rx queue", sizeof(struct nicvf_rxq),
				 RTE_CACHE_LINE_SIZE, nic->node);
	if (rxq == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate rxq=%d",
			     nicvf_netdev_qidx(nic, qidx));
		return -ENOMEM;
	}

	rxq->nic = nic;
	rxq->pool = mp;
	rxq->queue_id = qidx;
	rxq->port_id = dev->data->port_id;
	rxq->rx_free_thresh = rx_free_thresh;
	rxq->rx_drop_en = rx_conf->rx_drop_en;
	rxq->cq_status = nicvf_qset_base(nic, qidx) + NIC_QSET_CQ_0_7_STATUS;
	rxq->cq_door = nicvf_qset_base(nic, qidx) + NIC_QSET_CQ_0_7_DOOR;
	rxq->precharge_cnt = 0;

	if (nicvf_hw_cap(nic) & NICVF_CAP_CQE_RX2)
		rxq->rbptr_offset = NICVF_CQE_RX2_RBPTR_WORD;
	else
		rxq->rbptr_offset = NICVF_CQE_RBPTR_WORD;

	nicvf_rxq_mbuf_setup(rxq);

	/* Alloc completion queue */
	if (nicvf_qset_cq_alloc(dev, nic, rxq, rxq->queue_id, nb_desc)) {
		PMD_INIT_LOG(ERR, "failed to allocate cq %u", rxq->queue_id);
		nicvf_dev_rx_queue_release(rxq);
		return -ENOMEM;
	}

	nicvf_rx_queue_reset(rxq);

	offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
	PMD_INIT_LOG(DEBUG, "[%d] rxq=%p pool=%s nb_desc=(%d/%d)"
			" phy=0x%" PRIx64 " offloads=0x%" PRIx64,
			nicvf_netdev_qidx(nic, qidx), rxq, mp->name, nb_desc,
			rte_mempool_avail_count(mp), rxq->phys, offloads);

	dev->data->rx_queues[nicvf_netdev_qidx(nic, qidx)] = rxq;
	dev->data->rx_queue_state[nicvf_netdev_qidx(nic, qidx)] =
		RTE_ETH_QUEUE_STATE_STOPPED;
	return 0;
}
static int
nicvf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);

	PMD_INIT_FUNC_TRACE();

	/* Autonegotiation may be disabled */
	dev_info->speed_capa = ETH_LINK_SPEED_FIXED;
	dev_info->speed_capa |= ETH_LINK_SPEED_10M | ETH_LINK_SPEED_100M |
				 ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G;
	if (nicvf_hw_version(nic) != PCI_SUB_DEVICE_ID_CN81XX_NICVF)
		dev_info->speed_capa |= ETH_LINK_SPEED_40G;

	dev_info->min_rx_bufsize = RTE_ETHER_MIN_MTU;
	dev_info->max_rx_pktlen = NIC_HW_MAX_MTU + RTE_ETHER_HDR_LEN;
	dev_info->max_rx_queues =
			(uint16_t)MAX_RCV_QUEUES_PER_QS * (MAX_SQS_PER_VF + 1);
	dev_info->max_tx_queues =
			(uint16_t)MAX_SND_QUEUES_PER_QS * (MAX_SQS_PER_VF + 1);
	dev_info->max_mac_addrs = 1;
	dev_info->max_vfs = pci_dev->max_vfs;

	dev_info->rx_offload_capa = NICVF_RX_OFFLOAD_CAPA;
	dev_info->tx_offload_capa = NICVF_TX_OFFLOAD_CAPA;
	dev_info->rx_queue_offload_capa = NICVF_RX_OFFLOAD_CAPA;
	dev_info->tx_queue_offload_capa = NICVF_TX_OFFLOAD_CAPA;

	dev_info->reta_size = nic->rss_info.rss_size;
	dev_info->hash_key_size = RSS_HASH_KEY_BYTE_SIZE;
	dev_info->flow_type_rss_offloads = NICVF_RSS_OFFLOAD_PASS1;
	if (nicvf_hw_cap(nic) & NICVF_CAP_TUNNEL_PARSING)
		dev_info->flow_type_rss_offloads |= NICVF_RSS_OFFLOAD_TUNNEL;

	dev_info->default_rxconf = (struct rte_eth_rxconf) {
		.rx_free_thresh = NICVF_DEFAULT_RX_FREE_THRESH,
		.rx_drop_en = 0,
	};

	dev_info->default_txconf = (struct rte_eth_txconf) {
		.tx_free_thresh = NICVF_DEFAULT_TX_FREE_THRESH,
		.offloads = DEV_TX_OFFLOAD_MBUF_FAST_FREE |
			DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
			DEV_TX_OFFLOAD_UDP_CKSUM |
			DEV_TX_OFFLOAD_TCP_CKSUM,
	};

	return 0;
}
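/*
 * Callback handed to nicvf_qset_rbdr_precharge(): it walks the Rx queues
 * round-robin and returns the IOVA of a freshly allocated mbuf, keeping
 * the precharge count balanced across all mempools.
 */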
static nicvf_iova_addr_t
rbdr_rte_mempool_get(void *dev, void *opaque)
{
	uint16_t qidx;
	uintptr_t mbuf;
	struct nicvf_rxq *rxq;
	struct rte_eth_dev *eth_dev = (struct rte_eth_dev *)dev;
	struct nicvf *nic = (struct nicvf *)opaque;
	uint16_t rx_start, rx_end;

	/* Get queue ranges for this VF */
	nicvf_rx_range(eth_dev, nic, &rx_start, &rx_end);

	for (qidx = rx_start; qidx <= rx_end; qidx++) {
		rxq = eth_dev->data->rx_queues[qidx];
		/* Maintain equal buffer count across all pools */
		if (rxq->precharge_cnt >= rxq->qlen_mask)
			continue;
		rxq->precharge_cnt++;
		mbuf = (uintptr_t)rte_pktmbuf_alloc(rxq->pool);
		if (mbuf)
			return nicvf_mbuff_virt2phy(mbuf, rxq->mbuf_phys_off);
	}
	return 0;
}
static int
nicvf_vf_start(struct rte_eth_dev *dev, struct nicvf *nic, uint32_t rbdrsz)
{
	int ret;
	uint16_t qidx, data_off;
	uint32_t total_rxq_desc, nb_rbdr_desc, exp_buffs;
	uint64_t mbuf_phys_off = 0;
	struct nicvf_rxq *rxq;
	struct rte_mbuf *mbuf;
	uint16_t rx_start, rx_end;
	uint16_t tx_start, tx_end;
	int mask;

	PMD_INIT_FUNC_TRACE();

	/* Userspace process exited without proper shutdown in last run */
	if (nicvf_qset_rbdr_active(nic, 0))
		nicvf_vf_stop(dev, nic, false);

	/* Get queue ranges for this VF */
	nicvf_rx_range(dev, nic, &rx_start, &rx_end);

	/*
	 * Thunderx nicvf PMD can support more than one pool per port only when
	 * 1) Data payload size is same across all the pools in given port
	 * AND
	 * 2) All mbuffs in the pools are from the same hugepage
	 * AND
	 * 3) Mbuff metadata size is same across all the pools in given port
	 *
	 * This is to support existing application that uses multiple pool/port.
	 * But, the purpose of using multipool for QoS will not be addressed.
	 */

	/* Validate mempool attributes */
	for (qidx = rx_start; qidx <= rx_end; qidx++) {
		rxq = dev->data->rx_queues[qidx];
		rxq->mbuf_phys_off = nicvf_mempool_phy_offset(rxq->pool);
		mbuf = rte_pktmbuf_alloc(rxq->pool);
		if (mbuf == NULL) {
			PMD_INIT_LOG(ERR, "Failed allocate mbuf VF%d qid=%d "
				     "pool=%s",
				     nic->vf_id, qidx, rxq->pool->name);
			return -ENOMEM;
		}
		data_off = nicvf_mbuff_meta_length(mbuf);
		data_off += RTE_PKTMBUF_HEADROOM;
		rte_pktmbuf_free(mbuf);

		if (data_off % RTE_CACHE_LINE_SIZE) {
			PMD_INIT_LOG(ERR, "%s: unaligned data_off=%d delta=%d",
				rxq->pool->name, data_off,
				data_off % RTE_CACHE_LINE_SIZE);
			return -EINVAL;
		}
		rxq->mbuf_phys_off -= data_off;
		rxq->mbuf_phys_off -= nic->skip_bytes;

		if (mbuf_phys_off == 0)
			mbuf_phys_off = rxq->mbuf_phys_off;
		if (mbuf_phys_off != rxq->mbuf_phys_off) {
			PMD_INIT_LOG(ERR, "pool params not same,%s VF%d %"
				     PRIx64, rxq->pool->name, nic->vf_id,
				     (uint64_t)mbuf_phys_off);
			return -EINVAL;
		}
	}

	/* Check the level of buffers in the pool */
	total_rxq_desc = 0;
	for (qidx = rx_start; qidx <= rx_end; qidx++) {
		rxq = dev->data->rx_queues[qidx];
		/* Count total numbers of rxq descs */
		total_rxq_desc += rxq->qlen_mask + 1;
		exp_buffs = RTE_MEMPOOL_CACHE_MAX_SIZE + rxq->rx_free_thresh;
		exp_buffs *= dev->data->nb_rx_queues;
		if (rte_mempool_avail_count(rxq->pool) < exp_buffs) {
			PMD_INIT_LOG(ERR, "Buff shortage in pool=%s (%d/%d)",
				     rxq->pool->name,
				     rte_mempool_avail_count(rxq->pool),
				     exp_buffs);
			return -ENOMEM;
		}
	}

	/* Check RBDR desc overflow */
	ret = nicvf_qsize_rbdr_roundup(total_rxq_desc);
	if (ret == 0) {
		PMD_INIT_LOG(ERR, "Reached RBDR desc limit, reduce nr desc "
			     "VF%d", nic->vf_id);
		return -ENOMEM;
	}

	/* Enable qset */
	ret = nicvf_qset_config(nic);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to enable qset %d VF%d", ret,
			     nic->vf_id);
		return ret;
	}

	/* Allocate RBDR and RBDR ring desc */
	nb_rbdr_desc = nicvf_qsize_rbdr_roundup(total_rxq_desc);
	ret = nicvf_qset_rbdr_alloc(dev, nic, nb_rbdr_desc, rbdrsz);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to allocate memory for rbdr alloc "
			     "VF%d", nic->vf_id);
		goto qset_reclaim;
	}

	/* Enable and configure RBDR registers */
	ret = nicvf_qset_rbdr_config(nic, 0);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to configure rbdr %d VF%d", ret,
			     nic->vf_id);
		goto qset_rbdr_free;
	}

	/* Fill rte_mempool buffers in RBDR pool and precharge it */
	ret = nicvf_qset_rbdr_precharge(dev, nic, 0, rbdr_rte_mempool_get,
					total_rxq_desc);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to fill rbdr %d VF%d", ret,
			     nic->vf_id);
		goto qset_rbdr_reclaim;
	}

	PMD_DRV_LOG(INFO, "Filled %d out of %d entries in RBDR VF%d",
		    nic->rbdr->tail, nb_rbdr_desc, nic->vf_id);

	/* Configure VLAN Strip */
	mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK |
		ETH_VLAN_EXTEND_MASK;
	ret = nicvf_vlan_offload_config(dev, mask);

	/* Based on the packet type(IPv4 or IPv6), the nicvf HW aligns L3 data
	 * to the 64bit memory address.
	 * The alignment creates a hole in mbuf(between the end of headroom and
	 * packet data start). The new revision of the HW provides an option to
	 * disable the L3 alignment feature and make mbuf layout look more like
	 * other NICs. For better application compatibility, disable the L3
	 * alignment feature on the hardware revisions that support it.
	 */
	nicvf_apad_config(nic, false);

	/* Get queue ranges for this VF */
	nicvf_tx_range(dev, nic, &tx_start, &tx_end);

	/* Configure TX queues */
	for (qidx = tx_start; qidx <= tx_end; qidx++) {
		ret = nicvf_vf_start_tx_queue(dev, nic,
			qidx % MAX_SND_QUEUES_PER_QS);
		if (ret)
			goto start_txq_error;
	}

	/* Configure RX queues */
	for (qidx = rx_start; qidx <= rx_end; qidx++) {
		ret = nicvf_vf_start_rx_queue(dev, nic,
			qidx % MAX_RCV_QUEUES_PER_QS);
		if (ret)
			goto start_rxq_error;
	}

	if (!nic->sqs_mode) {
		/* Configure CPI algorithm */
		ret = nicvf_configure_cpi(dev);
		if (ret)
			goto start_txq_error;

		ret = nicvf_mbox_get_rss_size(nic);
		if (ret) {
			PMD_INIT_LOG(ERR, "Failed to get rss table size");
			goto qset_rss_error;
		}

		/* Configure RSS */
		ret = nicvf_configure_rss(dev);
		if (ret)
			goto qset_rss_error;
	}

	/* Done; Let PF make the BGX's RX and TX switches to ON position */
	nicvf_mbox_cfg_done(nic);
	return 0;

qset_rss_error:
	nicvf_rss_term(nic);
start_rxq_error:
	for (qidx = rx_start; qidx <= rx_end; qidx++)
		nicvf_vf_stop_rx_queue(dev, nic, qidx % MAX_RCV_QUEUES_PER_QS);
start_txq_error:
	for (qidx = tx_start; qidx <= tx_end; qidx++)
		nicvf_vf_stop_tx_queue(dev, nic, qidx % MAX_SND_QUEUES_PER_QS);
qset_rbdr_reclaim:
	nicvf_qset_rbdr_reclaim(nic, 0);
	nicvf_rbdr_release_mbufs(dev, nic);
qset_rbdr_free:
	if (nic->rbdr) {
		rte_free(nic->rbdr);
		nic->rbdr = NULL;
	}
qset_reclaim:
	nicvf_qset_reclaim(nic);
	return ret;
}
static int
nicvf_dev_start(struct rte_eth_dev *dev)
{
	uint16_t qidx;
	int ret;
	size_t i;
	struct nicvf *nic = nicvf_pmd_priv(dev);
	struct rte_eth_rxmode *rx_conf = &dev->data->dev_conf.rxmode;
	uint16_t mtu;
	uint32_t buffsz = 0, rbdrsz = 0;
	struct rte_pktmbuf_pool_private *mbp_priv;
	struct nicvf_rxq *rxq;

	PMD_INIT_FUNC_TRACE();

	/* This function must be called for a primary device */
	assert_primary(nic);

	/* Validate RBDR buff size */
	for (qidx = 0; qidx < dev->data->nb_rx_queues; qidx++) {
		rxq = dev->data->rx_queues[qidx];
		mbp_priv = rte_mempool_get_priv(rxq->pool);
		buffsz = mbp_priv->mbuf_data_room_size - RTE_PKTMBUF_HEADROOM;
		if (buffsz % 128) {
			PMD_INIT_LOG(ERR, "rxbuf size must be multiple of 128");
			return -EINVAL;
		}
		if (rbdrsz == 0)
			rbdrsz = buffsz;
		if (rbdrsz != buffsz) {
			PMD_INIT_LOG(ERR, "buffsz not same, qidx=%d (%d/%d)",
				     qidx, rbdrsz, buffsz);
			return -EINVAL;
		}
	}

	/* Configure loopback */
	ret = nicvf_loopback_config(nic, dev->data->dev_conf.lpbk_mode);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to configure loopback %d", ret);
		return ret;
	}

	/* Reset all statistics counters attached to this port */
	ret = nicvf_mbox_reset_stat_counters(nic, 0x3FFF, 0x1F, 0xFFFF, 0xFFFF);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to reset stat counters %d", ret);
		return ret;
	}

	/* Setup scatter mode if needed by jumbo */
	if (dev->data->dev_conf.rxmode.max_rx_pkt_len +
		2 * VLAN_TAG_SIZE > buffsz)
		dev->data->scattered_rx = 1;
	if ((rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER) != 0)
		dev->data->scattered_rx = 1;

	/* Setup MTU based on max_rx_pkt_len or default */
	mtu = dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME ?
		dev->data->dev_conf.rxmode.max_rx_pkt_len
			- RTE_ETHER_HDR_LEN : RTE_ETHER_MTU;

	if (nicvf_dev_set_mtu(dev, mtu)) {
		PMD_INIT_LOG(ERR, "Failed to set default mtu size");
		return -EBUSY;
	}

	ret = nicvf_vf_start(dev, nic, rbdrsz);
	if (ret != 0)
		return ret;

	for (i = 0; i < nic->sqs_count; i++) {
		assert(nic->snicvf[i]);

		ret = nicvf_vf_start(dev, nic->snicvf[i], rbdrsz);
		if (ret != 0)
			return ret;
	}

	/* Configure callbacks based on offloads */
	nicvf_set_tx_function(dev);
	nicvf_set_rx_function(dev);

	return 0;
}
static void
nicvf_dev_stop_cleanup(struct rte_eth_dev *dev, bool cleanup)
{
	int ret;
	size_t i;
	struct nicvf *nic = nicvf_pmd_priv(dev);

	PMD_INIT_FUNC_TRACE();

	/* Teardown secondary vf first */
	for (i = 0; i < nic->sqs_count; i++) {
		if (!nic->snicvf[i])
			continue;

		nicvf_vf_stop(dev, nic->snicvf[i], cleanup);
	}

	/* Stop the primary VF now */
	nicvf_vf_stop(dev, nic, cleanup);

	/* Disable loopback */
	ret = nicvf_loopback_config(nic, 0);
	if (ret)
		PMD_INIT_LOG(ERR, "Failed to disable loopback %d", ret);

	/* Reclaim CPI configuration */
	ret = nicvf_mbox_config_cpi(nic, 0);
	if (ret)
		PMD_INIT_LOG(ERR, "Failed to reclaim CPI config %d", ret);
}

static void
nicvf_dev_stop(struct rte_eth_dev *dev)
{
	PMD_INIT_FUNC_TRACE();

	nicvf_dev_stop_cleanup(dev, false);
}
static void
nicvf_vf_stop(struct rte_eth_dev *dev, struct nicvf *nic, bool cleanup)
{
	int ret;
	uint16_t qidx;
	uint16_t tx_start, tx_end;
	uint16_t rx_start, rx_end;

	PMD_INIT_FUNC_TRACE();

	if (cleanup) {
		/* Let PF make the BGX's RX and TX switches to OFF position */
		nicvf_mbox_shutdown(nic);
	}

	/* Disable VLAN Strip */
	nicvf_vlan_hw_strip(nic, 0);

	/* Get queue ranges for this VF */
	nicvf_tx_range(dev, nic, &tx_start, &tx_end);

	for (qidx = tx_start; qidx <= tx_end; qidx++)
		nicvf_vf_stop_tx_queue(dev, nic, qidx % MAX_SND_QUEUES_PER_QS);

	/* Get queue ranges for this VF */
	nicvf_rx_range(dev, nic, &rx_start, &rx_end);

	/* Stop rx queues */
	for (qidx = rx_start; qidx <= rx_end; qidx++)
		nicvf_vf_stop_rx_queue(dev, nic, qidx % MAX_RCV_QUEUES_PER_QS);

	/* Reclaim rbdr */
	ret = nicvf_qset_rbdr_reclaim(nic, 0);
	if (ret)
		PMD_INIT_LOG(ERR, "Failed to reclaim RBDR %d", ret);

	/* Move all charged buffers in RBDR back to pool */
	if (nic->rbdr != NULL)
		nicvf_rbdr_release_mbufs(dev, nic);

	/* Disable qset */
	ret = nicvf_qset_reclaim(nic);
	if (ret)
		PMD_INIT_LOG(ERR, "Failed to disable qset %d", ret);

	/* Disable all interrupts */
	nicvf_disable_all_interrupts(nic);

	/* Free RBDR SW structure */
	if (nic->rbdr) {
		rte_free(nic->rbdr);
		nic->rbdr = NULL;
	}
}
static void
nicvf_dev_close(struct rte_eth_dev *dev)
{
	size_t i;
	struct nicvf *nic = nicvf_pmd_priv(dev);

	PMD_INIT_FUNC_TRACE();

	nicvf_dev_stop_cleanup(dev, true);
	nicvf_periodic_alarm_stop(nicvf_interrupt, dev);

	for (i = 0; i < nic->sqs_count; i++) {
		if (!nic->snicvf[i])
			continue;

		nicvf_periodic_alarm_stop(nicvf_vf_interrupt, nic->snicvf[i]);
	}
}
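/*
 * When more queues are requested than one Qset provides, additional
 * secondary Qset VFs are popped from the SVF stack and attached to this
 * primary VF via a PF mailbox request.
 */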
static int
nicvf_request_sqs(struct nicvf *nic)
{
	size_t i;

	assert_primary(nic);
	assert(nic->sqs_count > 0);
	assert(nic->sqs_count <= MAX_SQS_PER_VF);

	/* Set no of Rx/Tx queues in each of the SQsets */
	for (i = 0; i < nic->sqs_count; i++) {
		if (nicvf_svf_empty())
			rte_panic("Cannot assign sufficient number of "
				  "secondary queues to primary VF%" PRIu8 "\n",
				  nic->vf_id);

		nic->snicvf[i] = nicvf_svf_pop();
		nic->snicvf[i]->sqs_id = i;
	}

	return nicvf_mbox_request_sqs(nic);
}
static int
nicvf_dev_configure(struct rte_eth_dev *dev)
{
	struct rte_eth_dev_data *data = dev->data;
	struct rte_eth_conf *conf = &data->dev_conf;
	struct rte_eth_rxmode *rxmode = &conf->rxmode;
	struct rte_eth_txmode *txmode = &conf->txmode;
	struct nicvf *nic = nicvf_pmd_priv(dev);
	uint8_t cqcount;

	PMD_INIT_FUNC_TRACE();

	if (rxmode->mq_mode & ETH_MQ_RX_RSS_FLAG)
		rxmode->offloads |= DEV_RX_OFFLOAD_RSS_HASH;

	if (!rte_eal_has_hugepages()) {
		PMD_INIT_LOG(INFO, "Huge page is not configured");
		return -EINVAL;
	}

	if (txmode->mq_mode) {
		PMD_INIT_LOG(INFO, "Tx mq_mode DCB or VMDq not supported");
		return -EINVAL;
	}

	if (rxmode->mq_mode != ETH_MQ_RX_NONE &&
		rxmode->mq_mode != ETH_MQ_RX_RSS) {
		PMD_INIT_LOG(INFO, "Unsupported rx qmode %d", rxmode->mq_mode);
		return -EINVAL;
	}

	if (rxmode->split_hdr_size) {
		PMD_INIT_LOG(INFO, "Rxmode does not support split header");
		return -EINVAL;
	}

	if (conf->link_speeds & ETH_LINK_SPEED_FIXED) {
		PMD_INIT_LOG(INFO, "Setting link speed/duplex not supported");
		return -EINVAL;
	}

	if (conf->dcb_capability_en) {
		PMD_INIT_LOG(INFO, "DCB enable not supported");
		return -EINVAL;
	}

	if (conf->fdir_conf.mode != RTE_FDIR_MODE_NONE) {
		PMD_INIT_LOG(INFO, "Flow director not supported");
		return -EINVAL;
	}

	assert_primary(nic);
	NICVF_STATIC_ASSERT(MAX_RCV_QUEUES_PER_QS == MAX_SND_QUEUES_PER_QS);
	cqcount = RTE_MAX(data->nb_tx_queues, data->nb_rx_queues);
	if (cqcount > MAX_RCV_QUEUES_PER_QS) {
		nic->sqs_count = RTE_ALIGN_CEIL(cqcount, MAX_RCV_QUEUES_PER_QS);
		nic->sqs_count = (nic->sqs_count / MAX_RCV_QUEUES_PER_QS) - 1;
	} else {
		nic->sqs_count = 0;
	}

	assert(nic->sqs_count <= MAX_SQS_PER_VF);

	if (nic->sqs_count > 0) {
		if (nicvf_request_sqs(nic)) {
			rte_panic("Cannot assign sufficient number of "
				  "secondary queues to PORT%d VF%" PRIu8 "\n",
				  dev->data->port_id, nic->vf_id);
		}
	}

	if (rxmode->offloads & DEV_RX_OFFLOAD_CHECKSUM)
		nic->offload_cksum = 1;

	PMD_INIT_LOG(DEBUG, "Configured ethdev port%d hwcap=0x%" PRIx64,
		     dev->data->port_id, nicvf_hw_cap(nic));

	return 0;
}
static int
nicvf_dev_set_link_up(struct rte_eth_dev *dev)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	int rc, i;

	rc = nicvf_mbox_set_link_up_down(nic, true);
	if (rc)
		return rc;

	/* Start tx queues */
	for (i = 0; i < dev->data->nb_tx_queues; i++)
		nicvf_dev_tx_queue_start(dev, i);

	return 0;
}

static int
nicvf_dev_set_link_down(struct rte_eth_dev *dev)
{
	struct nicvf *nic = nicvf_pmd_priv(dev);
	int i;

	/* Stop tx queues */
	for (i = 0; i < dev->data->nb_tx_queues; i++)
		nicvf_dev_tx_queue_stop(dev, i);

	return nicvf_mbox_set_link_up_down(nic, false);
}
/* Initialize and register driver with DPDK Application */
static const struct eth_dev_ops nicvf_eth_dev_ops = {
	.dev_configure            = nicvf_dev_configure,
	.dev_start                = nicvf_dev_start,
	.dev_stop                 = nicvf_dev_stop,
	.link_update              = nicvf_dev_link_update,
	.dev_close                = nicvf_dev_close,
	.stats_get                = nicvf_dev_stats_get,
	.stats_reset              = nicvf_dev_stats_reset,
	.promiscuous_enable       = nicvf_dev_promisc_enable,
	.dev_infos_get            = nicvf_dev_info_get,
	.dev_supported_ptypes_get = nicvf_dev_supported_ptypes_get,
	.mtu_set                  = nicvf_dev_set_mtu,
	.vlan_offload_set         = nicvf_vlan_offload_set,
	.reta_update              = nicvf_dev_reta_update,
	.reta_query               = nicvf_dev_reta_query,
	.rss_hash_update          = nicvf_dev_rss_hash_update,
	.rss_hash_conf_get        = nicvf_dev_rss_hash_conf_get,
	.rx_queue_start           = nicvf_dev_rx_queue_start,
	.rx_queue_stop            = nicvf_dev_rx_queue_stop,
	.tx_queue_start           = nicvf_dev_tx_queue_start,
	.tx_queue_stop            = nicvf_dev_tx_queue_stop,
	.rx_queue_setup           = nicvf_dev_rx_queue_setup,
	.rx_queue_release         = nicvf_dev_rx_queue_release,
	.rx_queue_count           = nicvf_dev_rx_queue_count,
	.tx_queue_setup           = nicvf_dev_tx_queue_setup,
	.tx_queue_release         = nicvf_dev_tx_queue_release,
	.dev_set_link_up          = nicvf_dev_set_link_up,
	.dev_set_link_down        = nicvf_dev_set_link_down,
	.get_reg                  = nicvf_dev_get_regs,
};
static int
nicvf_vlan_offload_config(struct rte_eth_dev *dev, int mask)
{
	struct rte_eth_rxmode *rxmode;
	struct nicvf *nic = nicvf_pmd_priv(dev);

	rxmode = &dev->data->dev_conf.rxmode;
	if (mask & ETH_VLAN_STRIP_MASK) {
		if (rxmode->offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
			nicvf_vlan_hw_strip(nic, true);
		else
			nicvf_vlan_hw_strip(nic, false);
	}

	return 0;
}

static int
nicvf_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
	nicvf_vlan_offload_config(dev, mask);

	return 0;
}
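/*
 * Parse the "skip_data_bytes" devarg, which programs the HW to skip a
 * fixed number of leading bytes in every received packet; it must be a
 * multiple of 8 and below 128 (one cache line).
 */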
static int
nicvf_set_first_skip(struct rte_eth_dev *dev)
{
	int bytes_to_skip = 0;
	int ret = 0;
	unsigned int i;
	struct rte_kvargs *kvlist;
	static const char *const skip[] = {
		SKIP_DATA_BYTES,
		NULL};
	struct nicvf *nic = nicvf_pmd_priv(dev);

	if (!dev->device->devargs) {
		nicvf_first_skip_config(nic, 0);
		return ret;
	}

	kvlist = rte_kvargs_parse(dev->device->devargs->args, skip);
	if (!kvlist)
		return -EINVAL;

	if (kvlist->count == 0)
		goto exit;

	for (i = 0; i != kvlist->count; ++i) {
		const struct rte_kvargs_pair *pair = &kvlist->pairs[i];

		if (!strcmp(pair->key, SKIP_DATA_BYTES))
			bytes_to_skip = atoi(pair->value);
	}

	/* 128 bytes amounts to one cache line */
	if (bytes_to_skip >= 0 && bytes_to_skip < 128) {
		if (!(bytes_to_skip % 8)) {
			nicvf_first_skip_config(nic, (bytes_to_skip / 8));
			nic->skip_bytes = bytes_to_skip;
			goto kvlist_free;
		} else {
			PMD_INIT_LOG(ERR, "skip_data_bytes should be multiple of 8");
			ret = -EINVAL;
			goto exit;
		}
	} else {
		PMD_INIT_LOG(ERR, "skip_data_bytes should be less than 128");
		ret = -EINVAL;
		goto exit;
	}
exit:
	nicvf_first_skip_config(nic, 0);
kvlist_free:
	rte_kvargs_free(kvlist);
	return ret;
}
static int
nicvf_eth_dev_uninit(struct rte_eth_dev *dev)
{
	PMD_INIT_FUNC_TRACE();

	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
		nicvf_dev_close(dev);

	return 0;
}
static int
nicvf_eth_dev_init(struct rte_eth_dev *eth_dev)
{
	int ret;
	struct rte_pci_device *pci_dev;
	struct nicvf *nic = nicvf_pmd_priv(eth_dev);

	PMD_INIT_FUNC_TRACE();

	eth_dev->dev_ops = &nicvf_eth_dev_ops;

	/* For secondary processes, the primary has done all the work */
	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		if (nic) {
			/* Setup callbacks for secondary process */
			nicvf_set_tx_function(eth_dev);
			nicvf_set_rx_function(eth_dev);
			return 0;
		} else {
			/* If nic == NULL then it is a secondary function,
			 * so the ethdev needs to be released by the caller */
			return ENOTSUP;
		}
	}

	pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
	rte_eth_copy_pci_info(eth_dev, pci_dev);

	nic->device_id = pci_dev->id.device_id;
	nic->vendor_id = pci_dev->id.vendor_id;
	nic->subsystem_device_id = pci_dev->id.subsystem_device_id;
	nic->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id;

	PMD_INIT_LOG(DEBUG, "nicvf: device (%x:%x) %u:%u:%u:%u",
		     pci_dev->id.vendor_id, pci_dev->id.device_id,
		     pci_dev->addr.domain, pci_dev->addr.bus,
		     pci_dev->addr.devid, pci_dev->addr.function);

	nic->reg_base = (uintptr_t)pci_dev->mem_resource[0].addr;
	if (!nic->reg_base) {
		PMD_INIT_LOG(ERR, "Failed to map BAR0");
		ret = -ENODEV;
		goto fail;
	}

	nicvf_disable_all_interrupts(nic);

	ret = nicvf_periodic_alarm_start(nicvf_interrupt, eth_dev);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to start period alarm");
		goto fail;
	}

	ret = nicvf_mbox_check_pf_ready(nic);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to get ready message from PF");
		goto alarm_fail;
	} else {
		PMD_INIT_LOG(INFO,
			"node=%d vf=%d mode=%s sqs=%s loopback_supported=%s",
			nic->node, nic->vf_id,
			nic->tns_mode == NIC_TNS_MODE ? "tns" : "tns-bypass",
			nic->sqs_mode ? "true" : "false",
			nic->loopback_supported ? "true" : "false");
	}

	ret = nicvf_base_init(nic);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to execute nicvf_base_init");
		goto malloc_fail;
	}

	if (nic->sqs_mode) {
		/* Push nic to stack of secondary vfs */
		nicvf_svf_push(nic);

		/* Steal nic pointer from the device for further reuse */
		eth_dev->data->dev_private = NULL;

		nicvf_periodic_alarm_stop(nicvf_interrupt, eth_dev);
		ret = nicvf_periodic_alarm_start(nicvf_vf_interrupt, nic);
		if (ret) {
			PMD_INIT_LOG(ERR, "Failed to start period alarm");
			goto fail;
		}

		/* Detach port by returning positive error number */
		return ENOTSUP;
	}

	eth_dev->data->mac_addrs = rte_zmalloc("mac_addr",
					RTE_ETHER_ADDR_LEN, 0);
	if (eth_dev->data->mac_addrs == NULL) {
		PMD_INIT_LOG(ERR, "Failed to allocate memory for mac addr");
		ret = -ENOMEM;
		goto alarm_fail;
	}
	if (rte_is_zero_ether_addr((struct rte_ether_addr *)nic->mac_addr))
		rte_eth_random_addr(&nic->mac_addr[0]);

	rte_ether_addr_copy((struct rte_ether_addr *)nic->mac_addr,
			&eth_dev->data->mac_addrs[0]);

	ret = nicvf_mbox_set_mac_addr(nic, nic->mac_addr);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to set mac addr");
		goto malloc_fail;
	}

	ret = nicvf_set_first_skip(eth_dev);
	if (ret) {
		PMD_INIT_LOG(ERR, "Failed to configure first skip");
		goto malloc_fail;
	}
	PMD_INIT_LOG(INFO, "Port %d (%x:%x) mac=%02x:%02x:%02x:%02x:%02x:%02x",
		     eth_dev->data->port_id, nic->vendor_id, nic->device_id,
		     nic->mac_addr[0], nic->mac_addr[1], nic->mac_addr[2],
		     nic->mac_addr[3], nic->mac_addr[4], nic->mac_addr[5]);

	return 0;

malloc_fail:
	rte_free(eth_dev->data->mac_addrs);
	eth_dev->data->mac_addrs = NULL;
alarm_fail:
	nicvf_periodic_alarm_stop(nicvf_interrupt, eth_dev);
fail:
	return ret;
}
static const struct rte_pci_id pci_id_nicvf_map[] = {
	{
		.class_id = RTE_CLASS_ANY_ID,
		.vendor_id = PCI_VENDOR_ID_CAVIUM,
		.device_id = PCI_DEVICE_ID_THUNDERX_CN88XX_PASS1_NICVF,
		.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
		.subsystem_device_id = PCI_SUB_DEVICE_ID_CN88XX_PASS1_NICVF,
	},
	{
		.class_id = RTE_CLASS_ANY_ID,
		.vendor_id = PCI_VENDOR_ID_CAVIUM,
		.device_id = PCI_DEVICE_ID_THUNDERX_NICVF,
		.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
		.subsystem_device_id = PCI_SUB_DEVICE_ID_CN88XX_PASS2_NICVF,
	},
	{
		.class_id = RTE_CLASS_ANY_ID,
		.vendor_id = PCI_VENDOR_ID_CAVIUM,
		.device_id = PCI_DEVICE_ID_THUNDERX_NICVF,
		.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
		.subsystem_device_id = PCI_SUB_DEVICE_ID_CN81XX_NICVF,
	},
	{
		.class_id = RTE_CLASS_ANY_ID,
		.vendor_id = PCI_VENDOR_ID_CAVIUM,
		.device_id = PCI_DEVICE_ID_THUNDERX_NICVF,
		.subsystem_vendor_id = PCI_VENDOR_ID_CAVIUM,
		.subsystem_device_id = PCI_SUB_DEVICE_ID_CN83XX_NICVF,
	},
	{
		.vendor_id = 0,
	},
};
static int nicvf_eth_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
			struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct nicvf),
		nicvf_eth_dev_init);
}

static int nicvf_eth_pci_remove(struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_remove(pci_dev, nicvf_eth_dev_uninit);
}

static struct rte_pci_driver rte_nicvf_pmd = {
	.id_table = pci_id_nicvf_map,
	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_KEEP_MAPPED_RES |
			RTE_PCI_DRV_INTR_LSC,
	.probe = nicvf_eth_pci_probe,
	.remove = nicvf_eth_pci_remove,
};

RTE_PMD_REGISTER_PCI(net_thunderx, rte_nicvf_pmd);
RTE_PMD_REGISTER_PCI_TABLE(net_thunderx, pci_id_nicvf_map);
RTE_PMD_REGISTER_KMOD_DEP(net_thunderx, "* igb_uio | uio_pci_generic | vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(net_thunderx, SKIP_DATA_BYTES "=<int>");