/*
 * Copyright (C) 2015-2017 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
 * source tree or the BSD 2-Clause License provided below.  You have the
 * option to license this software under the complete terms of either license.
 *
 * The BSD 2-Clause License:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      1. Redistributions of source code must retain the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer.
 *
 *      2. Redistributions in binary form must reproduce the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer in the documentation and/or other materials
 *         provided with the distribution.
 *
 *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *      EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *      MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *      NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 *      BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 *      ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 *      CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 *      SOFTWARE.
 */

/*
 * nfp_net_common.c
 * Netronome network device driver: Common functions between PF and VF
 * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
 *          Jason McMullan <jason.mcmullan@netronome.com>
 *          Rolf Neugebauer <rolf.neugebauer@netronome.com>
 *          Brad Petrus <brad.petrus@netronome.com>
 *          Chris Telfer <chris.telfer@netronome.com>
 */
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/page_ref.h>
#include <linux/pci.h>
#include <linux/pci_regs.h>
#include <linux/msi.h>
#include <linux/ethtool.h>
#include <linux/log2.h>
#include <linux/if_vlan.h>
#include <linux/random.h>
#include <linux/ktime.h>

#include <net/pkt_cls.h>
#include <net/vxlan.h>

#include "nfp_net_ctrl.h"
#include "nfp_net.h"
/**
 * nfp_net_get_fw_version() - Read and parse the FW version
 * @fw_ver:   Output fw_version structure to read to
 * @ctrl_bar: Mapped address of the control BAR
 */
void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
			    void __iomem *ctrl_bar)
{
	u32 reg;

	reg = readl(ctrl_bar + NFP_NET_CFG_VERSION);
	put_unaligned_le32(reg, fw_ver);
}
static dma_addr_t
nfp_net_dma_map_rx(struct nfp_net *nn, void *frag, unsigned int bufsz,
		   int direction)
{
	return dma_map_single(&nn->pdev->dev, frag + NFP_NET_RX_BUF_HEADROOM,
			      bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
}

static void
nfp_net_dma_unmap_rx(struct nfp_net *nn, dma_addr_t dma_addr,
		     unsigned int bufsz, int direction)
{
	dma_unmap_single(&nn->pdev->dev, dma_addr,
			 bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
}
/* Firmware reconfig
 *
 * Firmware reconfig may take a while so we have two versions of it -
 * synchronous and asynchronous (posted).  All synchronous callers are holding
 * RTNL so we don't have to worry about serializing them.
 */
static void nfp_net_reconfig_start(struct nfp_net *nn, u32 update)
{
	nn_writel(nn, NFP_NET_CFG_UPDATE, update);
	/* ensure update is written before pinging HW */
	nn_pci_flush(nn);
	nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1);
}
/* Pass 0 as update to run posted reconfigs. */
static void nfp_net_reconfig_start_async(struct nfp_net *nn, u32 update)
{
	update |= nn->reconfig_posted;
	nn->reconfig_posted = 0;

	nfp_net_reconfig_start(nn, update);

	nn->reconfig_timer_active = true;
	mod_timer(&nn->reconfig_timer, jiffies + NFP_NET_POLL_TIMEOUT * HZ);
}
static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check)
{
	u32 reg;

	reg = nn_readl(nn, NFP_NET_CFG_UPDATE);
	if (reg == 0)
		return true;
	if (reg & NFP_NET_CFG_UPDATE_ERR) {
		nn_err(nn, "Reconfig error: 0x%08x\n", reg);
		return true;
	} else if (last_check) {
		nn_err(nn, "Reconfig timeout: 0x%08x\n", reg);
		return true;
	}

	return false;
}
static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
{
	bool timed_out = false;

	/* Poll update field, waiting for NFP to ack the config */
	while (!nfp_net_reconfig_check_done(nn, timed_out)) {
		msleep(1);
		timed_out = time_is_before_eq_jiffies(deadline);
	}

	if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
		return -EIO;

	return timed_out ? -EIO : 0;
}
static void nfp_net_reconfig_timer(unsigned long data)
{
	struct nfp_net *nn = (void *)data;

	spin_lock_bh(&nn->reconfig_lock);

	nn->reconfig_timer_active = false;

	/* If sync caller is present it will take over from us */
	if (nn->reconfig_sync_present)
		goto done;

	/* Read reconfig status and report errors */
	nfp_net_reconfig_check_done(nn, true);

	if (nn->reconfig_posted)
		nfp_net_reconfig_start_async(nn, 0);
done:
	spin_unlock_bh(&nn->reconfig_lock);
}
/**
 * nfp_net_reconfig_post() - Post async reconfig request
 * @nn:      NFP Net device to reconfigure
 * @update:  The value for the update field in the BAR config
 *
 * Record FW reconfiguration request.  Reconfiguration will be kicked off
 * whenever reconfiguration machinery is idle.  Multiple requests can be
 * merged together.
 */
static void nfp_net_reconfig_post(struct nfp_net *nn, u32 update)
{
	spin_lock_bh(&nn->reconfig_lock);

	/* Sync caller will kick off async reconf when it's done, just post */
	if (nn->reconfig_sync_present) {
		nn->reconfig_posted |= update;
		goto done;
	}

	/* Opportunistically check if the previous command is done */
	if (!nn->reconfig_timer_active ||
	    nfp_net_reconfig_check_done(nn, false))
		nfp_net_reconfig_start_async(nn, update);
	else
		nn->reconfig_posted |= update;
done:
	spin_unlock_bh(&nn->reconfig_lock);
}
/**
 * nfp_net_reconfig() - Reconfigure the firmware
 * @nn:      NFP Net device to reconfigure
 * @update:  The value for the update field in the BAR config
 *
 * Write the update word to the BAR and ping the reconfig queue.  Then
 * poll until the firmware has acknowledged the update by zeroing the
 * update word.
 *
 * Return: Negative errno on error, 0 on success
 */
int nfp_net_reconfig(struct nfp_net *nn, u32 update)
{
	bool cancelled_timer = false;
	u32 pre_posted_requests;
	int ret;

	spin_lock_bh(&nn->reconfig_lock);

	nn->reconfig_sync_present = true;

	if (nn->reconfig_timer_active) {
		del_timer(&nn->reconfig_timer);
		nn->reconfig_timer_active = false;
		cancelled_timer = true;
	}
	pre_posted_requests = nn->reconfig_posted;
	nn->reconfig_posted = 0;

	spin_unlock_bh(&nn->reconfig_lock);

	if (cancelled_timer)
		nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires);

	/* Run the posted reconfigs which were issued before we started */
	if (pre_posted_requests) {
		nfp_net_reconfig_start(nn, pre_posted_requests);
		nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
	}

	nfp_net_reconfig_start(nn, update);
	ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);

	spin_lock_bh(&nn->reconfig_lock);

	if (nn->reconfig_posted)
		nfp_net_reconfig_start_async(nn, 0);

	nn->reconfig_sync_present = false;

	spin_unlock_bh(&nn->reconfig_lock);

	return ret;
}
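/* Illustrative usage sketch (not taken from this file): a caller that
 * has rewritten ring state in the control BAR would typically combine
 * update flags, e.g.:
 *
 *	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN |
 *				   NFP_NET_CFG_UPDATE_RING);
 *
 * and treat a negative return value as the firmware rejecting or timing
 * out on the request.
 */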
/* Interrupt configuration and handling
 */

/**
 * nfp_net_irq_unmask() - Unmask automasked interrupt
 * @nn:       NFP Network structure
 * @entry_nr: MSI-X table entry
 *
 * Clear the ICR for the IRQ entry.
 */
static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr)
{
	nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED);
	nn_pci_flush(nn);
}
/**
 * nfp_net_irqs_alloc() - allocates MSI-X irqs
 * @pdev:        PCI device structure
 * @irq_entries: Array to be initialized and used to hold the irq entries
 * @min_irqs:    Minimal acceptable number of interrupts
 * @wanted_irqs: Target number of interrupts to allocate
 *
 * Return: Number of irqs obtained or 0 on error.
 */
unsigned int
nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries,
		   unsigned int min_irqs, unsigned int wanted_irqs)
{
	unsigned int i;
	int got_irqs;

	for (i = 0; i < wanted_irqs; i++)
		irq_entries[i].entry = i;

	got_irqs = pci_enable_msix_range(pdev, irq_entries,
					 min_irqs, wanted_irqs);
	if (got_irqs < 0) {
		dev_err(&pdev->dev, "Failed to enable %d-%d MSI-X (err=%d)\n",
			min_irqs, wanted_irqs, got_irqs);
		return 0;
	}

	if (got_irqs < wanted_irqs)
		dev_warn(&pdev->dev, "Unable to allocate %d IRQs got only %d\n",
			 wanted_irqs, got_irqs);

	return got_irqs;
}
/**
 * nfp_net_irqs_assign() - Assign interrupts allocated externally to netdev
 * @nn:          NFP Network structure
 * @irq_entries: Table of allocated interrupts
 * @n:           Size of @irq_entries (number of entries to grab)
 *
 * After interrupts are allocated with nfp_net_irqs_alloc() this function
 * should be called to assign them to a specific netdev (port).
 */
void
nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries,
		    unsigned int n)
{
	nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS;
	nn->num_r_vecs = nn->max_r_vecs;

	memcpy(nn->irq_entries, irq_entries, sizeof(*irq_entries) * n);

	if (nn->num_rx_rings > nn->num_r_vecs ||
	    nn->num_tx_rings > nn->num_r_vecs)
		nn_warn(nn, "More rings (%d,%d) than vectors (%d).\n",
			nn->num_rx_rings, nn->num_tx_rings, nn->num_r_vecs);

	nn->num_rx_rings = min(nn->num_r_vecs, nn->num_rx_rings);
	nn->num_tx_rings = min(nn->num_r_vecs, nn->num_tx_rings);
	nn->num_stack_tx_rings = nn->num_tx_rings;
}
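/* Note on vector layout (as implied by the constants used here and in
 * nfp_net_vecs_init() below): the first NFP_NET_NON_Q_VECTORS MSI-X
 * entries are reserved for the link-state-change and exception
 * interrupts, and the remaining entries service the RX/TX ring vectors.
 * For example, with n == 10 and NFP_NET_NON_Q_VECTORS == 2, up to 8
 * ring vectors are available.
 */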
/**
 * nfp_net_irqs_disable() - Disable interrupts
 * @pdev:        PCI device structure
 *
 * Undoes what @nfp_net_irqs_alloc() does.
 */
void nfp_net_irqs_disable(struct pci_dev *pdev)
{
	pci_disable_msix(pdev);
}
/**
 * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings.
 * @irq:      Interrupt
 * @data:     Opaque data structure
 *
 * Return: Indicate if the interrupt has been handled.
 */
static irqreturn_t nfp_net_irq_rxtx(int irq, void *data)
{
	struct nfp_net_r_vector *r_vec = data;

	napi_schedule_irqoff(&r_vec->napi);

	/* The FW auto-masks any interrupt, either via the MASK bit in
	 * the MSI-X table or via the per entry ICR field.  So there
	 * is no need to disable interrupts here.
	 */
	return IRQ_HANDLED;
}
/**
 * nfp_net_read_link_status() - Reread link status from control BAR
 * @nn:       NFP Network structure
 */
static void nfp_net_read_link_status(struct nfp_net *nn)
{
	unsigned long flags;
	bool link_up;
	u32 sts;

	spin_lock_irqsave(&nn->link_status_lock, flags);

	sts = nn_readl(nn, NFP_NET_CFG_STS);
	link_up = !!(sts & NFP_NET_CFG_STS_LINK);

	if (nn->link_up == link_up)
		goto out;

	nn->link_up = link_up;

	if (nn->link_up) {
		netif_carrier_on(nn->netdev);
		netdev_info(nn->netdev, "NIC Link is Up\n");
	} else {
		netif_carrier_off(nn->netdev);
		netdev_info(nn->netdev, "NIC Link is Down\n");
	}
out:
	spin_unlock_irqrestore(&nn->link_status_lock, flags);
}
/**
 * nfp_net_irq_lsc() - Interrupt service routine for link state changes
 * @irq:      Interrupt
 * @data:     Opaque data structure
 *
 * Return: Indicate if the interrupt has been handled.
 */
static irqreturn_t nfp_net_irq_lsc(int irq, void *data)
{
	struct nfp_net *nn = data;
	struct msix_entry *entry;

	entry = &nn->irq_entries[NFP_NET_IRQ_LSC_IDX];

	nfp_net_read_link_status(nn);

	nfp_net_irq_unmask(nn, entry->entry);

	return IRQ_HANDLED;
}
/**
 * nfp_net_irq_exn() - Interrupt service routine for exceptions
 * @irq:      Interrupt
 * @data:     Opaque data structure
 *
 * Return: Indicate if the interrupt has been handled.
 */
static irqreturn_t nfp_net_irq_exn(int irq, void *data)
{
	struct nfp_net *nn = data;

	nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__);
	/* XXX TO BE IMPLEMENTED */
	return IRQ_HANDLED;
}
/**
 * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring
 * @tx_ring:  TX ring structure
 * @r_vec:    IRQ vector servicing this ring
 * @idx:      Ring index
 */
static void
nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
		     struct nfp_net_r_vector *r_vec, unsigned int idx)
{
	struct nfp_net *nn = r_vec->nfp_net;

	tx_ring->idx = idx;
	tx_ring->r_vec = r_vec;

	tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
	tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
}
/**
 * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring
 * @rx_ring:  RX ring structure
 * @r_vec:    IRQ vector servicing this ring
 * @idx:      Ring index
 */
static void
nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
		     struct nfp_net_r_vector *r_vec, unsigned int idx)
{
	struct nfp_net *nn = r_vec->nfp_net;

	rx_ring->idx = idx;
	rx_ring->r_vec = r_vec;

	rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
	rx_ring->rx_qcidx = rx_ring->fl_qcidx + (nn->stride_rx - 1);

	rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
	rx_ring->qcp_rx = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->rx_qcidx);
}
/**
 * nfp_net_vecs_init() - Assign IRQs and setup rvecs.
 * @netdev:   netdev structure
 */
static void nfp_net_vecs_init(struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	struct nfp_net_r_vector *r_vec;
	int r;

	nn->lsc_handler = nfp_net_irq_lsc;
	nn->exn_handler = nfp_net_irq_exn;

	for (r = 0; r < nn->max_r_vecs; r++) {
		struct msix_entry *entry;

		entry = &nn->irq_entries[NFP_NET_NON_Q_VECTORS + r];

		r_vec = &nn->r_vecs[r];
		r_vec->nfp_net = nn;
		r_vec->handler = nfp_net_irq_rxtx;
		r_vec->irq_entry = entry->entry;
		r_vec->irq_vector = entry->vector;

		cpumask_set_cpu(r, &r_vec->affinity_mask);
	}
}
/**
 * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN)
 * @nn:		NFP Network structure
 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
 * @format:	printf-style format to construct the interrupt name
 * @name:	Pointer to allocated space for interrupt name
 * @name_sz:	Size of space for interrupt name
 * @vector_idx:	Index of MSI-X vector used for this interrupt
 * @handler:	IRQ handler to register for this interrupt
 */
static int
nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
			const char *format, char *name, size_t name_sz,
			unsigned int vector_idx, irq_handler_t handler)
{
	struct msix_entry *entry;
	int err;

	entry = &nn->irq_entries[vector_idx];

	snprintf(name, name_sz, format, netdev_name(nn->netdev));
	err = request_irq(entry->vector, handler, 0, name, nn);
	if (err) {
		nn_err(nn, "Failed to request IRQ %d (err=%d).\n",
		       entry->vector, err);
		return err;
	}
	nn_writeb(nn, ctrl_offset, entry->entry);

	return 0;
}
/**
 * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN)
 * @nn:		NFP Network structure
 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
 * @vector_idx:	Index of MSI-X vector used for this interrupt
 */
static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
				 unsigned int vector_idx)
{
	nn_writeb(nn, ctrl_offset, 0xff);
	free_irq(nn->irq_entries[vector_idx].vector, nn);
}
/* Transmit
 *
 * One queue controller peripheral queue is used for transmit.  The
 * driver en-queues packets for transmit by advancing the write
 * pointer.  The device indicates that packets have transmitted by
 * advancing the read pointer.  The driver maintains a local copy of
 * the read and write pointer in @struct nfp_net_tx_ring.  The driver
 * keeps @wr_p in sync with the queue controller write pointer and can
 * determine how many packets have been transmitted by comparing its
 * copy of the read pointer @rd_p with the read pointer maintained by
 * the queue controller peripheral.
 */
/**
 * nfp_net_tx_full() - Check if the TX ring is full
 * @tx_ring: TX ring to check
 * @dcnt:    Number of descriptors that need to be enqueued (must be >= 1)
 *
 * This function checks, based on the *host copy* of read/write
 * pointer if a given TX ring is full.  The real TX queue may have
 * some newly made available slots.
 *
 * Return: True if the ring is full.
 */
static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
{
	return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt);
}
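/* Worked example (illustrative): @wr_p and @rd_p are free-running
 * counters, so with cnt == 1024, wr_p == 1030 and rd_p == 10 there are
 * 1020 descriptors in flight; a request to enqueue dcnt == 8 more is
 * reported as full since 1020 >= 1024 - 8.  Unsigned arithmetic keeps
 * the difference correct across counter wrap.
 */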
/* Wrappers for deciding when to stop and restart TX queues */
static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4);
}

static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1);
}
/**
 * nfp_net_tx_ring_stop() - stop tx ring
 * @nd_q:    netdev queue
 * @tx_ring: driver tx queue structure
 *
 * Safely stop TX ring.  Remember that while we are running .start_xmit()
 * someone else may be cleaning the TX ring completions so we need to be
 * extra careful here.
 */
static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q,
				 struct nfp_net_tx_ring *tx_ring)
{
	netif_tx_stop_queue(nd_q);

	/* We can race with the TX completion out of NAPI so recheck */
	smp_mb();
	if (unlikely(nfp_net_tx_ring_should_wake(tx_ring)))
		netif_tx_start_queue(nd_q);
}
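/* Note: without the barrier and recheck above, a completion running on
 * another CPU could free ring space and test the queue state between
 * our ring-full check and netif_tx_stop_queue(), leaving the queue
 * stopped with no completion left to wake it.
 */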
/**
 * nfp_net_tx_tso() - Set up Tx descriptor for LSO
 * @nn:    NFP Net device
 * @r_vec: per-ring structure
 * @txbuf: Pointer to driver soft TX descriptor
 * @txd:   Pointer to HW TX descriptor
 * @skb:   Pointer to SKB
 *
 * Set up Tx descriptor for LSO, do nothing for non-LSO skbs.
 * Return error on packet header greater than maximum supported LSO header size.
 */
static void nfp_net_tx_tso(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
			   struct nfp_net_tx_buf *txbuf,
			   struct nfp_net_tx_desc *txd, struct sk_buff *skb)
{
	u32 hdrlen;
	u16 mss;

	if (!skb_is_gso(skb))
		return;

	if (!skb->encapsulation)
		hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
	else
		hdrlen = skb_inner_transport_header(skb) - skb->data +
			inner_tcp_hdrlen(skb);

	txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs;
	txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1);

	mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK;
	txd->l4_offset = hdrlen;
	txd->mss = cpu_to_le16(mss);
	txd->flags |= PCIE_DESC_TX_LSO;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_lso++;
	u64_stats_update_end(&r_vec->tx_sync);
}
/**
 * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor
 * @nn:    NFP Net device
 * @r_vec: per-ring structure
 * @txbuf: Pointer to driver soft TX descriptor
 * @txd:   Pointer to TX descriptor
 * @skb:   Pointer to SKB
 *
 * This function sets the TX checksum flags in the TX descriptor based
 * on the configuration and the protocol of the packet to be transmitted.
 */
static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
			    struct nfp_net_tx_buf *txbuf,
			    struct nfp_net_tx_desc *txd, struct sk_buff *skb)
{
	struct ipv6hdr *ipv6h;
	struct iphdr *iph;
	u8 l4_hdr;

	if (!(nn->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
		return;

	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return;

	txd->flags |= PCIE_DESC_TX_CSUM;
	if (skb->encapsulation)
		txd->flags |= PCIE_DESC_TX_ENCAP;

	iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
	ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);

	if (iph->version == 4) {
		txd->flags |= PCIE_DESC_TX_IP4_CSUM;
		l4_hdr = iph->protocol;
	} else if (ipv6h->version == 6) {
		l4_hdr = ipv6h->nexthdr;
	} else {
		nn_warn_ratelimit(nn, "partial checksum but ipv=%x!\n",
				  iph->version);
		return;
	}

	switch (l4_hdr) {
	case IPPROTO_TCP:
		txd->flags |= PCIE_DESC_TX_TCP_CSUM;
		break;
	case IPPROTO_UDP:
		txd->flags |= PCIE_DESC_TX_UDP_CSUM;
		break;
	default:
		nn_warn_ratelimit(nn, "partial checksum but l4 proto=%x!\n",
				  l4_hdr);
		return;
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	if (skb->encapsulation)
		r_vec->hw_csum_tx_inner += txbuf->pkt_cnt;
	else
		r_vec->hw_csum_tx += txbuf->pkt_cnt;
	u64_stats_update_end(&r_vec->tx_sync);
}
static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
{
	/* force memory write before we let HW know */
	wmb();
	nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
	tx_ring->wr_ptr_add = 0;
}
/**
 * nfp_net_tx() - Main transmit entry point
 * @skb:    SKB to transmit
 * @netdev: netdev structure
 *
 * Return: NETDEV_TX_OK on success.
 */
static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	const struct skb_frag_struct *frag;
	struct nfp_net_r_vector *r_vec;
	struct nfp_net_tx_desc *txd, txdg;
	struct nfp_net_tx_buf *txbuf;
	struct nfp_net_tx_ring *tx_ring;
	struct netdev_queue *nd_q;
	dma_addr_t dma_addr;
	unsigned int fsize;
	int f, nr_frags;
	int wr_idx;
	u16 qidx;

	qidx = skb_get_queue_mapping(skb);
	tx_ring = &nn->tx_rings[qidx];
	r_vec = tx_ring->r_vec;
	nd_q = netdev_get_tx_queue(nn->netdev, qidx);

	nr_frags = skb_shinfo(skb)->nr_frags;

	if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
		nn_warn_ratelimit(nn, "TX ring %d busy. wrp=%u rdp=%u\n",
				  qidx, tx_ring->wr_p, tx_ring->rd_p);
		netif_tx_stop_queue(nd_q);
		u64_stats_update_begin(&r_vec->tx_sync);
		r_vec->tx_busy++;
		u64_stats_update_end(&r_vec->tx_sync);
		return NETDEV_TX_BUSY;
	}

	/* Start with the head skbuf */
	dma_addr = dma_map_single(&nn->pdev->dev, skb->data, skb_headlen(skb),
				  DMA_TO_DEVICE);
	if (dma_mapping_error(&nn->pdev->dev, dma_addr))
		goto err_free;

	wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1);

	/* Stash the soft descriptor of the head then initialize it */
	txbuf = &tx_ring->txbufs[wr_idx];
	txbuf->skb = skb;
	txbuf->dma_addr = dma_addr;
	txbuf->fidx = -1;
	txbuf->pkt_cnt = 1;
	txbuf->real_len = skb->len;

	/* Build TX descriptor */
	txd = &tx_ring->txds[wr_idx];
	txd->offset_eop = (nr_frags == 0) ? PCIE_DESC_TX_EOP : 0;
	txd->dma_len = cpu_to_le16(skb_headlen(skb));
	nfp_desc_set_dma_addr(txd, dma_addr);
	txd->data_len = cpu_to_le16(skb->len);

	txd->flags = 0;
	txd->mss = 0;
	txd->l4_offset = 0;

	nfp_net_tx_tso(nn, r_vec, txbuf, txd, skb);

	nfp_net_tx_csum(nn, r_vec, txbuf, txd, skb);

	if (skb_vlan_tag_present(skb) && nn->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
		txd->flags |= PCIE_DESC_TX_VLAN;
		txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
	}

	/* Gather DMA */
	if (nr_frags > 0) {
		/* all descs must match except for in addr, length and eop */
		txdg = *txd;

		for (f = 0; f < nr_frags; f++) {
			frag = &skb_shinfo(skb)->frags[f];
			fsize = skb_frag_size(frag);

			dma_addr = skb_frag_dma_map(&nn->pdev->dev, frag, 0,
						    fsize, DMA_TO_DEVICE);
			if (dma_mapping_error(&nn->pdev->dev, dma_addr))
				goto err_unmap;

			wr_idx = (wr_idx + 1) & (tx_ring->cnt - 1);
			tx_ring->txbufs[wr_idx].skb = skb;
			tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
			tx_ring->txbufs[wr_idx].fidx = f;

			txd = &tx_ring->txds[wr_idx];
			*txd = txdg;
			txd->dma_len = cpu_to_le16(fsize);
			nfp_desc_set_dma_addr(txd, dma_addr);
			txd->offset_eop =
				(f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0;
		}
	}

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_pkts++;
	u64_stats_update_end(&r_vec->tx_sync);

	netdev_tx_sent_queue(nd_q, txbuf->real_len);

	tx_ring->wr_p += nr_frags + 1;
	if (nfp_net_tx_ring_should_stop(tx_ring))
		nfp_net_tx_ring_stop(nd_q, tx_ring);

	tx_ring->wr_ptr_add += nr_frags + 1;
	if (!skb->xmit_more || netif_xmit_stopped(nd_q))
		nfp_net_tx_xmit_more_flush(tx_ring);

	skb_tx_timestamp(skb);

	return NETDEV_TX_OK;

err_unmap:
	--f;
	while (f >= 0) {
		frag = &skb_shinfo(skb)->frags[f];
		dma_unmap_page(&nn->pdev->dev,
			       tx_ring->txbufs[wr_idx].dma_addr,
			       skb_frag_size(frag), DMA_TO_DEVICE);
		tx_ring->txbufs[wr_idx].skb = NULL;
		tx_ring->txbufs[wr_idx].dma_addr = 0;
		tx_ring->txbufs[wr_idx].fidx = -2;
		wr_idx = wr_idx - 1;
		if (wr_idx < 0)
			wr_idx += tx_ring->cnt;
		f--;
	}
	dma_unmap_single(&nn->pdev->dev, tx_ring->txbufs[wr_idx].dma_addr,
			 skb_headlen(skb), DMA_TO_DEVICE);
	tx_ring->txbufs[wr_idx].skb = NULL;
	tx_ring->txbufs[wr_idx].dma_addr = 0;
	tx_ring->txbufs[wr_idx].fidx = -2;
err_free:
	nn_warn_ratelimit(nn, "Failed to map DMA TX buffer\n");
	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_errors++;
	u64_stats_update_end(&r_vec->tx_sync);
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}
/**
 * nfp_net_tx_complete() - Handle completed TX packets
 * @tx_ring:   TX ring structure
 */
static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net *nn = r_vec->nfp_net;
	const struct skb_frag_struct *frag;
	struct netdev_queue *nd_q;
	u32 done_pkts = 0, done_bytes = 0;
	struct sk_buff *skb;
	int todo, nr_frags;
	u32 qcp_rd_p;
	int fidx;
	int idx;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);

	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return;

	if (qcp_rd_p > tx_ring->qcp_rd_p)
		todo = qcp_rd_p - tx_ring->qcp_rd_p;
	else
		todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p;

	while (todo--) {
		idx = tx_ring->rd_p & (tx_ring->cnt - 1);
		tx_ring->rd_p++;

		skb = tx_ring->txbufs[idx].skb;
		if (!skb)
			continue;

		nr_frags = skb_shinfo(skb)->nr_frags;
		fidx = tx_ring->txbufs[idx].fidx;

		if (fidx == -1) {
			/* unmap head */
			dma_unmap_single(&nn->pdev->dev,
					 tx_ring->txbufs[idx].dma_addr,
					 skb_headlen(skb), DMA_TO_DEVICE);

			done_pkts += tx_ring->txbufs[idx].pkt_cnt;
			done_bytes += tx_ring->txbufs[idx].real_len;
		} else {
			/* unmap fragment */
			frag = &skb_shinfo(skb)->frags[fidx];
			dma_unmap_page(&nn->pdev->dev,
				       tx_ring->txbufs[idx].dma_addr,
				       skb_frag_size(frag), DMA_TO_DEVICE);
		}

		/* check for last gather fragment */
		if (fidx == nr_frags - 1)
			dev_kfree_skb_any(skb);

		tx_ring->txbufs[idx].dma_addr = 0;
		tx_ring->txbufs[idx].skb = NULL;
		tx_ring->txbufs[idx].fidx = -2;
	}

	tx_ring->qcp_rd_p = qcp_rd_p;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
	netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
	if (nfp_net_tx_ring_should_wake(tx_ring)) {
		/* Make sure TX thread will see updated tx_ring->rd_p */
		smp_mb();

		if (unlikely(netif_tx_queue_stopped(nd_q)))
			netif_tx_wake_queue(nd_q);
	}

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}
static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net *nn = r_vec->nfp_net;
	u32 done_pkts = 0, done_bytes = 0;
	int idx, todo;
	u32 qcp_rd_p;

	/* Work out how many descriptors have been transmitted */
	qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);

	if (qcp_rd_p == tx_ring->qcp_rd_p)
		return;

	if (qcp_rd_p > tx_ring->qcp_rd_p)
		todo = qcp_rd_p - tx_ring->qcp_rd_p;
	else
		todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p;

	while (todo--) {
		idx = tx_ring->rd_p & (tx_ring->cnt - 1);
		tx_ring->rd_p++;

		if (!tx_ring->txbufs[idx].frag)
			continue;

		nfp_net_dma_unmap_rx(nn, tx_ring->txbufs[idx].dma_addr,
				     nn->fl_bufsz, DMA_BIDIRECTIONAL);
		__free_page(virt_to_page(tx_ring->txbufs[idx].frag));

		done_pkts++;
		done_bytes += tx_ring->txbufs[idx].real_len;

		tx_ring->txbufs[idx].dma_addr = 0;
		tx_ring->txbufs[idx].frag = NULL;
		tx_ring->txbufs[idx].fidx = -2;
	}

	tx_ring->qcp_rd_p = qcp_rd_p;

	u64_stats_update_begin(&r_vec->tx_sync);
	r_vec->tx_bytes += done_bytes;
	r_vec->tx_pkts += done_pkts;
	u64_stats_update_end(&r_vec->tx_sync);

	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}
/**
 * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers
 * @nn:      NFP Net device
 * @tx_ring: TX ring structure
 *
 * Assumes that the device is stopped
 */
static void
nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	const struct skb_frag_struct *frag;
	struct pci_dev *pdev = nn->pdev;
	struct netdev_queue *nd_q;

	while (tx_ring->rd_p != tx_ring->wr_p) {
		struct nfp_net_tx_buf *tx_buf;
		int idx;

		idx = tx_ring->rd_p & (tx_ring->cnt - 1);
		tx_buf = &tx_ring->txbufs[idx];

		if (tx_ring == r_vec->xdp_ring) {
			nfp_net_dma_unmap_rx(nn, tx_buf->dma_addr,
					     nn->fl_bufsz, DMA_BIDIRECTIONAL);
			__free_page(virt_to_page(tx_ring->txbufs[idx].frag));
		} else {
			struct sk_buff *skb = tx_ring->txbufs[idx].skb;
			int nr_frags = skb_shinfo(skb)->nr_frags;

			if (tx_buf->fidx == -1) {
				/* unmap head */
				dma_unmap_single(&pdev->dev, tx_buf->dma_addr,
						 skb_headlen(skb),
						 DMA_TO_DEVICE);
			} else {
				/* unmap fragment */
				frag = &skb_shinfo(skb)->frags[tx_buf->fidx];
				dma_unmap_page(&pdev->dev, tx_buf->dma_addr,
					       skb_frag_size(frag),
					       DMA_TO_DEVICE);
			}

			/* check for last gather fragment */
			if (tx_buf->fidx == nr_frags - 1)
				dev_kfree_skb_any(skb);
		}

		tx_buf->dma_addr = 0;
		tx_buf->skb = NULL;
		tx_buf->fidx = -2;

		tx_ring->qcp_rd_p++;
		tx_ring->rd_p++;
	}

	memset(tx_ring->txds, 0, sizeof(*tx_ring->txds) * tx_ring->cnt);
	tx_ring->wr_p = 0;
	tx_ring->rd_p = 0;
	tx_ring->qcp_rd_p = 0;
	tx_ring->wr_ptr_add = 0;

	if (tx_ring == r_vec->xdp_ring)
		return;

	nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
	netdev_tx_reset_queue(nd_q);
}
static void nfp_net_tx_timeout(struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	int i;

	for (i = 0; i < nn->netdev->real_num_tx_queues; i++) {
		if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i)))
			continue;
		nn_warn(nn, "TX timeout on ring: %d\n", i);
	}
	nn_warn(nn, "TX watchdog timeout\n");
}
/* Receive processing
 */
static unsigned int
nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu)
{
	unsigned int fl_bufsz;

	fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
	if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
		fl_bufsz += NFP_NET_MAX_PREPEND;
	else
		fl_bufsz += nn->rx_offset;
	fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + mtu;

	fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
	fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

	return fl_bufsz;
}
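/* Sizing note: the freelist buffer must hold the DMA headroom, the
 * (fixed or worst-case dynamic) metadata prepend, an Ethernet header,
 * two VLAN tags and the MTU, rounded up by SKB_DATA_ALIGN(), plus room
 * for the struct skb_shared_info that build_skb() places at the end of
 * the fragment.
 */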
static void nfp_net_free_frag(void *frag, bool xdp)
{
	if (!xdp)
		skb_free_frag(frag);
	else
		__free_page(virt_to_page(frag));
}
/**
 * nfp_net_rx_alloc_one() - Allocate and map page frag for RX
 * @rx_ring:	RX ring structure of the skb
 * @dma_addr:	Pointer to storage for DMA address (output param)
 * @fl_bufsz:	size of freelist buffers
 * @xdp:	Whether XDP is enabled
 *
 * This function will allocate a new page frag and map it for DMA.
 *
 * Return: allocated page frag or NULL on failure.
 */
static void *
nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr,
		     unsigned int fl_bufsz, bool xdp)
{
	struct nfp_net *nn = rx_ring->r_vec->nfp_net;
	int direction;
	void *frag;

	if (!xdp)
		frag = netdev_alloc_frag(fl_bufsz);
	else
		frag = page_address(alloc_page(GFP_KERNEL | __GFP_COLD));
	if (!frag) {
		nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
		return NULL;
	}

	direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;

	*dma_addr = nfp_net_dma_map_rx(nn, frag, fl_bufsz, direction);
	if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
		nfp_net_free_frag(frag, xdp);
		nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
		return NULL;
	}

	return frag;
}
static void *
nfp_net_napi_alloc_one(struct nfp_net *nn, int direction, dma_addr_t *dma_addr)
{
	void *frag;

	if (!nn->xdp_prog)
		frag = napi_alloc_frag(nn->fl_bufsz);
	else
		frag = page_address(alloc_page(GFP_ATOMIC | __GFP_COLD));
	if (!frag) {
		nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
		return NULL;
	}

	*dma_addr = nfp_net_dma_map_rx(nn, frag, nn->fl_bufsz, direction);
	if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
		nfp_net_free_frag(frag, nn->xdp_prog);
		nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
		return NULL;
	}

	return frag;
}
/**
 * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings
 * @rx_ring:	RX ring structure
 * @frag:	page fragment buffer
 * @dma_addr:	DMA address of skb mapping
 */
static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring,
				void *frag, dma_addr_t dma_addr)
{
	unsigned int wr_idx;

	wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1);

	/* Stash SKB and DMA address away */
	rx_ring->rxbufs[wr_idx].frag = frag;
	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;

	/* Fill freelist descriptor */
	rx_ring->rxds[wr_idx].fld.reserved = 0;
	rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
	nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld, dma_addr);

	rx_ring->wr_p++;
	rx_ring->wr_ptr_add++;
	if (rx_ring->wr_ptr_add >= NFP_NET_FL_BATCH) {
		/* Update write pointer of the freelist queue. Make
		 * sure all writes are flushed before telling the hardware.
		 */
		wmb();
		nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, rx_ring->wr_ptr_add);
		rx_ring->wr_ptr_add = 0;
	}
}
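/* Note: freelist write pointer updates are batched (NFP_NET_FL_BATCH
 * entries at a time) so refilling the ring costs one PCIe write per
 * batch rather than one per buffer.
 */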
/**
 * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable
 * @rx_ring:	RX ring structure
 *
 * Warning: Do *not* call if ring buffers were never put on the FW freelist
 *	    (i.e. device was not enabled)!
 */
static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
{
	unsigned int wr_idx, last_idx;

	/* Move the empty entry to the end of the list */
	wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1);
	last_idx = rx_ring->cnt - 1;
	rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr;
	rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag;
	rx_ring->rxbufs[last_idx].dma_addr = 0;
	rx_ring->rxbufs[last_idx].frag = NULL;

	memset(rx_ring->rxds, 0, sizeof(*rx_ring->rxds) * rx_ring->cnt);
	rx_ring->wr_p = 0;
	rx_ring->rd_p = 0;
	rx_ring->wr_ptr_add = 0;
}
/**
 * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
 * @nn:		NFP Net device
 * @rx_ring:	RX ring to remove buffers from
 * @xdp:	Whether XDP is enabled
 *
 * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
 * entries.  After device is disabled nfp_net_rx_ring_reset() must be called
 * to restore required ring geometry.
 */
static void
nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
			  bool xdp)
{
	int direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
	unsigned int i;

	for (i = 0; i < rx_ring->cnt - 1; i++) {
		/* NULL skb can only happen when initial filling of the ring
		 * fails to allocate enough buffers and calls here to free
		 * already allocated ones.
		 */
		if (!rx_ring->rxbufs[i].frag)
			continue;

		nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[i].dma_addr,
				     rx_ring->bufsz, direction);
		nfp_net_free_frag(rx_ring->rxbufs[i].frag, xdp);
		rx_ring->rxbufs[i].dma_addr = 0;
		rx_ring->rxbufs[i].frag = NULL;
	}
}
/**
 * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
 * @nn:		NFP Net device
 * @rx_ring:	RX ring to allocate buffers for
 * @xdp:	Whether XDP is enabled
 */
static int
nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
			   bool xdp)
{
	struct nfp_net_rx_buf *rxbufs;
	unsigned int i;

	rxbufs = rx_ring->rxbufs;

	for (i = 0; i < rx_ring->cnt - 1; i++) {
		rxbufs[i].frag =
			nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr,
					     rx_ring->bufsz, xdp);
		if (!rxbufs[i].frag) {
			nfp_net_rx_ring_bufs_free(nn, rx_ring, xdp);
			return -ENOMEM;
		}
	}

	return 0;
}
/**
 * nfp_net_rx_ring_fill_freelist() - Give buffers from the ring to FW
 * @rx_ring: RX ring to fill
 */
static void nfp_net_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring)
{
	unsigned int i;

	for (i = 0; i < rx_ring->cnt - 1; i++)
		nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].frag,
				    rx_ring->rxbufs[i].dma_addr);
}
/**
 * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags: RX descriptor flags field in CPU byte order
 */
static int nfp_net_rx_csum_has_errors(u16 flags)
{
	u16 csum_all_checked, csum_all_ok;

	csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
	csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;

	return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
}
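/* How the check works: each "csum OK" flag sits
 * PCIE_DESC_RX_CSUM_OK_SHIFT bits below its "csum checked" counterpart,
 * so shifting the OK bits up aligns the two groups; any checksum that
 * was checked but not reported OK makes the comparison fail.
 */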
/**
 * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags
 * @nn:    NFP Net device
 * @r_vec: per-ring structure
 * @rxd:   Pointer to RX descriptor
 * @skb:   Pointer to SKB
 */
static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
			    struct nfp_net_rx_desc *rxd, struct sk_buff *skb)
{
	skb_checksum_none_assert(skb);

	if (!(nn->netdev->features & NETIF_F_RXCSUM))
		return;

	if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_error++;
		u64_stats_update_end(&r_vec->rx_sync);
		return;
	}

	/* Assume that the firmware will never report inner CSUM_OK unless outer
	 * L4 headers were successfully parsed. FW will always report zero UDP
	 * checksum as CSUM_OK.
	 */
	if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}

	if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
	    rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
		__skb_incr_checksum_unnecessary(skb);
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->hw_csum_rx_inner_ok++;
		u64_stats_update_end(&r_vec->rx_sync);
	}
}
static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
			     unsigned int type, __be32 *hash)
{
	if (!(netdev->features & NETIF_F_RXHASH))
		return;

	switch (type) {
	case NFP_NET_RSS_IPV4:
	case NFP_NET_RSS_IPV6:
	case NFP_NET_RSS_IPV6_EX:
		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3);
		break;
	default:
		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4);
		break;
	}
}
static void
nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
		      struct nfp_net_rx_desc *rxd)
{
	struct nfp_net_rx_hash *rx_hash;

	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
		return;

	rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));

	nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
			 &rx_hash->hash);
}
static void *
nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
		   int meta_len)
{
	u8 *data = skb->data - meta_len;
	u32 meta_info;

	meta_info = get_unaligned_be32(data);
	data += 4;

	while (meta_info) {
		switch (meta_info & NFP_NET_META_FIELD_MASK) {
		case NFP_NET_META_HASH:
			meta_info >>= NFP_NET_META_FIELD_SIZE;
			nfp_net_set_hash(netdev, skb,
					 meta_info & NFP_NET_META_FIELD_MASK,
					 (__be32 *)data);
			data += 4;
			break;
		case NFP_NET_META_MARK:
			skb->mark = get_unaligned_be32(data);
			data += 4;
			break;
		default:
			return NULL;
		}

		meta_info >>= NFP_NET_META_FIELD_SIZE;
	}

	return data;
}
static void
nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring,
		struct nfp_net_rx_buf *rxbuf, struct sk_buff *skb)
{
	u64_stats_update_begin(&r_vec->rx_sync);
	r_vec->rx_drops++;
	u64_stats_update_end(&r_vec->rx_sync);

	/* skb is build based on the frag, free_skb() would free the frag
	 * so to be able to reuse it we need an extra ref.
	 */
	if (skb && rxbuf && skb->head == rxbuf->frag)
		page_ref_inc(virt_to_head_page(rxbuf->frag));
	if (rxbuf)
		nfp_net_rx_give_one(rx_ring, rxbuf->frag, rxbuf->dma_addr);
	if (skb)
		dev_kfree_skb_any(skb);
}
static bool
nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
		   struct nfp_net_tx_ring *tx_ring,
		   struct nfp_net_rx_buf *rxbuf, unsigned int pkt_off,
		   unsigned int pkt_len)
{
	struct nfp_net_tx_buf *txbuf;
	struct nfp_net_tx_desc *txd;
	dma_addr_t new_dma_addr;
	void *new_frag;
	int wr_idx;

	if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
		nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL);
		return false;
	}

	new_frag = nfp_net_napi_alloc_one(nn, DMA_BIDIRECTIONAL, &new_dma_addr);
	if (unlikely(!new_frag)) {
		nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL);
		return false;
	}
	nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);

	wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1);

	/* Stash the soft descriptor of the head then initialize it */
	txbuf = &tx_ring->txbufs[wr_idx];
	txbuf->frag = rxbuf->frag;
	txbuf->dma_addr = rxbuf->dma_addr;
	txbuf->fidx = -1;
	txbuf->pkt_cnt = 1;
	txbuf->real_len = pkt_len;

	dma_sync_single_for_device(&nn->pdev->dev, rxbuf->dma_addr + pkt_off,
				   pkt_len, DMA_BIDIRECTIONAL);

	/* Build TX descriptor */
	txd = &tx_ring->txds[wr_idx];
	txd->offset_eop = PCIE_DESC_TX_EOP;
	txd->dma_len = cpu_to_le16(pkt_len);
	nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + pkt_off);
	txd->data_len = cpu_to_le16(pkt_len);

	txd->flags = 0;
	txd->mss = 0;
	txd->l4_offset = 0;

	tx_ring->wr_p++;
	tx_ring->wr_ptr_add++;
	return true;
}
*prog
, void *data
, unsigned int len
)
1521 struct xdp_buff xdp
;
1524 xdp
.data_end
= data
+ len
;
1526 return bpf_prog_run_xdp(prog
, &xdp
);
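/* The xdp_buff handed to the program spans exactly the received packet
 * (data to data_end); the verdict returned by bpf_prog_run_xdp() is one
 * of the XDP actions (XDP_PASS, XDP_TX, XDP_DROP, XDP_ABORTED) handled
 * by the caller below.
 */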
/**
 * nfp_net_rx() - receive up to @budget packets on @rx_ring
 * @rx_ring:   RX ring to receive from
 * @budget:    NAPI budget
 *
 * Note, this function is separated out from the napi poll function to
 * more cleanly separate packet receive code from other bookkeeping
 * functions performed in the napi poll function.
 *
 * Return: Number of packets received.
 */
static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net *nn = r_vec->nfp_net;
	struct nfp_net_tx_ring *tx_ring;
	struct bpf_prog *xdp_prog;
	unsigned int true_bufsz;
	struct sk_buff *skb;
	int pkts_polled = 0;
	int rx_dma_map_dir;
	int idx;

	rcu_read_lock();
	xdp_prog = READ_ONCE(nn->xdp_prog);
	rx_dma_map_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
	true_bufsz = xdp_prog ? PAGE_SIZE : nn->fl_bufsz;
	tx_ring = r_vec->xdp_ring;

	while (pkts_polled < budget) {
		unsigned int meta_len, data_len, data_off, pkt_len, pkt_off;
		struct nfp_net_rx_buf *rxbuf;
		struct nfp_net_rx_desc *rxd;
		dma_addr_t new_dma_addr;
		void *new_frag;

		idx = rx_ring->rd_p & (rx_ring->cnt - 1);

		rxd = &rx_ring->rxds[idx];
		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
			break;

		/* Memory barrier to ensure that we won't do other reads
		 * before the DD bit.
		 */
		dma_rmb();

		rx_ring->rd_p++;
		pkts_polled++;

		rxbuf =	&rx_ring->rxbufs[idx];
		/*         < meta_len >
		 *  <-- [rx_offset] -->
		 *  ---------------------------------------------------------
		 * | [XX] |  metadata  |             packet           | XXXX |
		 *  ---------------------------------------------------------
		 *         <---------------- data_len --------------->
		 *
		 * The rx_offset is fixed for all packets, the meta_len can vary
		 * on a packet by packet basis. If rx_offset is set to zero
		 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
		 * buffer and is immediately followed by the packet (no [XX]).
		 */
		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
		data_len = le16_to_cpu(rxd->rxd.data_len);
		pkt_len = data_len - meta_len;

		if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
			pkt_off = meta_len;
		else
			pkt_off = nn->rx_offset;
		data_off = NFP_NET_RX_BUF_HEADROOM + pkt_off;

		/* Stats update */
		u64_stats_update_begin(&r_vec->rx_sync);
		r_vec->rx_pkts++;
		r_vec->rx_bytes += pkt_len;
		u64_stats_update_end(&r_vec->rx_sync);

		if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
				  nn->bpf_offload_xdp)) {
			int act;

			dma_sync_single_for_cpu(&nn->pdev->dev,
						rxbuf->dma_addr + pkt_off,
						pkt_len, DMA_BIDIRECTIONAL);
			act = nfp_net_run_xdp(xdp_prog, rxbuf->frag + data_off,
					      pkt_len);
			switch (act) {
			case XDP_PASS:
				break;
			case XDP_TX:
				if (unlikely(!nfp_net_tx_xdp_buf(nn, rx_ring,
								 tx_ring, rxbuf,
								 pkt_off,
								 pkt_len)))
					trace_xdp_exception(nn->netdev,
							    xdp_prog, act);
				continue;
			default:
				bpf_warn_invalid_xdp_action(act);
			case XDP_ABORTED:
				trace_xdp_exception(nn->netdev, xdp_prog, act);
			case XDP_DROP:
				nfp_net_rx_give_one(rx_ring, rxbuf->frag,
						    rxbuf->dma_addr);
				continue;
			}
		}

		skb = build_skb(rxbuf->frag, true_bufsz);
		if (unlikely(!skb)) {
			nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL);
			continue;
		}
		new_frag = nfp_net_napi_alloc_one(nn, rx_dma_map_dir,
						  &new_dma_addr);
		if (unlikely(!new_frag)) {
			nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb);
			continue;
		}

		nfp_net_dma_unmap_rx(nn, rxbuf->dma_addr, nn->fl_bufsz,
				     rx_dma_map_dir);

		nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);

		skb_reserve(skb, data_off);
		skb_put(skb, pkt_len);

		if (nn->fw_ver.major <= 3) {
			nfp_net_set_hash_desc(nn->netdev, skb, rxd);
		} else if (meta_len) {
			void *end;

			end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
			if (unlikely(end != skb->data)) {
				nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
				nfp_net_rx_drop(r_vec, rx_ring, NULL, skb);
				continue;
			}
		}

		skb_record_rx_queue(skb, rx_ring->idx);
		skb->protocol = eth_type_trans(skb, nn->netdev);

		nfp_net_rx_csum(nn, r_vec, rxd, skb);

		if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       le16_to_cpu(rxd->rxd.vlan));

		napi_gro_receive(&rx_ring->r_vec->napi, skb);
	}

	if (xdp_prog && tx_ring->wr_ptr_add)
		nfp_net_tx_xmit_more_flush(tx_ring);
	rcu_read_unlock();

	return pkts_polled;
}
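/* Offset recap for the receive path above: pkt_off is where the packet
 * proper starts within the DMA region, and data_off additionally skips
 * NFP_NET_RX_BUF_HEADROOM, giving both the address handed to the XDP
 * program and the skb_reserve() amount used for the stack.
 */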
/**
 * nfp_net_poll() - napi poll function
 * @napi:    NAPI structure
 * @budget:  NAPI budget
 *
 * Return: number of packets polled.
 */
static int nfp_net_poll(struct napi_struct *napi, int budget)
{
	struct nfp_net_r_vector *r_vec =
		container_of(napi, struct nfp_net_r_vector, napi);
	unsigned int pkts_polled = 0;

	if (r_vec->tx_ring)
		nfp_net_tx_complete(r_vec->tx_ring);
	if (r_vec->rx_ring) {
		pkts_polled = nfp_net_rx(r_vec->rx_ring, budget);
		if (r_vec->xdp_ring)
			nfp_net_xdp_complete(r_vec->xdp_ring);
	}

	if (pkts_polled < budget) {
		napi_complete_done(napi, pkts_polled);
		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
	}

	return pkts_polled;
}
/* Setup and Configuration
 */

/**
 * nfp_net_tx_ring_free() - Free resources allocated to a TX ring
 * @tx_ring:   TX ring to free
 */
static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net *nn = r_vec->nfp_net;
	struct pci_dev *pdev = nn->pdev;

	kfree(tx_ring->txbufs);

	if (tx_ring->txds)
		dma_free_coherent(&pdev->dev, tx_ring->size,
				  tx_ring->txds, tx_ring->dma);

	tx_ring->cnt = 0;
	tx_ring->txbufs = NULL;
	tx_ring->txds = NULL;
	tx_ring->dma = 0;
	tx_ring->size = 0;
}
/**
 * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
 * @tx_ring:   TX Ring structure to allocate
 * @cnt:       Ring buffer count
 * @is_xdp:    True if ring will be used for XDP
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int
nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt, bool is_xdp)
{
	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
	struct nfp_net *nn = r_vec->nfp_net;
	struct pci_dev *pdev = nn->pdev;
	int sz;

	tx_ring->cnt = cnt;

	tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt;
	tx_ring->txds = dma_zalloc_coherent(&pdev->dev, tx_ring->size,
					    &tx_ring->dma, GFP_KERNEL);
	if (!tx_ring->txds)
		goto err_alloc;

	sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt;
	tx_ring->txbufs = kzalloc(sz, GFP_KERNEL);
	if (!tx_ring->txbufs)
		goto err_alloc;

	if (!is_xdp)
		netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask,
				    tx_ring->idx);

	nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p %s\n",
	       tx_ring->idx, tx_ring->qcidx,
	       tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds,
	       is_xdp ? "XDP" : "");

	return 0;

err_alloc:
	nfp_net_tx_ring_free(tx_ring);
	return -ENOMEM;
}
static struct nfp_net_tx_ring *
nfp_net_tx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s,
			    unsigned int num_stack_tx_rings)
{
	struct nfp_net_tx_ring *rings;
	unsigned int r;

	rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL);
	if (!rings)
		return NULL;

	for (r = 0; r < s->n_rings; r++) {
		int bias = 0;

		if (r >= num_stack_tx_rings)
			bias = num_stack_tx_rings;

		nfp_net_tx_ring_init(&rings[r], &nn->r_vecs[r - bias], r);

		if (nfp_net_tx_ring_alloc(&rings[r], s->dcnt, bias))
			goto err_free_prev;
	}

	return s->rings = rings;

err_free_prev:
	while (r--)
		nfp_net_tx_ring_free(&rings[r]);
	kfree(rings);
	return NULL;
}
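/* Ring-to-vector bias (illustrative): rings past num_stack_tx_rings are
 * XDP TX rings and are folded back onto the same r_vecs as the stack
 * rings they shadow; e.g. with 8 stack rings, ring 9 (bias 8) is
 * serviced by r_vec 1.
 */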
static void
nfp_net_tx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s)
{
	struct nfp_net_ring_set new = *s;

	s->dcnt = nn->txd_cnt;
	s->rings = nn->tx_rings;
	s->n_rings = nn->num_tx_rings;

	nn->txd_cnt = new.dcnt;
	nn->tx_rings = new.rings;
	nn->num_tx_rings = new.n_rings;
}
static void
nfp_net_tx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s)
{
	struct nfp_net_tx_ring *rings = s->rings;
	unsigned int r;

	for (r = 0; r < s->n_rings; r++)
		nfp_net_tx_ring_free(&rings[r]);

	kfree(rings);
}
/**
 * nfp_net_rx_ring_free() - Free resources allocated to a RX ring
 * @rx_ring:  RX ring to free
 */
static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net *nn = r_vec->nfp_net;
	struct pci_dev *pdev = nn->pdev;

	kfree(rx_ring->rxbufs);

	if (rx_ring->rxds)
		dma_free_coherent(&pdev->dev, rx_ring->size,
				  rx_ring->rxds, rx_ring->dma);

	rx_ring->cnt = 0;
	rx_ring->rxbufs = NULL;
	rx_ring->rxds = NULL;
	rx_ring->dma = 0;
	rx_ring->size = 0;
}
/**
 * nfp_net_rx_ring_alloc() - Allocate resource for a RX ring
 * @rx_ring:  RX ring to allocate
 * @fl_bufsz: Size of buffers to allocate
 * @cnt:      Ring buffer count
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int
nfp_net_rx_ring_alloc(struct nfp_net_rx_ring *rx_ring, unsigned int fl_bufsz,
		      u32 cnt)
{
	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
	struct nfp_net *nn = r_vec->nfp_net;
	struct pci_dev *pdev = nn->pdev;
	int sz;

	rx_ring->cnt = cnt;
	rx_ring->bufsz = fl_bufsz;

	rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
	rx_ring->rxds = dma_zalloc_coherent(&pdev->dev, rx_ring->size,
					    &rx_ring->dma, GFP_KERNEL);
	if (!rx_ring->rxds)
		goto err_alloc;

	sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt;
	rx_ring->rxbufs = kzalloc(sz, GFP_KERNEL);
	if (!rx_ring->rxbufs)
		goto err_alloc;

	nn_dbg(nn, "RxQ%02d: FlQCidx=%02d RxQCidx=%02d cnt=%d dma=%#llx host=%p\n",
	       rx_ring->idx, rx_ring->fl_qcidx, rx_ring->rx_qcidx,
	       rx_ring->cnt, (unsigned long long)rx_ring->dma, rx_ring->rxds);

	return 0;

err_alloc:
	nfp_net_rx_ring_free(rx_ring);
	return -ENOMEM;
}
static struct nfp_net_rx_ring *
nfp_net_rx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s,
			    bool xdp)
{
	unsigned int fl_bufsz = nfp_net_calc_fl_bufsz(nn, s->mtu);
	struct nfp_net_rx_ring *rings;
	unsigned int r;

	rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL);
	if (!rings)
		return NULL;

	for (r = 0; r < s->n_rings; r++) {
		nfp_net_rx_ring_init(&rings[r], &nn->r_vecs[r], r);

		if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, s->dcnt))
			goto err_free_prev;

		if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r], xdp))
			goto err_free_ring;
	}

	return s->rings = rings;

err_free_prev:
	while (r--) {
		nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp);
err_free_ring:
		nfp_net_rx_ring_free(&rings[r]);
	}
	kfree(rings);
	return NULL;
}
static void
nfp_net_rx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s)
{
	struct nfp_net_ring_set new = *s;

	s->mtu = nn->netdev->mtu;
	s->dcnt = nn->rxd_cnt;
	s->rings = nn->rx_rings;
	s->n_rings = nn->num_rx_rings;

	nn->netdev->mtu = new.mtu;
	nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, new.mtu);
	nn->rxd_cnt = new.dcnt;
	nn->rx_rings = new.rings;
	nn->num_rx_rings = new.n_rings;
}
static void
nfp_net_rx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s,
			 bool xdp)
{
	struct nfp_net_rx_ring *rings = s->rings;
	unsigned int r;

	for (r = 0; r < s->n_rings; r++) {
		nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp);
		nfp_net_rx_ring_free(&rings[r]);
	}

	kfree(rings);
}
static void
nfp_net_vector_assign_rings(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
			    int idx)
{
	r_vec->rx_ring = idx < nn->num_rx_rings ? &nn->rx_rings[idx] : NULL;

	r_vec->tx_ring =
		idx < nn->num_stack_tx_rings ? &nn->tx_rings[idx] : NULL;

	r_vec->xdp_ring = idx < nn->num_tx_rings - nn->num_stack_tx_rings ?
		&nn->tx_rings[nn->num_stack_tx_rings + idx] : NULL;
}
static int
nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
		       int idx)
{
	int err;

	/* Setup NAPI */
	netif_napi_add(nn->netdev, &r_vec->napi,
		       nfp_net_poll, NAPI_POLL_WEIGHT);

	snprintf(r_vec->name, sizeof(r_vec->name),
		 "%s-rxtx-%d", nn->netdev->name, idx);
	err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name,
			  r_vec);
	if (err) {
		netif_napi_del(&r_vec->napi);
		nn_err(nn, "Error requesting IRQ %d\n", r_vec->irq_vector);
		return err;
	}
	disable_irq(r_vec->irq_vector);

	irq_set_affinity_hint(r_vec->irq_vector, &r_vec->affinity_mask);

	nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, r_vec->irq_vector,
	       r_vec->irq_entry);

	return 0;
}
static void
nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
{
	irq_set_affinity_hint(r_vec->irq_vector, NULL);
	netif_napi_del(&r_vec->napi);
	free_irq(r_vec->irq_vector, r_vec);
}
/**
 * nfp_net_rss_write_itbl() - Write RSS indirection table to device
 * @nn:      NFP Net device to reconfigure
 */
void nfp_net_rss_write_itbl(struct nfp_net *nn)
{
	int i;

	for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4)
		nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i,
			  get_unaligned_le32(nn->rss_itbl + i));
}
/**
 * nfp_net_rss_write_key() - Write RSS hash key to device
 * @nn:      NFP Net device to reconfigure
 */
void nfp_net_rss_write_key(struct nfp_net *nn)
{
	int i;

	for (i = 0; i < NFP_NET_CFG_RSS_KEY_SZ; i += 4)
		nn_writel(nn, NFP_NET_CFG_RSS_KEY + i,
			  get_unaligned_le32(nn->rss_key + i));
}
/**
 * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW
 * @nn:      NFP Net device to reconfigure
 */
void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
{
	u8 i;
	u32 factor;
	u32 value;

	/* Compute factor used to convert coalesce '_usecs' parameters to
	 * ME timestamp ticks.  There are 16 ME clock cycles for each timestamp
	 * count.
	 */
	factor = nn->me_freq_mhz / 16;

	/* copy RX interrupt coalesce parameters */
	value = (nn->rx_coalesce_max_frames << 16) |
		(factor * nn->rx_coalesce_usecs);
	for (i = 0; i < nn->num_rx_rings; i++)
		nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value);

	/* copy TX interrupt coalesce parameters */
	value = (nn->tx_coalesce_max_frames << 16) |
		(factor * nn->tx_coalesce_usecs);
	for (i = 0; i < nn->num_tx_rings; i++)
		nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value);
}
/**
 * nfp_net_write_mac_addr() - Write mac address to the device control BAR
 * @nn:      NFP Net device to reconfigure
 *
 * Writes the MAC address from the netdev to the device control BAR.  Does not
 * perform the required reconfig.  We do a bit of byte swapping dance because
 * firmware is LE.
 */
static void nfp_net_write_mac_addr(struct nfp_net *nn)
{
	nn_writel(nn, NFP_NET_CFG_MACADDR + 0,
		  get_unaligned_be32(nn->netdev->dev_addr));
	nn_writew(nn, NFP_NET_CFG_MACADDR + 6,
		  get_unaligned_be16(nn->netdev->dev_addr + 4));
}
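/* Worked example (illustrative): for MAC aa:bb:cc:dd:ee:ff the first
 * write stores 0xaabbccdd at MACADDR + 0 and the second stores 0xeeff
 * at MACADDR + 6, with nn_writel()/nn_writew() performing the
 * little-endian BAR access.
 */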
static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
{
	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0);

	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0);
	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0);
}
/**
 * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
 * @nn:      NFP Net device to reconfigure
 */
static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
{
	u32 new_ctrl, update;
	unsigned int r;
	int err;

	new_ctrl = nn->ctrl;
	new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE;
	update = NFP_NET_CFG_UPDATE_GEN;
	update |= NFP_NET_CFG_UPDATE_MSIX;
	update |= NFP_NET_CFG_UPDATE_RING;

	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
		new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG;

	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);

	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
	err = nfp_net_reconfig(nn, update);
	if (err)
		nn_err(nn, "Could not disable device: %d\n", err);

	for (r = 0; r < nn->num_rx_rings; r++)
		nfp_net_rx_ring_reset(&nn->rx_rings[r]);
	for (r = 0; r < nn->num_tx_rings; r++)
		nfp_net_tx_ring_reset(nn, &nn->tx_rings[r]);
	for (r = 0; r < nn->num_r_vecs; r++)
		nfp_net_vec_clear_ring_data(nn, r);

	nn->ctrl = new_ctrl;
}
static void
nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn,
			     struct nfp_net_rx_ring *rx_ring, unsigned int idx)
{
	/* Write the DMA address, size and MSI-X info to the device */
	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma);
	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt));
	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_entry);
}

static void
nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
			     struct nfp_net_tx_ring *tx_ring, unsigned int idx)
{
	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
}
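/* Note: the ring size registers take ilog2() of the descriptor count,
 * which together with the wr_p/rd_p masking by (cnt - 1) elsewhere in
 * this file implies ring sizes must be powers of two.
 */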

static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
{
	u32 new_ctrl, update = 0;
	unsigned int r;
	int err;

	new_ctrl = nn->ctrl;

	if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
		nfp_net_rss_write_key(nn);
		nfp_net_rss_write_itbl(nn);
		nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
		update |= NFP_NET_CFG_UPDATE_RSS;
	}

	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
		nfp_net_coalesce_write_cfg(nn);

		new_ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
		update |= NFP_NET_CFG_UPDATE_IRQMOD;
	}

	for (r = 0; r < nn->num_tx_rings; r++)
		nfp_net_tx_ring_hw_cfg_write(nn, &nn->tx_rings[r], r);
	for (r = 0; r < nn->num_rx_rings; r++)
		nfp_net_rx_ring_hw_cfg_write(nn, &nn->rx_rings[r], r);

	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
		  0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);

	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->num_rx_rings == 64 ?
		  0xffffffffffffffffULL : ((u64)1 << nn->num_rx_rings) - 1);
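
	/* E.g. with 4 TX rings the mask above is ((u64)1 << 4) - 1 == 0xf,
	 * enabling rings 0-3.  The explicit == 64 case is needed because
	 * shifting a 64-bit value by 64 is undefined behaviour in C.
	 */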

	nfp_net_write_mac_addr(nn);

	nn_writel(nn, NFP_NET_CFG_MTU, nn->netdev->mtu);
	nn_writel(nn, NFP_NET_CFG_FLBUFSZ,
		  nn->fl_bufsz - NFP_NET_RX_BUF_NON_DATA);

	/* Enable device */
	new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
	update |= NFP_NET_CFG_UPDATE_GEN;
	update |= NFP_NET_CFG_UPDATE_MSIX;
	update |= NFP_NET_CFG_UPDATE_RING;
	if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
		new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;

	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
	err = nfp_net_reconfig(nn, update);
	if (err)
		return err;

	nn->ctrl = new_ctrl;

	for (r = 0; r < nn->num_rx_rings; r++)
		nfp_net_rx_ring_fill_freelist(&nn->rx_rings[r]);

	/* Since reconfiguration requests while NFP is down are ignored we
	 * have to wipe the entire VXLAN configuration and reinitialize it.
	 */
	if (nn->ctrl & NFP_NET_CFG_CTRL_VXLAN) {
		memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
		memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
		udp_tunnel_get_rx_info(nn->netdev);
	}

	return 0;
}

/**
 * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
 * @nn:      NFP Net device to reconfigure
 */
static int nfp_net_set_config_and_enable(struct nfp_net *nn)
{
	int err;

	err = __nfp_net_set_config_and_enable(nn);
	if (err)
		nfp_net_clear_config_and_disable(nn);

	return err;
}

/**
 * nfp_net_open_stack() - Start the device from stack's perspective
 * @nn:      NFP Net device to reconfigure
 */
static void nfp_net_open_stack(struct nfp_net *nn)
{
	unsigned int r;

	for (r = 0; r < nn->num_r_vecs; r++) {
		napi_enable(&nn->r_vecs[r].napi);
		enable_irq(nn->r_vecs[r].irq_vector);
	}

	netif_tx_wake_all_queues(nn->netdev);

	enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
	nfp_net_read_link_status(nn);
}

static int nfp_net_netdev_open(struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	struct nfp_net_ring_set rx = {
		.n_rings = nn->num_rx_rings,
		.mtu = nn->netdev->mtu,
		.dcnt = nn->rxd_cnt,
	};
	struct nfp_net_ring_set tx = {
		.n_rings = nn->num_tx_rings,
		.dcnt = nn->txd_cnt,
	};
	int err, r;

	if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) {
		nn_err(nn, "Dev is already enabled: 0x%08x\n", nn->ctrl);
		return -EBUSY;
	}

	/* Step 1: Allocate resources for rings and the like
	 * - Request interrupts
	 * - Allocate RX and TX ring resources
	 * - Setup initial RSS table
	 */
	err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn",
				      nn->exn_name, sizeof(nn->exn_name),
				      NFP_NET_IRQ_EXN_IDX, nn->exn_handler);
	if (err)
		return err;
	err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc",
				      nn->lsc_name, sizeof(nn->lsc_name),
				      NFP_NET_IRQ_LSC_IDX, nn->lsc_handler);
	if (err)
		goto err_free_exn;
	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);

	for (r = 0; r < nn->num_r_vecs; r++) {
		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
		if (err)
			goto err_cleanup_vec_p;
	}

	nn->rx_rings = nfp_net_rx_ring_set_prepare(nn, &rx, nn->xdp_prog);
	if (!nn->rx_rings) {
		err = -ENOMEM;
		goto err_cleanup_vec;
	}

	nn->tx_rings = nfp_net_tx_ring_set_prepare(nn, &tx,
						   nn->num_stack_tx_rings);
	if (!nn->tx_rings) {
		err = -ENOMEM;
		goto err_free_rx_rings;
	}

	for (r = 0; r < nn->max_r_vecs; r++)
		nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r);

	err = netif_set_real_num_tx_queues(netdev, nn->num_stack_tx_rings);
	if (err)
		goto err_free_rings;

	err = netif_set_real_num_rx_queues(netdev, nn->num_rx_rings);
	if (err)
		goto err_free_rings;

	/* Step 2: Configure the NFP
	 * - Enable rings from 0 to tx_rings/rx_rings - 1.
	 * - Write MAC address (in case it changed)
	 * - Set the MTU
	 * - Set the Freelist buffer size
	 * - Enable the FW
	 */
	err = nfp_net_set_config_and_enable(nn);
	if (err)
		goto err_free_rings;

	/* Step 3: Enable for kernel
	 * - put some freelist descriptors on each RX ring
	 * - enable NAPI on each ring
	 * - enable all TX queues
	 * - set link state
	 */
	nfp_net_open_stack(nn);

	return 0;

err_free_rings:
	nfp_net_tx_ring_set_free(nn, &tx);
err_free_rx_rings:
	nfp_net_rx_ring_set_free(nn, &rx, nn->xdp_prog);
err_cleanup_vec:
	r = nn->num_r_vecs;
err_cleanup_vec_p:
	while (r--)
		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
err_free_exn:
	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
	return err;
}

/**
 * nfp_net_close_stack() - Quiesce the stack (part of close)
 * @nn:      NFP Net device to reconfigure
 */
static void nfp_net_close_stack(struct nfp_net *nn)
{
	unsigned int r;

	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
	netif_carrier_off(nn->netdev);
	nn->link_up = false;

	for (r = 0; r < nn->num_r_vecs; r++) {
		disable_irq(nn->r_vecs[r].irq_vector);
		napi_disable(&nn->r_vecs[r].napi);
	}

	netif_tx_disable(nn->netdev);
}

/**
 * nfp_net_close_free_all() - Free all runtime resources
 * @nn:      NFP Net device to reconfigure
 */
static void nfp_net_close_free_all(struct nfp_net *nn)
{
	unsigned int r;

	for (r = 0; r < nn->num_rx_rings; r++) {
		nfp_net_rx_ring_bufs_free(nn, &nn->rx_rings[r], nn->xdp_prog);
		nfp_net_rx_ring_free(&nn->rx_rings[r]);
	}
	for (r = 0; r < nn->num_tx_rings; r++)
		nfp_net_tx_ring_free(&nn->tx_rings[r]);
	for (r = 0; r < nn->num_r_vecs; r++)
		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);

	kfree(nn->rx_rings);
	kfree(nn->tx_rings);

	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
}

/**
 * nfp_net_netdev_close() - Called when the device is downed
 * @netdev:      netdev structure
 */
static int nfp_net_netdev_close(struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);

	if (!(nn->ctrl & NFP_NET_CFG_CTRL_ENABLE)) {
		nn_err(nn, "Dev is not up: 0x%08x\n", nn->ctrl);
		return 0;
	}

	/* Step 1: Disable RX and TX rings from the Linux kernel perspective
	 */
	nfp_net_close_stack(nn);

	/* Step 2: Tell NFP
	 */
	nfp_net_clear_config_and_disable(nn);

	/* Step 3: Free resources
	 */
	nfp_net_close_free_all(nn);

	nn_dbg(nn, "%s down", netdev->name);
	return 0;
}

static void nfp_net_set_rx_mode(struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	u32 new_ctrl;

	new_ctrl = nn->ctrl;

	if (netdev->flags & IFF_PROMISC) {
		if (nn->cap & NFP_NET_CFG_CTRL_PROMISC)
			new_ctrl |= NFP_NET_CFG_CTRL_PROMISC;
		else
			nn_warn(nn, "FW does not support promiscuous mode\n");
	} else {
		new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC;
	}

	if (new_ctrl == nn->ctrl)
		return;

	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
	nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);

	nn->ctrl = new_ctrl;
}

static void nfp_net_rss_init_itbl(struct nfp_net *nn)
{
	int i;

	for (i = 0; i < sizeof(nn->rss_itbl); i++)
		nn->rss_itbl[i] =
			ethtool_rxfh_indir_default(i, nn->num_rx_rings);
}
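
/* ethtool_rxfh_indir_default(i, n) is simply i % n, so with e.g. 4 RX rings
 * the indirection table fills as 0, 1, 2, 3, 0, 1, ..., spreading RSS
 * buckets round-robin over the enabled rings until userspace overrides it.
 */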

static int
nfp_net_ring_swap_enable(struct nfp_net *nn, unsigned int *num_vecs,
			 unsigned int *stack_tx_rings,
			 struct bpf_prog **xdp_prog,
			 struct nfp_net_ring_set *rx,
			 struct nfp_net_ring_set *tx)
{
	unsigned int r;
	int err;

	if (rx)
		nfp_net_rx_ring_set_swap(nn, rx);
	if (tx)
		nfp_net_tx_ring_set_swap(nn, tx);

	swap(*num_vecs, nn->num_r_vecs);
	swap(*stack_tx_rings, nn->num_stack_tx_rings);
	*xdp_prog = xchg(&nn->xdp_prog, *xdp_prog);

	for (r = 0; r < nn->max_r_vecs; r++)
		nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r);

	if (!netif_is_rxfh_configured(nn->netdev))
		nfp_net_rss_init_itbl(nn);

	err = netif_set_real_num_rx_queues(nn->netdev,
					   nn->num_rx_rings);
	if (err)
		return err;

	if (nn->netdev->real_num_tx_queues != nn->num_stack_tx_rings) {
		err = netif_set_real_num_tx_queues(nn->netdev,
						   nn->num_stack_tx_rings);
		if (err)
			return err;
	}

	return __nfp_net_set_config_and_enable(nn);
}

static int
nfp_net_check_config(struct nfp_net *nn, struct bpf_prog *xdp_prog,
		     struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx)
{
	/* XDP-enabled tests */
	if (!xdp_prog)
		return 0;
	if (rx && nfp_net_calc_fl_bufsz(nn, rx->mtu) > PAGE_SIZE) {
		nn_warn(nn, "MTU too large w/ XDP enabled\n");
		return -EINVAL;
	}
	if (tx && tx->n_rings > nn->max_tx_rings) {
		nn_warn(nn, "Insufficient number of TX rings w/ XDP enabled\n");
		return -EINVAL;
	}

	return 0;
}

static void
nfp_net_ring_reconfig_down(struct nfp_net *nn, struct bpf_prog **xdp_prog,
			   struct nfp_net_ring_set *rx,
			   struct nfp_net_ring_set *tx,
			   unsigned int stack_tx_rings, unsigned int num_vecs)
{
	nn->netdev->mtu = rx ? rx->mtu : nn->netdev->mtu;
	nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, nn->netdev->mtu);
	nn->rxd_cnt = rx ? rx->dcnt : nn->rxd_cnt;
	nn->txd_cnt = tx ? tx->dcnt : nn->txd_cnt;
	nn->num_rx_rings = rx ? rx->n_rings : nn->num_rx_rings;
	nn->num_tx_rings = tx ? tx->n_rings : nn->num_tx_rings;
	nn->num_stack_tx_rings = stack_tx_rings;
	nn->num_r_vecs = num_vecs;
	*xdp_prog = xchg(&nn->xdp_prog, *xdp_prog);

	if (!netif_is_rxfh_configured(nn->netdev))
		nfp_net_rss_init_itbl(nn);
}

int
nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog,
		      struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx)
{
	unsigned int stack_tx_rings, num_vecs, r;
	int err;

	stack_tx_rings = tx ? tx->n_rings : nn->num_tx_rings;
	if (*xdp_prog)
		stack_tx_rings -= rx ? rx->n_rings : nn->num_rx_rings;

	num_vecs = max(rx ? rx->n_rings : nn->num_rx_rings, stack_tx_rings);

	err = nfp_net_check_config(nn, *xdp_prog, rx, tx);
	if (err)
		return err;

	if (!netif_running(nn->netdev)) {
		nfp_net_ring_reconfig_down(nn, xdp_prog, rx, tx,
					   stack_tx_rings, num_vecs);
		return 0;
	}

	/* Prepare new rings */
	for (r = nn->num_r_vecs; r < num_vecs; r++) {
		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
		if (err) {
			num_vecs = r;
			goto err_cleanup_vecs;
		}
	}
	if (rx) {
		if (!nfp_net_rx_ring_set_prepare(nn, rx, *xdp_prog)) {
			err = -ENOMEM;
			goto err_cleanup_vecs;
		}
	}
	if (tx) {
		if (!nfp_net_tx_ring_set_prepare(nn, tx, stack_tx_rings)) {
			err = -ENOMEM;
			goto err_free_rx;
		}
	}

	/* Stop device, swap in new rings, try to start the firmware */
	nfp_net_close_stack(nn);
	nfp_net_clear_config_and_disable(nn);

	err = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings,
				       xdp_prog, rx, tx);
	if (err) {
		int err2;

		nfp_net_clear_config_and_disable(nn);

		/* Try with old configuration and old rings */
		err2 = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings,
						xdp_prog, rx, tx);
		if (err2)
			nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
			       err, err2);
	}
	for (r = num_vecs - 1; r >= nn->num_r_vecs; r--)
		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);

	if (rx)
		nfp_net_rx_ring_set_free(nn, rx, *xdp_prog);
	if (tx)
		nfp_net_tx_ring_set_free(nn, tx);

	nfp_net_open_stack(nn);

	return err;

err_free_rx:
	if (rx)
		nfp_net_rx_ring_set_free(nn, rx, *xdp_prog);
err_cleanup_vecs:
	for (r = num_vecs - 1; r >= nn->num_r_vecs; r--)
		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
	return err;
}
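
/* Note on the recovery path above: nfp_net_ring_swap_enable() exchanges the
 * device state with the caller's ring sets rather than copying, so after a
 * failed enable the old rings and counts live in @rx/@tx/@xdp_prog again;
 * calling it a second time with the same arguments therefore swaps the
 * previous configuration back in before the stack is re-opened.
 */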

static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct nfp_net *nn = netdev_priv(netdev);
	struct nfp_net_ring_set rx = {
		.n_rings = nn->num_rx_rings,
		.mtu = new_mtu,
		.dcnt = nn->rxd_cnt,
	};

	return nfp_net_ring_reconfig(nn, &nn->xdp_prog, &rx, NULL);
}

static void nfp_net_stat64(struct net_device *netdev,
			   struct rtnl_link_stats64 *stats)
{
	struct nfp_net *nn = netdev_priv(netdev);
	int r;

	for (r = 0; r < nn->num_r_vecs; r++) {
		struct nfp_net_r_vector *r_vec = &nn->r_vecs[r];
		u64 data[3];
		unsigned int start;

		do {
			start = u64_stats_fetch_begin(&r_vec->rx_sync);
			data[0] = r_vec->rx_pkts;
			data[1] = r_vec->rx_bytes;
			data[2] = r_vec->rx_drops;
		} while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
		stats->rx_packets += data[0];
		stats->rx_bytes += data[1];
		stats->rx_dropped += data[2];

		do {
			start = u64_stats_fetch_begin(&r_vec->tx_sync);
			data[0] = r_vec->tx_pkts;
			data[1] = r_vec->tx_bytes;
			data[2] = r_vec->tx_errors;
		} while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
		stats->tx_packets += data[0];
		stats->tx_bytes += data[1];
		stats->tx_errors += data[2];
	}
}
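
/* The u64_stats fetch/retry pairs above are seqcount-style reads: if a ring
 * vector updates its counters mid-copy, u64_stats_fetch_retry() returns true
 * and the snapshot is retaken, keeping the 64-bit counters consistent on
 * 32-bit SMP builds; on 64-bit kernels the synchronization compiles away.
 */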

static bool nfp_net_ebpf_capable(struct nfp_net *nn)
{
	if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
	    nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
		return true;
	return false;
}

static int
nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
		 struct tc_to_netdev *tc)
{
	struct nfp_net *nn = netdev_priv(netdev);

	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
		return -EOPNOTSUPP;
	if (proto != htons(ETH_P_ALL))
		return -EOPNOTSUPP;

	if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) {
		if (!nn->bpf_offload_xdp)
			return nfp_net_bpf_offload(nn, tc->cls_bpf);
		else
			return -EBUSY;
	}

	return -EINVAL;
}

static int nfp_net_set_features(struct net_device *netdev,
				netdev_features_t features)
{
	netdev_features_t changed = netdev->features ^ features;
	struct nfp_net *nn = netdev_priv(netdev);
	u32 new_ctrl;
	int err;

	/* Assume this is not called with features we have not advertised */

	new_ctrl = nn->ctrl;

	if (changed & NETIF_F_RXCSUM) {
		if (features & NETIF_F_RXCSUM)
			new_ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM;
	}

	if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
		if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
			new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM;
	}

	if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) {
		if (features & (NETIF_F_TSO | NETIF_F_TSO6))
			new_ctrl |= NFP_NET_CFG_CTRL_LSO;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_LSO;
	}

	if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
		if (features & NETIF_F_HW_VLAN_CTAG_RX)
			new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN;
	}

	if (changed & NETIF_F_HW_VLAN_CTAG_TX) {
		if (features & NETIF_F_HW_VLAN_CTAG_TX)
			new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN;
	}

	if (changed & NETIF_F_SG) {
		if (features & NETIF_F_SG)
			new_ctrl |= NFP_NET_CFG_CTRL_GATHER;
		else
			new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
	}

	if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
		nn_err(nn, "Cannot disable HW TC offload while in use\n");
		return -EBUSY;
	}

	nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
	       netdev->features, features, changed);

	if (new_ctrl == nn->ctrl)
		return 0;

	nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->ctrl, new_ctrl);
	nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
	if (err)
		return err;

	nn->ctrl = new_ctrl;

	return 0;
}

static netdev_features_t
nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
		       netdev_features_t features)
{
	u8 l4_hdr;

	/* We can't do TSO over double tagged packets (802.1AD) */
	features &= vlan_features_check(skb, features);

	if (!skb->encapsulation)
		return features;

	/* Ensure that inner L4 header offset fits into TX descriptor field */
	if (skb_is_gso(skb)) {
		u32 hdrlen;

		hdrlen = skb_inner_transport_header(skb) - skb->data +
			inner_tcp_hdrlen(skb);

		if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ))
			features &= ~NETIF_F_GSO_MASK;
	}

	/* VXLAN/GRE check */
	switch (vlan_get_protocol(skb)) {
	case htons(ETH_P_IP):
		l4_hdr = ip_hdr(skb)->protocol;
		break;
	case htons(ETH_P_IPV6):
		l4_hdr = ipv6_hdr(skb)->nexthdr;
		break;
	default:
		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
	}

	if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
	    skb->inner_protocol != htons(ETH_P_TEB) ||
	    (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) ||
	    (l4_hdr == IPPROTO_UDP &&
	     (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
	      sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);

	return features;
}
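
/* For the UDP case the check above requires the inner MAC header to sit
 * exactly sizeof(struct udphdr) + sizeof(struct vxlanhdr) == 8 + 8 == 16
 * bytes past the outer transport header, i.e. plain VXLAN framing; any
 * other UDP encapsulation falls back to software checksum/GSO.
 */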

/**
 * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
 * @nn:   NFP Net device to reconfigure
 * @idx:  Index into the port table where new port should be written
 * @port: UDP port to configure (pass zero to remove VXLAN port)
 */
static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port)
{
	int i;

	nn->vxlan_ports[idx] = port;

	if (!(nn->ctrl & NFP_NET_CFG_CTRL_VXLAN))
		return;

	BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1);
	for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2)
		nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(port),
			  be16_to_cpu(nn->vxlan_ports[i + 1]) << 16 |
			  be16_to_cpu(nn->vxlan_ports[i]));

	nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_VXLAN);
}
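
/* The port table is packed two ports per 32-bit register: the word at index
 * i / 2 carries vxlan_ports[i + 1] in its upper half and vxlan_ports[i] in
 * the lower.  E.g. with ports {4789, 0} the first word written is
 * 0x000012b5 (4789 == 0x12b5).
 */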

/**
 * nfp_net_find_vxlan_idx() - find table entry of the port or a free one
 * @nn:   NFP Network structure
 * @port: UDP port to look for
 *
 * Return: if the port is already in the table -- its position;
 *	   if the port is not in the table -- free position to use;
 *	   if the table is full -- -ENOSPC.
 */
static int nfp_net_find_vxlan_idx(struct nfp_net *nn, __be16 port)
{
	int i, free_idx = -ENOSPC;

	for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i++) {
		if (nn->vxlan_ports[i] == port)
			return i;
		if (!nn->vxlan_usecnt[i])
			free_idx = i;
	}

	return free_idx;
}

static void nfp_net_add_vxlan_port(struct net_device *netdev,
				   struct udp_tunnel_info *ti)
{
	struct nfp_net *nn = netdev_priv(netdev);
	int idx;

	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
		return;

	idx = nfp_net_find_vxlan_idx(nn, ti->port);
	if (idx == -ENOSPC)
		return;

	if (!nn->vxlan_usecnt[idx]++)
		nfp_net_set_vxlan_port(nn, idx, ti->port);
}

static void nfp_net_del_vxlan_port(struct net_device *netdev,
				   struct udp_tunnel_info *ti)
{
	struct nfp_net *nn = netdev_priv(netdev);
	int idx;

	if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
		return;

	idx = nfp_net_find_vxlan_idx(nn, ti->port);
	if (idx == -ENOSPC || !nn->vxlan_usecnt[idx])
		return;

	if (!--nn->vxlan_usecnt[idx])
		nfp_net_set_vxlan_port(nn, idx, 0);
}

static int nfp_net_xdp_offload(struct nfp_net *nn, struct bpf_prog *prog)
{
	struct tc_cls_bpf_offload cmd = {
		.prog = prog,
	};
	int ret;

	if (!nfp_net_ebpf_capable(nn))
		return -EINVAL;

	if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
		if (!nn->bpf_offload_xdp)
			return prog ? -EBUSY : 0;
		cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY;
	} else {
		if (!prog)
			return 0;
		cmd.command = TC_CLSBPF_ADD;
	}

	ret = nfp_net_bpf_offload(nn, &cmd);
	/* Stop offload if replace not possible */
	if (ret && cmd.command == TC_CLSBPF_REPLACE)
		nfp_net_xdp_offload(nn, NULL);
	nn->bpf_offload_xdp = prog && !ret;
	return ret;
}

static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog)
{
	struct nfp_net_ring_set rx = {
		.n_rings = nn->num_rx_rings,
		.mtu = nn->netdev->mtu,
		.dcnt = nn->rxd_cnt,
	};
	struct nfp_net_ring_set tx = {
		.n_rings = nn->num_tx_rings,
		.dcnt = nn->txd_cnt,
	};
	int err;

	if (prog && prog->xdp_adjust_head) {
		nn_err(nn, "Does not support bpf_xdp_adjust_head()\n");
		return -EOPNOTSUPP;
	}
	if (!prog && !nn->xdp_prog)
		return 0;
	if (prog && nn->xdp_prog) {
		prog = xchg(&nn->xdp_prog, prog);
		bpf_prog_put(prog);
		nfp_net_xdp_offload(nn, nn->xdp_prog);
		return 0;
	}

	tx.n_rings += prog ? nn->num_rx_rings : -nn->num_rx_rings;

	/* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
	err = nfp_net_ring_reconfig(nn, &prog, &rx, &tx);
	if (err)
		return err;

	/* @prog got swapped and is now the old one */
	if (prog)
		bpf_prog_put(prog);

	nfp_net_xdp_offload(nn, nn->xdp_prog);

	return 0;
}
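
/* Ring budget example for the reconfig above: with 4 RX and 4 TX rings,
 * attaching a program grows tx.n_rings to 8 so that every RX ring gets a
 * dedicated XDP TX ring, while nfp_net_ring_reconfig() keeps only the
 * original 4 visible to the stack; detaching shrinks the count back again.
 */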

static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
{
	struct nfp_net *nn = netdev_priv(netdev);

	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return nfp_net_xdp_setup(nn, xdp->prog);
	case XDP_QUERY_PROG:
		xdp->prog_attached = !!nn->xdp_prog;
		return 0;
	default:
		return -EINVAL;
	}
}

static const struct net_device_ops nfp_net_netdev_ops = {
	.ndo_open		= nfp_net_netdev_open,
	.ndo_stop		= nfp_net_netdev_close,
	.ndo_start_xmit		= nfp_net_tx,
	.ndo_get_stats64	= nfp_net_stat64,
	.ndo_setup_tc		= nfp_net_setup_tc,
	.ndo_tx_timeout		= nfp_net_tx_timeout,
	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
	.ndo_change_mtu		= nfp_net_change_mtu,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_set_features	= nfp_net_set_features,
	.ndo_features_check	= nfp_net_features_check,
	.ndo_udp_tunnel_add	= nfp_net_add_vxlan_port,
	.ndo_udp_tunnel_del	= nfp_net_del_vxlan_port,
	.ndo_xdp		= nfp_net_xdp,
};

/**
 * nfp_net_info() - Print general info about the NIC
 * @nn:      NFP Net device to reconfigure
 */
void nfp_net_info(struct nfp_net *nn)
{
	nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
		nn->is_vf ? "VF " : "",
		nn->num_tx_rings, nn->max_tx_rings,
		nn->num_rx_rings, nn->max_rx_rings);
	nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
		nn->fw_ver.resv, nn->fw_ver.class,
		nn->fw_ver.major, nn->fw_ver.minor,
		nn->max_mtu);
	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
		nn->cap,
		nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
		nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
		nn->cap & NFP_NET_CFG_CTRL_L2MC     ? "L2MCFILT " : "",
		nn->cap & NFP_NET_CFG_CTRL_RXCSUM   ? "RXCSUM "   : "",
		nn->cap & NFP_NET_CFG_CTRL_TXCSUM   ? "TXCSUM "   : "",
		nn->cap & NFP_NET_CFG_CTRL_RXVLAN   ? "RXVLAN "   : "",
		nn->cap & NFP_NET_CFG_CTRL_TXVLAN   ? "TXVLAN "   : "",
		nn->cap & NFP_NET_CFG_CTRL_SCATTER  ? "SCATTER "  : "",
		nn->cap & NFP_NET_CFG_CTRL_GATHER   ? "GATHER "   : "",
		nn->cap & NFP_NET_CFG_CTRL_LSO      ? "TSO "      : "",
		nn->cap & NFP_NET_CFG_CTRL_RSS      ? "RSS "      : "",
		nn->cap & NFP_NET_CFG_CTRL_L2SWITCH ? "L2SWITCH " : "",
		nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
		nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
		nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "    : "",
		nfp_net_ebpf_capable(nn)            ? "BPF "      : "");
}

/**
 * nfp_net_netdev_alloc() - Allocate netdev and related structure
 * @pdev:         PCI device
 * @max_tx_rings: Maximum number of TX rings supported by device
 * @max_rx_rings: Maximum number of RX rings supported by device
 *
 * This function allocates a netdev device and fills in the initial
 * part of the @struct nfp_net structure.
 *
 * Return: NFP Net device structure, or ERR_PTR on error.
 */
struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
				     unsigned int max_tx_rings,
				     unsigned int max_rx_rings)
{
	struct net_device *netdev;
	struct nfp_net *nn;

	netdev = alloc_etherdev_mqs(sizeof(struct nfp_net),
				    max_tx_rings, max_rx_rings);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	SET_NETDEV_DEV(netdev, &pdev->dev);
	nn = netdev_priv(netdev);

	nn->netdev = netdev;
	nn->pdev = pdev;

	nn->max_tx_rings = max_tx_rings;
	nn->max_rx_rings = max_rx_rings;

	nn->num_tx_rings = min_t(unsigned int, max_tx_rings, num_online_cpus());
	nn->num_rx_rings = min_t(unsigned int, max_rx_rings,
				 netif_get_num_default_rss_queues());

	nn->num_r_vecs = max(nn->num_tx_rings, nn->num_rx_rings);
	nn->num_r_vecs = min_t(unsigned int, nn->num_r_vecs, num_online_cpus());

	nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
	nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;

	spin_lock_init(&nn->reconfig_lock);
	spin_lock_init(&nn->rx_filter_lock);
	spin_lock_init(&nn->link_status_lock);

	setup_timer(&nn->reconfig_timer,
		    nfp_net_reconfig_timer, (unsigned long)nn);
	setup_timer(&nn->rx_filter_stats_timer,
		    nfp_net_filter_stats_timer, (unsigned long)nn);

	return nn;
}
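
/* Sizing sketch with made-up numbers: on a 16-CPU host with 64 HW rings this
 * yields 16 TX rings (one per online CPU), an RX ring count from
 * netif_get_num_default_rss_queues() (typically capped at 8 on kernels of
 * this era), and max(16, 8) == 16 ring vectors, still bounded by the CPU
 * count.
 */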

/**
 * nfp_net_netdev_free() - Undo what @nfp_net_netdev_alloc() did
 * @nn:      NFP Net device to reconfigure
 */
void nfp_net_netdev_free(struct nfp_net *nn)
{
	free_netdev(nn->netdev);
}

/**
 * nfp_net_rss_init() - Set the initial RSS parameters
 * @nn:      NFP Net device to reconfigure
 */
static void nfp_net_rss_init(struct nfp_net *nn)
{
	netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ);

	nfp_net_rss_init_itbl(nn);

	/* Enable IPv4/IPv6 TCP by default */
	nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
		      NFP_NET_CFG_RSS_IPV6_TCP |
		      NFP_NET_CFG_RSS_TOEPLITZ |
		      NFP_NET_CFG_RSS_MASK;
}

/**
 * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters
 * @nn:      NFP Net device to reconfigure
 */
static void nfp_net_irqmod_init(struct nfp_net *nn)
{
	nn->rx_coalesce_usecs      = 50;
	nn->rx_coalesce_max_frames = 64;
	nn->tx_coalesce_usecs      = 50;
	nn->tx_coalesce_max_frames = 64;
}

/**
 * nfp_net_netdev_init() - Initialise/finalise the netdev structure
 * @netdev:      netdev structure
 *
 * Return: 0 on success or negative errno on error.
 */
int nfp_net_netdev_init(struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);
	int err;

	/* Get some of the read-only fields from the BAR */
	nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
	nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);

	nfp_net_write_mac_addr(nn);

	/* Determine RX packet/metadata boundary offset */
	if (nn->fw_ver.major >= 2)
		nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
	else
		nn->rx_offset = NFP_NET_RX_OFFSET;

	/* Set default MTU and Freelist buffer size */
	if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
		netdev->mtu = nn->max_mtu;
	else
		netdev->mtu = NFP_NET_DEFAULT_MTU;
	nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, netdev->mtu);

	/* Advertise/enable offloads based on capabilities
	 *
	 * Note: netdev->features show the currently enabled features
	 * and netdev->hw_features advertises which features are
	 * supported.  By default we enable most features.
	 */
	netdev->hw_features = NETIF_F_HIGHDMA;
	if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM) {
		netdev->hw_features |= NETIF_F_RXCSUM;
		nn->ctrl |= NFP_NET_CFG_CTRL_RXCSUM;
	}
	if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) {
		netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
		nn->ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
	}
	if (nn->cap & NFP_NET_CFG_CTRL_GATHER) {
		netdev->hw_features |= NETIF_F_SG;
		nn->ctrl |= NFP_NET_CFG_CTRL_GATHER;
	}
	if ((nn->cap & NFP_NET_CFG_CTRL_LSO) && nn->fw_ver.major > 2) {
		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
		nn->ctrl |= NFP_NET_CFG_CTRL_LSO;
	}
	if (nn->cap & NFP_NET_CFG_CTRL_RSS) {
		netdev->hw_features |= NETIF_F_RXHASH;
		nfp_net_rss_init(nn);
		nn->ctrl |= NFP_NET_CFG_CTRL_RSS;
	}
	if (nn->cap & NFP_NET_CFG_CTRL_VXLAN &&
	    nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
		if (nn->cap & NFP_NET_CFG_CTRL_LSO)
			netdev->hw_features |= NETIF_F_GSO_GRE |
					       NETIF_F_GSO_UDP_TUNNEL;
		nn->ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE;

		netdev->hw_enc_features = netdev->hw_features;
	}

	netdev->vlan_features = netdev->hw_features;

	if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) {
		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
		nn->ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
	}
	if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) {
		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
		nn->ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
	}

	netdev->features = netdev->hw_features;

	if (nfp_net_ebpf_capable(nn))
		netdev->hw_features |= NETIF_F_HW_TC;

	/* Advertise but disable TSO by default. */
	netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);

	/* Allow L2 Broadcast and Multicast through by default, if supported */
	if (nn->cap & NFP_NET_CFG_CTRL_L2BC)
		nn->ctrl |= NFP_NET_CFG_CTRL_L2BC;
	if (nn->cap & NFP_NET_CFG_CTRL_L2MC)
		nn->ctrl |= NFP_NET_CFG_CTRL_L2MC;

	/* Allow IRQ moderation, if supported */
	if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
		nfp_net_irqmod_init(nn);
		nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
	}

	/* Stash the re-configuration queue away.  First odd queue in TX Bar */
	nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;

	/* Make sure the FW knows the netdev is supposed to be disabled here */
	nn_writel(nn, NFP_NET_CFG_CTRL, 0);
	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
	nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING |
				   NFP_NET_CFG_UPDATE_GEN);
	if (err)
		return err;

	/* Finalise the netdev setup */
	netdev->netdev_ops = &nfp_net_netdev_ops;
	netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);

	/* MTU range: 68 - hw-specific max */
	netdev->min_mtu = ETH_MIN_MTU;
	netdev->max_mtu = nn->max_mtu;

	netif_carrier_off(netdev);

	nfp_net_set_ethtool_ops(netdev);
	nfp_net_vecs_init(netdev);

	return register_netdev(netdev);
}

/**
 * nfp_net_netdev_clean() - Undo what nfp_net_netdev_init() did.
 * @netdev:      netdev structure
 */
void nfp_net_netdev_clean(struct net_device *netdev)
{
	struct nfp_net *nn = netdev_priv(netdev);

	if (nn->xdp_prog)
		bpf_prog_put(nn->xdp_prog);
	if (nn->bpf_offload_xdp)
		nfp_net_xdp_offload(nn, NULL);
	unregister_netdev(nn->netdev);
}