/*
 * Copyright (C) 2015-2017 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
 * source tree or the BSD 2-Clause License provided below. You have the
 * option to license this software under the complete terms of either license.
 *
 * The BSD 2-Clause License:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      1. Redistributions of source code must retain the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer.
 *
 *      2. Redistributions in binary form must reproduce the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer in the documentation and/or other materials
 *         provided with the distribution.
 *
 *      THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 *      EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 *      MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 *      NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 *      BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 *      ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 *      CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 *      SOFTWARE.
 */

/*
 * nfp_net_common.c
 * Netronome network device driver: Common functions between PF and VF
 * Authors: Jakub Kicinski <jakub.kicinski@netronome.com>
 *          Jason McMullan <jason.mcmullan@netronome.com>
 *          Rolf Neugebauer <rolf.neugebauer@netronome.com>
 *          Brad Petrus <brad.petrus@netronome.com>
 *          Chris Telfer <chris.telfer@netronome.com>
 */

#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/page_ref.h>
#include <linux/pci.h>
#include <linux/pci_regs.h>
#include <linux/msi.h>
#include <linux/ethtool.h>
#include <linux/log2.h>
#include <linux/if_vlan.h>
#include <linux/random.h>

#include <linux/ktime.h>

#include <net/pkt_cls.h>
#include <net/vxlan.h>

#include "nfpcore/nfp_nsp.h"
#include "nfp_net_ctrl.h"
#include "nfp_net.h"
#include "nfp_port.h"

/**
 * nfp_net_get_fw_version() - Read and parse the FW version
 * @fw_ver: Output fw_version structure to read to
 * @ctrl_bar: Mapped address of the control BAR
 */
void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
			    void __iomem *ctrl_bar)
{
	u32 reg;

	reg = readl(ctrl_bar + NFP_NET_CFG_VERSION);
	put_unaligned_le32(reg, fw_ver);
}

static dma_addr_t nfp_net_dma_map_rx(struct nfp_net_dp *dp, void *frag)
{
	return dma_map_single_attrs(dp->dev, frag + NFP_NET_RX_BUF_HEADROOM,
				    dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
				    dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
}

static void
nfp_net_dma_sync_dev_rx(const struct nfp_net_dp *dp, dma_addr_t dma_addr)
{
	dma_sync_single_for_device(dp->dev, dma_addr,
				   dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
				   dp->rx_dma_dir);
}

static void nfp_net_dma_unmap_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr)
{
	dma_unmap_single_attrs(dp->dev, dma_addr,
			       dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA,
			       dp->rx_dma_dir, DMA_ATTR_SKIP_CPU_SYNC);
}

static void nfp_net_dma_sync_cpu_rx(struct nfp_net_dp *dp, dma_addr_t dma_addr,
				    unsigned int len)
{
	dma_sync_single_for_cpu(dp->dev, dma_addr - NFP_NET_RX_BUF_HEADROOM,
				len, dp->rx_dma_dir);
}
/* Firmware reconfig
 *
 * Firmware reconfig may take a while so we have two versions of it -
 * synchronous and asynchronous (posted). All synchronous callers are holding
 * RTNL so we don't have to worry about serializing them.
 */
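
/* Editor's note -- illustrative usage sketch, not part of the upstream file:
 * a synchronous caller (which holds RTNL) typically issues
 *
 *	err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
 *
 * while contexts which cannot sleep post the request instead:
 *
 *	nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
 *
 * Posted requests are merged and get kicked off by the reconfig timer or by
 * the next synchronous caller; NFP_NET_CFG_UPDATE_GEN is used here purely as
 * an example update bit.
 */
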
static void nfp_net_reconfig_start(struct nfp_net *nn, u32 update)
{
	nn_writel(nn, NFP_NET_CFG_UPDATE, update);
	/* ensure update is written before pinging HW */
	nn_pci_flush(nn);
	nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1);
}

/* Pass 0 as update to run posted reconfigs. */
static void nfp_net_reconfig_start_async(struct nfp_net *nn, u32 update)
{
	update |= nn->reconfig_posted;
	nn->reconfig_posted = 0;

	nfp_net_reconfig_start(nn, update);

	nn->reconfig_timer_active = true;
	mod_timer(&nn->reconfig_timer, jiffies + NFP_NET_POLL_TIMEOUT * HZ);
}

static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check)
{
	u32 reg;

	reg = nn_readl(nn, NFP_NET_CFG_UPDATE);
	if (reg == 0)
		return true;
	if (reg & NFP_NET_CFG_UPDATE_ERR) {
		nn_err(nn, "Reconfig error: 0x%08x\n", reg);
		return true;
	} else if (last_check) {
		nn_err(nn, "Reconfig timeout: 0x%08x\n", reg);
		return true;
	}

	return false;
}

static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
{
	bool timed_out = false;

	/* Poll update field, waiting for NFP to ack the config */
	while (!nfp_net_reconfig_check_done(nn, timed_out)) {
		msleep(1);
		timed_out = time_is_before_eq_jiffies(deadline);
	}

	if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
		return -EIO;

	return timed_out ? -EIO : 0;
}

static void nfp_net_reconfig_timer(unsigned long data)
{
	struct nfp_net *nn = (void *)data;

	spin_lock_bh(&nn->reconfig_lock);

	nn->reconfig_timer_active = false;

	/* If sync caller is present it will take over from us */
	if (nn->reconfig_sync_present)
		goto done;

	/* Read reconfig status and report errors */
	nfp_net_reconfig_check_done(nn, true);

	if (nn->reconfig_posted)
		nfp_net_reconfig_start_async(nn, 0);
done:
	spin_unlock_bh(&nn->reconfig_lock);
}

/**
 * nfp_net_reconfig_post() - Post async reconfig request
 * @nn: NFP Net device to reconfigure
 * @update: The value for the update field in the BAR config
 *
 * Record FW reconfiguration request. Reconfiguration will be kicked off
 * whenever reconfiguration machinery is idle. Multiple requests can be
 * merged together!
 */
static void nfp_net_reconfig_post(struct nfp_net *nn, u32 update)
{
	spin_lock_bh(&nn->reconfig_lock);

	/* Sync caller will kick off async reconf when it's done, just post */
	if (nn->reconfig_sync_present) {
		nn->reconfig_posted |= update;
		goto done;
	}

	/* Opportunistically check if the previous command is done */
	if (!nn->reconfig_timer_active ||
	    nfp_net_reconfig_check_done(nn, false))
		nfp_net_reconfig_start_async(nn, update);
	else
		nn->reconfig_posted |= update;
done:
	spin_unlock_bh(&nn->reconfig_lock);
}

/**
 * nfp_net_reconfig() - Reconfigure the firmware
 * @nn: NFP Net device to reconfigure
 * @update: The value for the update field in the BAR config
 *
 * Write the update word to the BAR and ping the reconfig queue. Then
 * poll until the firmware has acknowledged the update by zeroing the
 * update word.
 *
 * Return: Negative errno on error, 0 on success
 */
int nfp_net_reconfig(struct nfp_net *nn, u32 update)
{
	bool cancelled_timer = false;
	u32 pre_posted_requests;
	int ret;

	spin_lock_bh(&nn->reconfig_lock);

	nn->reconfig_sync_present = true;

	if (nn->reconfig_timer_active) {
		del_timer(&nn->reconfig_timer);
		nn->reconfig_timer_active = false;
		cancelled_timer = true;
	}
	pre_posted_requests = nn->reconfig_posted;
	nn->reconfig_posted = 0;

	spin_unlock_bh(&nn->reconfig_lock);

	if (cancelled_timer)
		nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires);

	/* Run the posted reconfigs which were issued before we started */
	if (pre_posted_requests) {
		nfp_net_reconfig_start(nn, pre_posted_requests);
		nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
	}

	nfp_net_reconfig_start(nn, update);
	ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);

	spin_lock_bh(&nn->reconfig_lock);

	if (nn->reconfig_posted)
		nfp_net_reconfig_start_async(nn, 0);

	nn->reconfig_sync_present = false;

	spin_unlock_bh(&nn->reconfig_lock);

	return ret;
}

283/* Interrupt configuration and handling
284 */
285
4c352362
JK
286/**
287 * nfp_net_irq_unmask() - Unmask automasked interrupt
288 * @nn: NFP Network structure
289 * @entry_nr: MSI-X table entry
290 *
416db5c1 291 * Clear the ICR for the IRQ entry.
4c352362
JK
292 */
293static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr)
294{
4c352362
JK
295 nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED);
296 nn_pci_flush(nn);
297}
298
299/**
fdace6c2
JK
300 * nfp_net_irqs_alloc() - allocates MSI-X irqs
301 * @pdev: PCI device structure
302 * @irq_entries: Array to be initialized and used to hold the irq entries
303 * @min_irqs: Minimal acceptable number of interrupts
304 * @wanted_irqs: Target number of interrupts to allocate
4c352362 305 *
fdace6c2 306 * Return: Number of irqs obtained or 0 on error.
4c352362 307 */
fdace6c2
JK
308unsigned int
309nfp_net_irqs_alloc(struct pci_dev *pdev, struct msix_entry *irq_entries,
310 unsigned int min_irqs, unsigned int wanted_irqs)
4c352362 311{
fdace6c2
JK
312 unsigned int i;
313 int got_irqs;
4c352362 314
fdace6c2
JK
315 for (i = 0; i < wanted_irqs; i++)
316 irq_entries[i].entry = i;
4c352362 317
fdace6c2
JK
318 got_irqs = pci_enable_msix_range(pdev, irq_entries,
319 min_irqs, wanted_irqs);
320 if (got_irqs < 0) {
321 dev_err(&pdev->dev, "Failed to enable %d-%d MSI-X (err=%d)\n",
322 min_irqs, wanted_irqs, got_irqs);
4c352362
JK
323 return 0;
324 }
325
fdace6c2
JK
326 if (got_irqs < wanted_irqs)
327 dev_warn(&pdev->dev, "Unable to allocate %d IRQs got only %d\n",
328 wanted_irqs, got_irqs);
329
330 return got_irqs;
4c352362
JK
331}
332
4c352362 333/**
fdace6c2
JK
334 * nfp_net_irqs_assign() - Assign interrupts allocated externally to netdev
335 * @nn: NFP Network structure
336 * @irq_entries: Table of allocated interrupts
337 * @n: Size of @irq_entries (number of entries to grab)
4c352362 338 *
fdace6c2
JK
339 * After interrupts are allocated with nfp_net_irqs_alloc() this function
340 * should be called to assign them to a specific netdev (port).
4c352362 341 */
fdace6c2
JK
342void
343nfp_net_irqs_assign(struct nfp_net *nn, struct msix_entry *irq_entries,
344 unsigned int n)
4c352362 345{
79c12a75
JK
346 struct nfp_net_dp *dp = &nn->dp;
347
b33ae997 348 nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS;
79c12a75 349 dp->num_r_vecs = nn->max_r_vecs;
4c352362 350
fdace6c2 351 memcpy(nn->irq_entries, irq_entries, sizeof(*irq_entries) * n);
4c352362 352
79c12a75
JK
353 if (dp->num_rx_rings > dp->num_r_vecs ||
354 dp->num_tx_rings > dp->num_r_vecs)
87232d96
JK
355 dev_warn(nn->dp.dev, "More rings (%d,%d) than vectors (%d).\n",
356 dp->num_rx_rings, dp->num_tx_rings,
357 dp->num_r_vecs);
fdace6c2 358
79c12a75
JK
359 dp->num_rx_rings = min(dp->num_r_vecs, dp->num_rx_rings);
360 dp->num_tx_rings = min(dp->num_r_vecs, dp->num_tx_rings);
361 dp->num_stack_tx_rings = dp->num_tx_rings;
4c352362
JK
362}
363
364/**
365 * nfp_net_irqs_disable() - Disable interrupts
fdace6c2 366 * @pdev: PCI device structure
4c352362
JK
367 *
368 * Undoes what @nfp_net_irqs_alloc() does.
369 */
fdace6c2 370void nfp_net_irqs_disable(struct pci_dev *pdev)
4c352362 371{
fdace6c2 372 pci_disable_msix(pdev);
4c352362
JK
373}
374
375/**
376 * nfp_net_irq_rxtx() - Interrupt service routine for RX/TX rings.
377 * @irq: Interrupt
378 * @data: Opaque data structure
379 *
380 * Return: Indicate if the interrupt has been handled.
381 */
382static irqreturn_t nfp_net_irq_rxtx(int irq, void *data)
383{
384 struct nfp_net_r_vector *r_vec = data;
385
386 napi_schedule_irqoff(&r_vec->napi);
387
388 /* The FW auto-masks any interrupt, either via the MASK bit in
389 * the MSI-X table or via the per entry ICR field. So there
390 * is no need to disable interrupts here.
391 */
392 return IRQ_HANDLED;
393}
394
395/**
396 * nfp_net_read_link_status() - Reread link status from control BAR
397 * @nn: NFP Network structure
398 */
399static void nfp_net_read_link_status(struct nfp_net *nn)
400{
401 unsigned long flags;
402 bool link_up;
403 u32 sts;
404
405 spin_lock_irqsave(&nn->link_status_lock, flags);
406
407 sts = nn_readl(nn, NFP_NET_CFG_STS);
408 link_up = !!(sts & NFP_NET_CFG_STS_LINK);
409
410 if (nn->link_up == link_up)
411 goto out;
412
413 nn->link_up = link_up;
6d4f8cba
JK
414 if (nn->port)
415 set_bit(NFP_PORT_CHANGED, &nn->port->flags);
4c352362
JK
416
417 if (nn->link_up) {
79c12a75
JK
418 netif_carrier_on(nn->dp.netdev);
419 netdev_info(nn->dp.netdev, "NIC Link is Up\n");
4c352362 420 } else {
79c12a75
JK
421 netif_carrier_off(nn->dp.netdev);
422 netdev_info(nn->dp.netdev, "NIC Link is Down\n");
4c352362
JK
423 }
424out:
425 spin_unlock_irqrestore(&nn->link_status_lock, flags);
426}
427
428/**
429 * nfp_net_irq_lsc() - Interrupt service routine for link state changes
430 * @irq: Interrupt
431 * @data: Opaque data structure
432 *
433 * Return: Indicate if the interrupt has been handled.
434 */
435static irqreturn_t nfp_net_irq_lsc(int irq, void *data)
436{
437 struct nfp_net *nn = data;
fdace6c2
JK
438 struct msix_entry *entry;
439
440 entry = &nn->irq_entries[NFP_NET_IRQ_LSC_IDX];
4c352362
JK
441
442 nfp_net_read_link_status(nn);
443
fdace6c2 444 nfp_net_irq_unmask(nn, entry->entry);
4c352362
JK
445
446 return IRQ_HANDLED;
447}
448
449/**
450 * nfp_net_irq_exn() - Interrupt service routine for exceptions
451 * @irq: Interrupt
452 * @data: Opaque data structure
453 *
454 * Return: Indicate if the interrupt has been handled.
455 */
456static irqreturn_t nfp_net_irq_exn(int irq, void *data)
457{
458 struct nfp_net *nn = data;
459
460 nn_err(nn, "%s: UNIMPLEMENTED.\n", __func__);
461 /* XXX TO BE IMPLEMENTED */
462 return IRQ_HANDLED;
463}
464
465/**
466 * nfp_net_tx_ring_init() - Fill in the boilerplate for a TX ring
467 * @tx_ring: TX ring structure
d79737c2
JK
468 * @r_vec: IRQ vector servicing this ring
469 * @idx: Ring index
92e68195 470 * @is_xdp: Is this an XDP TX ring?
4c352362 471 */
d79737c2
JK
472static void
473nfp_net_tx_ring_init(struct nfp_net_tx_ring *tx_ring,
92e68195
JK
474 struct nfp_net_r_vector *r_vec, unsigned int idx,
475 bool is_xdp)
4c352362 476{
4c352362
JK
477 struct nfp_net *nn = r_vec->nfp_net;
478
d79737c2
JK
479 tx_ring->idx = idx;
480 tx_ring->r_vec = r_vec;
92e68195 481 tx_ring->is_xdp = is_xdp;
d79737c2 482
4c352362
JK
483 tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
484 tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
485}
486
487/**
488 * nfp_net_rx_ring_init() - Fill in the boilerplate for a RX ring
489 * @rx_ring: RX ring structure
d79737c2
JK
490 * @r_vec: IRQ vector servicing this ring
491 * @idx: Ring index
4c352362 492 */
d79737c2
JK
493static void
494nfp_net_rx_ring_init(struct nfp_net_rx_ring *rx_ring,
495 struct nfp_net_r_vector *r_vec, unsigned int idx)
4c352362 496{
4c352362
JK
497 struct nfp_net *nn = r_vec->nfp_net;
498
d79737c2
JK
499 rx_ring->idx = idx;
500 rx_ring->r_vec = r_vec;
501
4c352362 502 rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
4c352362 503 rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
4c352362
JK
504}
505
506/**
fdace6c2 507 * nfp_net_vecs_init() - Assign IRQs and setup rvecs.
beba69ca 508 * @nn: NFP Network structure
4c352362 509 */
beba69ca 510static void nfp_net_vecs_init(struct nfp_net *nn)
4c352362 511{
4c352362
JK
512 struct nfp_net_r_vector *r_vec;
513 int r;
514
4c352362
JK
515 nn->lsc_handler = nfp_net_irq_lsc;
516 nn->exn_handler = nfp_net_irq_exn;
517
164d1e9e 518 for (r = 0; r < nn->max_r_vecs; r++) {
fdace6c2
JK
519 struct msix_entry *entry;
520
521 entry = &nn->irq_entries[NFP_NET_NON_Q_VECTORS + r];
522
4c352362
JK
523 r_vec = &nn->r_vecs[r];
524 r_vec->nfp_net = nn;
525 r_vec->handler = nfp_net_irq_rxtx;
fdace6c2
JK
526 r_vec->irq_entry = entry->entry;
527 r_vec->irq_vector = entry->vector;
4c352362
JK
528
529 cpumask_set_cpu(r, &r_vec->affinity_mask);
4c352362
JK
530 }
531}
532
533/**
534 * nfp_net_aux_irq_request() - Request an auxiliary interrupt (LSC or EXN)
535 * @nn: NFP Network structure
536 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
537 * @format: printf-style format to construct the interrupt name
538 * @name: Pointer to allocated space for interrupt name
539 * @name_sz: Size of space for interrupt name
540 * @vector_idx: Index of MSI-X vector used for this interrupt
541 * @handler: IRQ handler to register for this interrupt
542 */
543static int
544nfp_net_aux_irq_request(struct nfp_net *nn, u32 ctrl_offset,
545 const char *format, char *name, size_t name_sz,
546 unsigned int vector_idx, irq_handler_t handler)
547{
548 struct msix_entry *entry;
549 int err;
550
551 entry = &nn->irq_entries[vector_idx];
552
79c12a75 553 snprintf(name, name_sz, format, netdev_name(nn->dp.netdev));
4c352362
JK
554 err = request_irq(entry->vector, handler, 0, name, nn);
555 if (err) {
556 nn_err(nn, "Failed to request IRQ %d (err=%d).\n",
557 entry->vector, err);
558 return err;
559 }
fdace6c2 560 nn_writeb(nn, ctrl_offset, entry->entry);
4c352362
JK
561
562 return 0;
563}
564
565/**
566 * nfp_net_aux_irq_free() - Free an auxiliary interrupt (LSC or EXN)
567 * @nn: NFP Network structure
568 * @ctrl_offset: Control BAR offset where IRQ configuration should be written
569 * @vector_idx: Index of MSI-X vector used for this interrupt
570 */
571static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
572 unsigned int vector_idx)
573{
574 nn_writeb(nn, ctrl_offset, 0xff);
575 free_irq(nn->irq_entries[vector_idx].vector, nn);
576}
577
/* Transmit
 *
 * One queue controller peripheral queue is used for transmit. The
 * driver en-queues packets for transmit by advancing the write
 * pointer. The device indicates that packets have transmitted by
 * advancing the read pointer. The driver maintains a local copy of
 * the read and write pointer in @struct nfp_net_tx_ring. The driver
 * keeps @wr_p in sync with the queue controller write pointer and can
 * determine how many packets have been transmitted by comparing its
 * copy of the read pointer @rd_p with the read pointer maintained by
 * the queue controller peripheral.
 */

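/* Editor's note -- added illustration, not upstream code: @wr_p and @rd_p are
 * free-running 32-bit counters, so the number of descriptors currently in
 * flight is simply their unsigned difference (correct across wrap-around) and
 * a ring slot is obtained by masking with the ring size, which is assumed to
 * be a power of two (this is what the D_IDX() helper used below is assumed to
 * expand to):
 *
 *	in_flight = tx_ring->wr_p - tx_ring->rd_p;	// <= tx_ring->cnt
 *	slot	  = tx_ring->wr_p & (tx_ring->cnt - 1);	// == D_IDX()
 */
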
/**
 * nfp_net_tx_full() - Check if the TX ring is full
 * @tx_ring: TX ring to check
 * @dcnt: Number of descriptors that need to be enqueued (must be >= 1)
 *
 * This function checks, based on the *host copy* of the read/write
 * pointers, whether a given TX ring is full. The real TX queue may have
 * some newly made available slots.
 *
 * Return: True if the ring is full.
 */
static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
{
	return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt);
}

/* Wrappers for deciding when to stop and restart TX queues */
static int nfp_net_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
	return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4);
}

static int nfp_net_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
	return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1);
}
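
/* Editor's note (added commentary, not upstream): the wake threshold above
 * demands room for MAX_SKB_FRAGS * 4 descriptors while the stop threshold
 * only needs MAX_SKB_FRAGS + 1 (the worst case for one skb), so a stopped
 * queue is restarted only after a fair amount of space has been reclaimed.
 * This hysteresis avoids rapid stop/start toggling of the netdev queue.
 */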
617
618/**
619 * nfp_net_tx_ring_stop() - stop tx ring
620 * @nd_q: netdev queue
621 * @tx_ring: driver tx queue structure
622 *
623 * Safely stop TX ring. Remember that while we are running .start_xmit()
624 * someone else may be cleaning the TX ring completions so we need to be
625 * extra careful here.
626 */
627static void nfp_net_tx_ring_stop(struct netdev_queue *nd_q,
628 struct nfp_net_tx_ring *tx_ring)
629{
630 netif_tx_stop_queue(nd_q);
631
632 /* We can race with the TX completion out of NAPI so recheck */
633 smp_mb();
634 if (unlikely(nfp_net_tx_ring_should_wake(tx_ring)))
635 netif_tx_start_queue(nd_q);
636}
637
638/**
639 * nfp_net_tx_tso() - Set up Tx descriptor for LSO
4c352362
JK
640 * @r_vec: per-ring structure
641 * @txbuf: Pointer to driver soft TX descriptor
642 * @txd: Pointer to HW TX descriptor
643 * @skb: Pointer to SKB
644 *
645 * Set up Tx descriptor for LSO, do nothing for non-LSO skbs.
646 * Return error on packet header greater than maximum supported LSO header size.
647 */
79c12a75 648static void nfp_net_tx_tso(struct nfp_net_r_vector *r_vec,
4c352362
JK
649 struct nfp_net_tx_buf *txbuf,
650 struct nfp_net_tx_desc *txd, struct sk_buff *skb)
651{
652 u32 hdrlen;
653 u16 mss;
654
655 if (!skb_is_gso(skb))
656 return;
657
28063be6
EP
658 if (!skb->encapsulation) {
659 txd->l3_offset = skb_network_offset(skb);
660 txd->l4_offset = skb_transport_offset(skb);
4c352362 661 hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
28063be6
EP
662 } else {
663 txd->l3_offset = skb_inner_network_offset(skb);
664 txd->l4_offset = skb_inner_transport_offset(skb);
4c352362
JK
665 hdrlen = skb_inner_transport_header(skb) - skb->data +
666 inner_tcp_hdrlen(skb);
28063be6 667 }
4c352362
JK
668
669 txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs;
670 txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1);
671
672 mss = skb_shinfo(skb)->gso_size & PCIE_DESC_TX_MSS_MASK;
e53fc9fa 673 txd->lso_hdrlen = hdrlen;
4c352362
JK
674 txd->mss = cpu_to_le16(mss);
675 txd->flags |= PCIE_DESC_TX_LSO;
676
677 u64_stats_update_begin(&r_vec->tx_sync);
678 r_vec->tx_lso++;
679 u64_stats_update_end(&r_vec->tx_sync);
680}
681
682/**
683 * nfp_net_tx_csum() - Set TX CSUM offload flags in TX descriptor
79c12a75 684 * @dp: NFP Net data path struct
4c352362
JK
685 * @r_vec: per-ring structure
686 * @txbuf: Pointer to driver soft TX descriptor
687 * @txd: Pointer to TX descriptor
688 * @skb: Pointer to SKB
689 *
690 * This function sets the TX checksum flags in the TX descriptor based
691 * on the configuration and the protocol of the packet to be transmitted.
692 */
79c12a75
JK
693static void nfp_net_tx_csum(struct nfp_net_dp *dp,
694 struct nfp_net_r_vector *r_vec,
4c352362
JK
695 struct nfp_net_tx_buf *txbuf,
696 struct nfp_net_tx_desc *txd, struct sk_buff *skb)
697{
698 struct ipv6hdr *ipv6h;
699 struct iphdr *iph;
700 u8 l4_hdr;
701
79c12a75 702 if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
4c352362
JK
703 return;
704
705 if (skb->ip_summed != CHECKSUM_PARTIAL)
706 return;
707
708 txd->flags |= PCIE_DESC_TX_CSUM;
709 if (skb->encapsulation)
710 txd->flags |= PCIE_DESC_TX_ENCAP;
711
712 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
713 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
714
715 if (iph->version == 4) {
716 txd->flags |= PCIE_DESC_TX_IP4_CSUM;
717 l4_hdr = iph->protocol;
718 } else if (ipv6h->version == 6) {
719 l4_hdr = ipv6h->nexthdr;
720 } else {
79c12a75 721 nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
4c352362
JK
722 return;
723 }
724
725 switch (l4_hdr) {
726 case IPPROTO_TCP:
727 txd->flags |= PCIE_DESC_TX_TCP_CSUM;
728 break;
729 case IPPROTO_UDP:
730 txd->flags |= PCIE_DESC_TX_UDP_CSUM;
731 break;
732 default:
79c12a75 733 nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr);
4c352362
JK
734 return;
735 }
736
737 u64_stats_update_begin(&r_vec->tx_sync);
738 if (skb->encapsulation)
739 r_vec->hw_csum_tx_inner += txbuf->pkt_cnt;
740 else
741 r_vec->hw_csum_tx += txbuf->pkt_cnt;
742 u64_stats_update_end(&r_vec->tx_sync);
743}
744
ecd63a02
JK
745static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
746{
747 wmb();
748 nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
749 tx_ring->wr_ptr_add = 0;
750}
751
4c352362
JK
752/**
753 * nfp_net_tx() - Main transmit entry point
754 * @skb: SKB to transmit
755 * @netdev: netdev structure
756 *
757 * Return: NETDEV_TX_OK on success.
758 */
759static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
760{
761 struct nfp_net *nn = netdev_priv(netdev);
762 const struct skb_frag_struct *frag;
4c352362 763 struct nfp_net_tx_desc *txd, txdg;
4c352362 764 struct nfp_net_tx_ring *tx_ring;
bef6b1b7
JK
765 struct nfp_net_r_vector *r_vec;
766 struct nfp_net_tx_buf *txbuf;
4c352362 767 struct netdev_queue *nd_q;
79c12a75 768 struct nfp_net_dp *dp;
4c352362
JK
769 dma_addr_t dma_addr;
770 unsigned int fsize;
771 int f, nr_frags;
772 int wr_idx;
773 u16 qidx;
774
79c12a75 775 dp = &nn->dp;
4c352362 776 qidx = skb_get_queue_mapping(skb);
79c12a75 777 tx_ring = &dp->tx_rings[qidx];
4c352362 778 r_vec = tx_ring->r_vec;
79c12a75 779 nd_q = netdev_get_tx_queue(dp->netdev, qidx);
4c352362
JK
780
781 nr_frags = skb_shinfo(skb)->nr_frags;
782
783 if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
79c12a75
JK
784 nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
785 qidx, tx_ring->wr_p, tx_ring->rd_p);
4c352362 786 netif_tx_stop_queue(nd_q);
28b0cfee 787 nfp_net_tx_xmit_more_flush(tx_ring);
4c352362
JK
788 u64_stats_update_begin(&r_vec->tx_sync);
789 r_vec->tx_busy++;
790 u64_stats_update_end(&r_vec->tx_sync);
791 return NETDEV_TX_BUSY;
792 }
793
794 /* Start with the head skbuf */
79c12a75 795 dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
4c352362 796 DMA_TO_DEVICE);
79c12a75 797 if (dma_mapping_error(dp->dev, dma_addr))
4c352362
JK
798 goto err_free;
799
4aa3b766 800 wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
4c352362
JK
801
802 /* Stash the soft descriptor of the head then initialize it */
803 txbuf = &tx_ring->txbufs[wr_idx];
804 txbuf->skb = skb;
805 txbuf->dma_addr = dma_addr;
806 txbuf->fidx = -1;
807 txbuf->pkt_cnt = 1;
808 txbuf->real_len = skb->len;
809
810 /* Build TX descriptor */
811 txd = &tx_ring->txds[wr_idx];
812 txd->offset_eop = (nr_frags == 0) ? PCIE_DESC_TX_EOP : 0;
813 txd->dma_len = cpu_to_le16(skb_headlen(skb));
814 nfp_desc_set_dma_addr(txd, dma_addr);
815 txd->data_len = cpu_to_le16(skb->len);
816
817 txd->flags = 0;
818 txd->mss = 0;
e53fc9fa 819 txd->lso_hdrlen = 0;
4c352362 820
28063be6 821 /* Do not reorder - tso may adjust pkt cnt, vlan may override fields */
79c12a75 822 nfp_net_tx_tso(r_vec, txbuf, txd, skb);
79c12a75 823 nfp_net_tx_csum(dp, r_vec, txbuf, txd, skb);
79c12a75 824 if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
4c352362
JK
825 txd->flags |= PCIE_DESC_TX_VLAN;
826 txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
827 }
828
829 /* Gather DMA */
830 if (nr_frags > 0) {
831 /* all descs must match except for in addr, length and eop */
832 txdg = *txd;
833
834 for (f = 0; f < nr_frags; f++) {
835 frag = &skb_shinfo(skb)->frags[f];
836 fsize = skb_frag_size(frag);
837
79c12a75 838 dma_addr = skb_frag_dma_map(dp->dev, frag, 0,
4c352362 839 fsize, DMA_TO_DEVICE);
79c12a75 840 if (dma_mapping_error(dp->dev, dma_addr))
4c352362
JK
841 goto err_unmap;
842
4aa3b766 843 wr_idx = D_IDX(tx_ring, wr_idx + 1);
4c352362
JK
844 tx_ring->txbufs[wr_idx].skb = skb;
845 tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
846 tx_ring->txbufs[wr_idx].fidx = f;
847
848 txd = &tx_ring->txds[wr_idx];
849 *txd = txdg;
850 txd->dma_len = cpu_to_le16(fsize);
851 nfp_desc_set_dma_addr(txd, dma_addr);
852 txd->offset_eop =
853 (f == nr_frags - 1) ? PCIE_DESC_TX_EOP : 0;
854 }
855
856 u64_stats_update_begin(&r_vec->tx_sync);
857 r_vec->tx_gather++;
858 u64_stats_update_end(&r_vec->tx_sync);
859 }
860
861 netdev_tx_sent_queue(nd_q, txbuf->real_len);
862
863 tx_ring->wr_p += nr_frags + 1;
864 if (nfp_net_tx_ring_should_stop(tx_ring))
865 nfp_net_tx_ring_stop(nd_q, tx_ring);
866
867 tx_ring->wr_ptr_add += nr_frags + 1;
ecd63a02
JK
868 if (!skb->xmit_more || netif_xmit_stopped(nd_q))
869 nfp_net_tx_xmit_more_flush(tx_ring);
4c352362
JK
870
871 skb_tx_timestamp(skb);
872
873 return NETDEV_TX_OK;
874
875err_unmap:
876 --f;
877 while (f >= 0) {
878 frag = &skb_shinfo(skb)->frags[f];
79c12a75 879 dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
4c352362
JK
880 skb_frag_size(frag), DMA_TO_DEVICE);
881 tx_ring->txbufs[wr_idx].skb = NULL;
882 tx_ring->txbufs[wr_idx].dma_addr = 0;
883 tx_ring->txbufs[wr_idx].fidx = -2;
884 wr_idx = wr_idx - 1;
885 if (wr_idx < 0)
886 wr_idx += tx_ring->cnt;
887 }
79c12a75 888 dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
4c352362
JK
889 skb_headlen(skb), DMA_TO_DEVICE);
890 tx_ring->txbufs[wr_idx].skb = NULL;
891 tx_ring->txbufs[wr_idx].dma_addr = 0;
892 tx_ring->txbufs[wr_idx].fidx = -2;
893err_free:
79c12a75 894 nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
28b0cfee 895 nfp_net_tx_xmit_more_flush(tx_ring);
4c352362
JK
896 u64_stats_update_begin(&r_vec->tx_sync);
897 r_vec->tx_errors++;
898 u64_stats_update_end(&r_vec->tx_sync);
899 dev_kfree_skb_any(skb);
900 return NETDEV_TX_OK;
901}
902
/**
 * nfp_net_tx_complete() - Handle completed TX packets
 * @tx_ring: TX ring structure
 */
909static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
910{
911 struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
79c12a75 912 struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
4c352362
JK
913 const struct skb_frag_struct *frag;
914 struct netdev_queue *nd_q;
915 u32 done_pkts = 0, done_bytes = 0;
916 struct sk_buff *skb;
917 int todo, nr_frags;
918 u32 qcp_rd_p;
919 int fidx;
920 int idx;
921
d38df0d3
JK
922 if (tx_ring->wr_p == tx_ring->rd_p)
923 return;
924
4c352362
JK
925 /* Work out how many descriptors have been transmitted */
926 qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
927
928 if (qcp_rd_p == tx_ring->qcp_rd_p)
929 return;
930
770f0cea 931 todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
4c352362
JK
932
933 while (todo--) {
4aa3b766 934 idx = D_IDX(tx_ring, tx_ring->rd_p++);
4c352362
JK
935
936 skb = tx_ring->txbufs[idx].skb;
937 if (!skb)
938 continue;
939
940 nr_frags = skb_shinfo(skb)->nr_frags;
941 fidx = tx_ring->txbufs[idx].fidx;
942
943 if (fidx == -1) {
944 /* unmap head */
79c12a75 945 dma_unmap_single(dp->dev, tx_ring->txbufs[idx].dma_addr,
4c352362
JK
946 skb_headlen(skb), DMA_TO_DEVICE);
947
948 done_pkts += tx_ring->txbufs[idx].pkt_cnt;
949 done_bytes += tx_ring->txbufs[idx].real_len;
950 } else {
951 /* unmap fragment */
952 frag = &skb_shinfo(skb)->frags[fidx];
79c12a75 953 dma_unmap_page(dp->dev, tx_ring->txbufs[idx].dma_addr,
4c352362
JK
954 skb_frag_size(frag), DMA_TO_DEVICE);
955 }
956
957 /* check for last gather fragment */
958 if (fidx == nr_frags - 1)
959 dev_kfree_skb_any(skb);
960
961 tx_ring->txbufs[idx].dma_addr = 0;
962 tx_ring->txbufs[idx].skb = NULL;
963 tx_ring->txbufs[idx].fidx = -2;
964 }
965
966 tx_ring->qcp_rd_p = qcp_rd_p;
967
968 u64_stats_update_begin(&r_vec->tx_sync);
969 r_vec->tx_bytes += done_bytes;
970 r_vec->tx_pkts += done_pkts;
971 u64_stats_update_end(&r_vec->tx_sync);
972
79c12a75 973 nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
4c352362
JK
974 netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
975 if (nfp_net_tx_ring_should_wake(tx_ring)) {
976 /* Make sure TX thread will see updated tx_ring->rd_p */
977 smp_mb();
978
979 if (unlikely(netif_tx_queue_stopped(nd_q)))
980 netif_tx_wake_queue(nd_q);
981 }
982
983 WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
984 "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
985 tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
986}
987
abeeec4a 988static bool nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
ecd63a02
JK
989{
990 struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
ecd63a02 991 u32 done_pkts = 0, done_bytes = 0;
abeeec4a 992 bool done_all;
ecd63a02
JK
993 int idx, todo;
994 u32 qcp_rd_p;
995
996 /* Work out how many descriptors have been transmitted */
997 qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
998
999 if (qcp_rd_p == tx_ring->qcp_rd_p)
abeeec4a 1000 return true;
ecd63a02 1001
770f0cea 1002 todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);
ecd63a02 1003
abeeec4a
JK
1004 done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
1005 todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);
1006
4aa3b766 1007 tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo);
abeeec4a 1008
92e68195 1009 done_pkts = todo;
ecd63a02 1010 while (todo--) {
4aa3b766 1011 idx = D_IDX(tx_ring, tx_ring->rd_p);
ecd63a02
JK
1012 tx_ring->rd_p++;
1013
ecd63a02 1014 done_bytes += tx_ring->txbufs[idx].real_len;
ecd63a02
JK
1015 }
1016
ecd63a02
JK
1017 u64_stats_update_begin(&r_vec->tx_sync);
1018 r_vec->tx_bytes += done_bytes;
1019 r_vec->tx_pkts += done_pkts;
1020 u64_stats_update_end(&r_vec->tx_sync);
1021
1022 WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
abeeec4a 1023 "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
ecd63a02 1024 tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
abeeec4a
JK
1025
1026 return done_all;
ecd63a02
JK
1027}
1028
4c352362 1029/**
827deea9 1030 * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers
79c12a75 1031 * @dp: NFP Net data path struct
827deea9 1032 * @tx_ring: TX ring structure
4c352362
JK
1033 *
1034 * Assumes that the device is stopped
1035 */
827deea9 1036static void
79c12a75 1037nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
4c352362 1038{
4c352362 1039 const struct skb_frag_struct *frag;
ecd63a02 1040 struct netdev_queue *nd_q;
4c352362 1041
92e68195 1042 while (!tx_ring->is_xdp && tx_ring->rd_p != tx_ring->wr_p) {
ecd63a02 1043 struct nfp_net_tx_buf *tx_buf;
92e68195
JK
1044 struct sk_buff *skb;
1045 int idx, nr_frags;
4c352362 1046
4aa3b766 1047 idx = D_IDX(tx_ring, tx_ring->rd_p);
ecd63a02 1048 tx_buf = &tx_ring->txbufs[idx];
827deea9 1049
92e68195
JK
1050 skb = tx_ring->txbufs[idx].skb;
1051 nr_frags = skb_shinfo(skb)->nr_frags;
4c352362 1052
92e68195
JK
1053 if (tx_buf->fidx == -1) {
1054 /* unmap head */
1055 dma_unmap_single(dp->dev, tx_buf->dma_addr,
1056 skb_headlen(skb), DMA_TO_DEVICE);
1057 } else {
1058 /* unmap fragment */
1059 frag = &skb_shinfo(skb)->frags[tx_buf->fidx];
1060 dma_unmap_page(dp->dev, tx_buf->dma_addr,
1061 skb_frag_size(frag), DMA_TO_DEVICE);
ecd63a02 1062 }
827deea9 1063
92e68195
JK
1064 /* check for last gather fragment */
1065 if (tx_buf->fidx == nr_frags - 1)
1066 dev_kfree_skb_any(skb);
1067
ecd63a02
JK
1068 tx_buf->dma_addr = 0;
1069 tx_buf->skb = NULL;
1070 tx_buf->fidx = -2;
4c352362
JK
1071
1072 tx_ring->qcp_rd_p++;
1073 tx_ring->rd_p++;
1074 }
1075
827deea9
JK
1076 memset(tx_ring->txds, 0, sizeof(*tx_ring->txds) * tx_ring->cnt);
1077 tx_ring->wr_p = 0;
1078 tx_ring->rd_p = 0;
1079 tx_ring->qcp_rd_p = 0;
1080 tx_ring->wr_ptr_add = 0;
1081
92e68195 1082 if (tx_ring->is_xdp)
ecd63a02
JK
1083 return;
1084
79c12a75 1085 nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
4c352362
JK
1086 netdev_tx_reset_queue(nd_q);
1087}
1088
1089static void nfp_net_tx_timeout(struct net_device *netdev)
1090{
1091 struct nfp_net *nn = netdev_priv(netdev);
1092 int i;
1093
79c12a75 1094 for (i = 0; i < nn->dp.netdev->real_num_tx_queues; i++) {
4c352362
JK
1095 if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i)))
1096 continue;
1097 nn_warn(nn, "TX timeout on ring: %d\n", i);
1098 }
1099 nn_warn(nn, "TX watchdog timeout\n");
1100}
1101
1102/* Receive processing
1103 */
bf187ea0 1104static unsigned int
76e1e1a8 1105nfp_net_calc_fl_bufsz(struct nfp_net_dp *dp)
bf187ea0
JK
1106{
1107 unsigned int fl_bufsz;
1108
c0f031bc 1109 fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
6fe0c3b4 1110 fl_bufsz += dp->rx_dma_off;
79c12a75 1111 if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
c0f031bc 1112 fl_bufsz += NFP_NET_MAX_PREPEND;
bf187ea0 1113 else
79c12a75 1114 fl_bufsz += dp->rx_offset;
76e1e1a8 1115 fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + dp->mtu;
bf187ea0 1116
c0f031bc
JK
1117 fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
1118 fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1119
bf187ea0
JK
1120 return fl_bufsz;
1121}
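
/* Editor's worked example (illustrative only, not upstream code): for a
 * non-XDP data path with a fixed RX offset and a 1500 byte MTU the freelist
 * buffer size computed above is
 *
 *	SKB_DATA_ALIGN(NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off +
 *		       dp->rx_offset + ETH_HLEN + VLAN_HLEN * 2 + 1500) +
 *	SKB_DATA_ALIGN(sizeof(struct skb_shared_info))
 *
 * i.e. headroom plus the largest frame we may receive, rounded up so that
 * build_skb() can append its struct skb_shared_info.
 */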
4c352362 1122
ecd63a02
JK
1123static void
1124nfp_net_free_frag(void *frag, bool xdp)
1125{
1126 if (!xdp)
1127 skb_free_frag(frag);
1128 else
1129 __free_page(virt_to_page(frag));
1130}
1131
/**
 * nfp_net_rx_alloc_one() - Allocate and map page frag for RX
 * @dp: NFP Net data path struct
 * @dma_addr: Pointer to storage for DMA address (output param)
 *
 * This function will allocate a new page frag and map it for DMA.
 *
 * Return: allocated page frag or NULL on failure.
 */
d78005a5 1141static void *nfp_net_rx_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
4c352362 1142{
c0f031bc 1143 void *frag;
4c352362 1144
9dc6b116 1145 if (!dp->xdp_prog)
2195c263 1146 frag = netdev_alloc_frag(dp->fl_bufsz);
ecd63a02
JK
1147 else
1148 frag = page_address(alloc_page(GFP_KERNEL | __GFP_COLD));
c0f031bc 1149 if (!frag) {
79c12a75 1150 nn_dp_warn(dp, "Failed to alloc receive page frag\n");
4c352362
JK
1151 return NULL;
1152 }
1153
c487e6b1 1154 *dma_addr = nfp_net_dma_map_rx(dp, frag);
79c12a75 1155 if (dma_mapping_error(dp->dev, *dma_addr)) {
9dc6b116 1156 nfp_net_free_frag(frag, dp->xdp_prog);
79c12a75 1157 nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
4c352362
JK
1158 return NULL;
1159 }
1160
c0f031bc 1161 return frag;
4c352362
JK
1162}
1163
c487e6b1 1164static void *nfp_net_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
b64b7bb6
JK
1165{
1166 void *frag;
1167
79c12a75
JK
1168 if (!dp->xdp_prog)
1169 frag = napi_alloc_frag(dp->fl_bufsz);
ecd63a02
JK
1170 else
1171 frag = page_address(alloc_page(GFP_ATOMIC | __GFP_COLD));
b64b7bb6 1172 if (!frag) {
79c12a75 1173 nn_dp_warn(dp, "Failed to alloc receive page frag\n");
b64b7bb6
JK
1174 return NULL;
1175 }
1176
c487e6b1 1177 *dma_addr = nfp_net_dma_map_rx(dp, frag);
79c12a75
JK
1178 if (dma_mapping_error(dp->dev, *dma_addr)) {
1179 nfp_net_free_frag(frag, dp->xdp_prog);
1180 nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
b64b7bb6
JK
1181 return NULL;
1182 }
1183
1184 return frag;
1185}
1186
4c352362
JK
1187/**
1188 * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings
6fe0c3b4 1189 * @dp: NFP Net data path struct
4c352362 1190 * @rx_ring: RX ring structure
c0f031bc 1191 * @frag: page fragment buffer
4c352362
JK
1192 * @dma_addr: DMA address of skb mapping
1193 */
6fe0c3b4
JK
1194static void nfp_net_rx_give_one(const struct nfp_net_dp *dp,
1195 struct nfp_net_rx_ring *rx_ring,
c0f031bc 1196 void *frag, dma_addr_t dma_addr)
4c352362
JK
1197{
1198 unsigned int wr_idx;
1199
4aa3b766 1200 wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
4c352362 1201
5cd4fbea
JK
1202 nfp_net_dma_sync_dev_rx(dp, dma_addr);
1203
4c352362 1204 /* Stash SKB and DMA address away */
c0f031bc 1205 rx_ring->rxbufs[wr_idx].frag = frag;
4c352362
JK
1206 rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;
1207
1208 /* Fill freelist descriptor */
1209 rx_ring->rxds[wr_idx].fld.reserved = 0;
1210 rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
6fe0c3b4
JK
1211 nfp_desc_set_dma_addr(&rx_ring->rxds[wr_idx].fld,
1212 dma_addr + dp->rx_dma_off);
4c352362
JK
1213
1214 rx_ring->wr_p++;
9ed9ea70 1215 if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
4c352362
JK
1216 /* Update write pointer of the freelist queue. Make
1217 * sure all writes are flushed before telling the hardware.
1218 */
1219 wmb();
9ed9ea70 1220 nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
4c352362
JK
1221 }
1222}
1223
1224/**
1934680f
JK
1225 * nfp_net_rx_ring_reset() - Reflect in SW state of freelist after disable
1226 * @rx_ring: RX ring structure
4c352362 1227 *
1934680f
JK
1228 * Warning: Do *not* call if ring buffers were never put on the FW freelist
1229 * (i.e. device was not enabled)!
4c352362 1230 */
1934680f 1231static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
4c352362 1232{
1934680f 1233 unsigned int wr_idx, last_idx;
4c352362 1234
1934680f 1235 /* Move the empty entry to the end of the list */
4aa3b766 1236 wr_idx = D_IDX(rx_ring, rx_ring->wr_p);
1934680f
JK
1237 last_idx = rx_ring->cnt - 1;
1238 rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr;
c0f031bc 1239 rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag;
1934680f 1240 rx_ring->rxbufs[last_idx].dma_addr = 0;
c0f031bc 1241 rx_ring->rxbufs[last_idx].frag = NULL;
4c352362 1242
1934680f
JK
1243 memset(rx_ring->rxds, 0, sizeof(*rx_ring->rxds) * rx_ring->cnt);
1244 rx_ring->wr_p = 0;
1245 rx_ring->rd_p = 0;
1934680f 1246}
4c352362 1247
1934680f
JK
1248/**
1249 * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
79c12a75 1250 * @dp: NFP Net data path struct
1934680f
JK
1251 * @rx_ring: RX ring to remove buffers from
1252 *
1253 * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
1254 * entries. After device is disabled nfp_net_rx_ring_reset() must be called
1255 * to restore required ring geometry.
1256 */
1257static void
79c12a75 1258nfp_net_rx_ring_bufs_free(struct nfp_net_dp *dp,
9dc6b116 1259 struct nfp_net_rx_ring *rx_ring)
1934680f 1260{
1934680f 1261 unsigned int i;
4c352362 1262
1934680f
JK
1263 for (i = 0; i < rx_ring->cnt - 1; i++) {
1264 /* NULL skb can only happen when initial filling of the ring
1265 * fails to allocate enough buffers and calls here to free
1266 * already allocated ones.
1267 */
c0f031bc 1268 if (!rx_ring->rxbufs[i].frag)
1934680f
JK
1269 continue;
1270
c487e6b1 1271 nfp_net_dma_unmap_rx(dp, rx_ring->rxbufs[i].dma_addr);
9dc6b116 1272 nfp_net_free_frag(rx_ring->rxbufs[i].frag, dp->xdp_prog);
1934680f 1273 rx_ring->rxbufs[i].dma_addr = 0;
c0f031bc 1274 rx_ring->rxbufs[i].frag = NULL;
4c352362
JK
1275 }
1276}
1277
1278/**
1934680f 1279 * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
79c12a75 1280 * @dp: NFP Net data path struct
 * @rx_ring: RX ring to allocate buffers for
4c352362 1282 */
1934680f 1283static int
79c12a75 1284nfp_net_rx_ring_bufs_alloc(struct nfp_net_dp *dp,
9dc6b116 1285 struct nfp_net_rx_ring *rx_ring)
4c352362 1286{
1934680f
JK
1287 struct nfp_net_rx_buf *rxbufs;
1288 unsigned int i;
1289
1290 rxbufs = rx_ring->rxbufs;
4c352362 1291
1934680f 1292 for (i = 0; i < rx_ring->cnt - 1; i++) {
d78005a5 1293 rxbufs[i].frag = nfp_net_rx_alloc_one(dp, &rxbufs[i].dma_addr);
c0f031bc 1294 if (!rxbufs[i].frag) {
9dc6b116 1295 nfp_net_rx_ring_bufs_free(dp, rx_ring);
4c352362
JK
1296 return -ENOMEM;
1297 }
4c352362
JK
1298 }
1299
1300 return 0;
1301}
1302
1934680f
JK
1303/**
1304 * nfp_net_rx_ring_fill_freelist() - Give buffers from the ring to FW
6fe0c3b4 1305 * @dp: NFP Net data path struct
1934680f
JK
1306 * @rx_ring: RX ring to fill
1307 */
6fe0c3b4
JK
1308static void
1309nfp_net_rx_ring_fill_freelist(struct nfp_net_dp *dp,
1310 struct nfp_net_rx_ring *rx_ring)
1934680f
JK
1311{
1312 unsigned int i;
1313
1314 for (i = 0; i < rx_ring->cnt - 1; i++)
6fe0c3b4 1315 nfp_net_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
1934680f
JK
1316 rx_ring->rxbufs[i].dma_addr);
1317}
1318
4c352362
JK
1319/**
1320 * nfp_net_rx_csum_has_errors() - group check if rxd has any csum errors
1321 * @flags: RX descriptor flags field in CPU byte order
1322 */
1323static int nfp_net_rx_csum_has_errors(u16 flags)
1324{
1325 u16 csum_all_checked, csum_all_ok;
1326
1327 csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
1328 csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;
1329
1330 return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
1331}
1332
1333/**
1334 * nfp_net_rx_csum() - set SKB checksum field based on RX descriptor flags
79c12a75 1335 * @dp: NFP Net data path struct
4c352362
JK
1336 * @r_vec: per-ring structure
1337 * @rxd: Pointer to RX descriptor
ddb98d94 1338 * @meta: Parsed metadata prepend
4c352362
JK
1339 * @skb: Pointer to SKB
1340 */
79c12a75
JK
1341static void nfp_net_rx_csum(struct nfp_net_dp *dp,
1342 struct nfp_net_r_vector *r_vec,
ddb98d94
JK
1343 struct nfp_net_rx_desc *rxd,
1344 struct nfp_meta_parsed *meta, struct sk_buff *skb)
4c352362
JK
1345{
1346 skb_checksum_none_assert(skb);
1347
79c12a75 1348 if (!(dp->netdev->features & NETIF_F_RXCSUM))
4c352362
JK
1349 return;
1350
ddb98d94
JK
1351 if (meta->csum_type) {
1352 skb->ip_summed = meta->csum_type;
1353 skb->csum = meta->csum;
1354 u64_stats_update_begin(&r_vec->rx_sync);
1355 r_vec->hw_csum_rx_ok++;
1356 u64_stats_update_end(&r_vec->rx_sync);
1357 return;
1358 }
1359
4c352362
JK
1360 if (nfp_net_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
1361 u64_stats_update_begin(&r_vec->rx_sync);
1362 r_vec->hw_csum_rx_error++;
1363 u64_stats_update_end(&r_vec->rx_sync);
1364 return;
1365 }
1366
1367 /* Assume that the firmware will never report inner CSUM_OK unless outer
1368 * L4 headers were successfully parsed. FW will always report zero UDP
1369 * checksum as CSUM_OK.
1370 */
1371 if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
1372 rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
1373 __skb_incr_checksum_unnecessary(skb);
1374 u64_stats_update_begin(&r_vec->rx_sync);
1375 r_vec->hw_csum_rx_ok++;
1376 u64_stats_update_end(&r_vec->rx_sync);
1377 }
1378
1379 if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
1380 rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
1381 __skb_incr_checksum_unnecessary(skb);
1382 u64_stats_update_begin(&r_vec->rx_sync);
1383 r_vec->hw_csum_rx_inner_ok++;
1384 u64_stats_update_end(&r_vec->rx_sync);
1385 }
1386}
1387
e524a6a9
JK
1388static void
1389nfp_net_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
1390 unsigned int type, __be32 *hash)
4c352362 1391{
19d0f54e 1392 if (!(netdev->features & NETIF_F_RXHASH))
4c352362
JK
1393 return;
1394
19d0f54e 1395 switch (type) {
4c352362
JK
1396 case NFP_NET_RSS_IPV4:
1397 case NFP_NET_RSS_IPV6:
1398 case NFP_NET_RSS_IPV6_EX:
e524a6a9 1399 meta->hash_type = PKT_HASH_TYPE_L3;
4c352362
JK
1400 break;
1401 default:
e524a6a9 1402 meta->hash_type = PKT_HASH_TYPE_L4;
4c352362
JK
1403 break;
1404 }
e524a6a9
JK
1405
1406 meta->hash = get_unaligned_be32(hash);
4c352362
JK
1407}
1408
19d0f54e 1409static void
e524a6a9 1410nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta,
b92fb77f 1411 void *data, struct nfp_net_rx_desc *rxd)
19d0f54e 1412{
b92fb77f 1413 struct nfp_net_rx_hash *rx_hash = data;
19d0f54e
JK
1414
1415 if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
1416 return;
1417
e524a6a9 1418 nfp_net_set_hash(netdev, meta, get_unaligned_be32(&rx_hash->hash_type),
19d0f54e
JK
1419 &rx_hash->hash);
1420}
1421
1422static void *
e524a6a9 1423nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
b92fb77f 1424 void *data, int meta_len)
19d0f54e 1425{
19d0f54e
JK
1426 u32 meta_info;
1427
1428 meta_info = get_unaligned_be32(data);
1429 data += 4;
1430
1431 while (meta_info) {
1432 switch (meta_info & NFP_NET_META_FIELD_MASK) {
1433 case NFP_NET_META_HASH:
1434 meta_info >>= NFP_NET_META_FIELD_SIZE;
e524a6a9 1435 nfp_net_set_hash(netdev, meta,
19d0f54e
JK
1436 meta_info & NFP_NET_META_FIELD_MASK,
1437 (__be32 *)data);
1438 data += 4;
1439 break;
1440 case NFP_NET_META_MARK:
e524a6a9 1441 meta->mark = get_unaligned_be32(data);
19d0f54e
JK
1442 data += 4;
1443 break;
ddb98d94
JK
1444 case NFP_NET_META_CSUM:
1445 meta->csum_type = CHECKSUM_COMPLETE;
1446 meta->csum =
1447 (__force __wsum)__get_unaligned_cpu32(data);
1448 data += 4;
1449 break;
19d0f54e
JK
1450 default:
1451 return NULL;
1452 }
1453
1454 meta_info >>= NFP_NET_META_FIELD_SIZE;
1455 }
1456
1457 return data;
1458}
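
/* Editor's sketch of the chained metadata prepend parsed above (added for
 * illustration, not upstream code): the prepend starts with a big-endian
 * 32-bit word packing field type codes, NFP_NET_META_FIELD_SIZE bits each,
 * least significant field first; every field is followed by a 4-byte
 * big-endian value.  NFP_NET_META_HASH consumes an extra type slot which
 * carries the RSS hash type.  Assuming 4-bit fields, a prepend carrying a
 * hash and a mark would look roughly like:
 *
 *	word 0: NFP_NET_META_HASH | (rss_type << 4) | (NFP_NET_META_MARK << 8)
 *	word 1: RSS hash value
 *	word 2: mark value
 */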
1459
e9949aeb 1460static void
6fe0c3b4
JK
1461nfp_net_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
1462 struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
1463 struct sk_buff *skb)
e9949aeb
JK
1464{
1465 u64_stats_update_begin(&r_vec->rx_sync);
1466 r_vec->rx_drops++;
1467 u64_stats_update_end(&r_vec->rx_sync);
1468
	/* The skb is built around the frag, so freeing the skb would also
	 * free the frag; take an extra page reference to be able to reuse
	 * the frag.
	 */
	if (skb && rxbuf && skb->head == rxbuf->frag)
		page_ref_inc(virt_to_head_page(rxbuf->frag));
e9949aeb 1474 if (rxbuf)
6fe0c3b4 1475 nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
e9949aeb
JK
1476 if (skb)
1477 dev_kfree_skb_any(skb);
1478}
1479
a67edbf4 1480static bool
79c12a75 1481nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
ecd63a02 1482 struct nfp_net_tx_ring *tx_ring,
1abae319 1483 struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
abeeec4a 1484 unsigned int pkt_len, bool *completed)
ecd63a02
JK
1485{
1486 struct nfp_net_tx_buf *txbuf;
1487 struct nfp_net_tx_desc *txd;
ecd63a02
JK
1488 int wr_idx;
1489
1490 if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
abeeec4a
JK
1491 if (!*completed) {
1492 nfp_net_xdp_complete(tx_ring);
1493 *completed = true;
1494 }
1495
1496 if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
1497 nfp_net_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
1498 NULL);
1499 return false;
1500 }
ecd63a02
JK
1501 }
1502
4aa3b766 1503 wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
ecd63a02
JK
1504
1505 /* Stash the soft descriptor of the head then initialize it */
1506 txbuf = &tx_ring->txbufs[wr_idx];
92e68195
JK
1507
1508 nfp_net_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr);
1509
ecd63a02
JK
1510 txbuf->frag = rxbuf->frag;
1511 txbuf->dma_addr = rxbuf->dma_addr;
1512 txbuf->fidx = -1;
1513 txbuf->pkt_cnt = 1;
1514 txbuf->real_len = pkt_len;
1515
1abae319 1516 dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
d58cebb7 1517 pkt_len, DMA_BIDIRECTIONAL);
ecd63a02
JK
1518
1519 /* Build TX descriptor */
1520 txd = &tx_ring->txds[wr_idx];
1521 txd->offset_eop = PCIE_DESC_TX_EOP;
1522 txd->dma_len = cpu_to_le16(pkt_len);
1abae319 1523 nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + dma_off);
ecd63a02
JK
1524 txd->data_len = cpu_to_le16(pkt_len);
1525
1526 txd->flags = 0;
1527 txd->mss = 0;
e53fc9fa 1528 txd->lso_hdrlen = 0;
ecd63a02
JK
1529
1530 tx_ring->wr_p++;
1531 tx_ring->wr_ptr_add++;
a67edbf4 1532 return true;
ecd63a02
JK
1533}
1534
6fe0c3b4
JK
1535static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start,
1536 unsigned int *off, unsigned int *len)
ecd63a02
JK
1537{
1538 struct xdp_buff xdp;
6fe0c3b4
JK
1539 void *orig_data;
1540 int ret;
1541
1542 xdp.data_hard_start = hard_start;
1543 xdp.data = data + *off;
1544 xdp.data_end = data + *off + *len;
ecd63a02 1545
6fe0c3b4
JK
1546 orig_data = xdp.data;
1547 ret = bpf_prog_run_xdp(prog, &xdp);
ecd63a02 1548
6fe0c3b4
JK
1549 *len -= xdp.data - orig_data;
1550 *off += xdp.data - orig_data;
1551
1552 return ret;
ecd63a02
JK
1553}
1554
4c352362
JK
1555/**
1556 * nfp_net_rx() - receive up to @budget packets on @rx_ring
1557 * @rx_ring: RX ring to receive from
1558 * @budget: NAPI budget
1559 *
1560 * Note, this function is separated out from the napi poll function to
1561 * more cleanly separate packet receive code from other bookkeeping
1562 * functions performed in the napi poll function.
1563 *
4c352362
JK
1564 * Return: Number of packets received.
1565 */
1566static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
1567{
1568 struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
79c12a75 1569 struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
ecd63a02
JK
1570 struct nfp_net_tx_ring *tx_ring;
1571 struct bpf_prog *xdp_prog;
abeeec4a 1572 bool xdp_tx_cmpl = false;
ecd63a02 1573 unsigned int true_bufsz;
c0f031bc 1574 struct sk_buff *skb;
416db5c1 1575 int pkts_polled = 0;
4c352362
JK
1576 int idx;
1577
ecd63a02 1578 rcu_read_lock();
79c12a75 1579 xdp_prog = READ_ONCE(dp->xdp_prog);
79c12a75 1580 true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
ecd63a02
JK
1581 tx_ring = r_vec->xdp_ring;
1582
416db5c1 1583 while (pkts_polled < budget) {
5cd4fbea 1584 unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
13df4c66
JK
1585 struct nfp_net_rx_buf *rxbuf;
1586 struct nfp_net_rx_desc *rxd;
e524a6a9 1587 struct nfp_meta_parsed meta;
13df4c66
JK
1588 dma_addr_t new_dma_addr;
1589 void *new_frag;
1590
4aa3b766 1591 idx = D_IDX(rx_ring, rx_ring->rd_p);
4c352362
JK
1592
1593 rxd = &rx_ring->rxds[idx];
416db5c1 1594 if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
4c352362 1595 break;
416db5c1 1596
4c352362
JK
1597 /* Memory barrier to ensure that we won't do other reads
1598 * before the DD bit.
1599 */
1600 dma_rmb();
1601
e524a6a9
JK
1602 memset(&meta, 0, sizeof(meta));
1603
4c352362
JK
1604 rx_ring->rd_p++;
1605 pkts_polled++;
4c352362 1606
c0f031bc 1607 rxbuf = &rx_ring->rxbufs[idx];
180012dc
JK
1608 /* < meta_len >
1609 * <-- [rx_offset] -->
1610 * ---------------------------------------------------------
1611 * | [XX] | metadata | packet | XXXX |
1612 * ---------------------------------------------------------
1613 * <---------------- data_len --------------->
1614 *
1615 * The rx_offset is fixed for all packets, the meta_len can vary
1616 * on a packet by packet basis. If rx_offset is set to zero
1617 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
1618 * buffer and is immediately followed by the packet (no [XX]).
1619 */
4c352362
JK
1620 meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
1621 data_len = le16_to_cpu(rxd->rxd.data_len);
13df4c66 1622 pkt_len = data_len - meta_len;
4c352362 1623
5cd4fbea 1624 pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
79c12a75 1625 if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
5cd4fbea 1626 pkt_off += meta_len;
180012dc 1627 else
5cd4fbea
JK
1628 pkt_off += dp->rx_offset;
1629 meta_off = pkt_off - meta_len;
4c352362 1630
4c352362
JK
1631 /* Stats update */
1632 u64_stats_update_begin(&r_vec->rx_sync);
1633 r_vec->rx_pkts++;
13df4c66 1634 r_vec->rx_bytes += pkt_len;
4c352362
JK
1635 u64_stats_update_end(&r_vec->rx_sync);
1636
b92fb77f
JK
1637 if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
1638 (dp->rx_offset && meta_len > dp->rx_offset))) {
1639 nn_dp_warn(dp, "oversized RX packet metadata %u\n",
1640 meta_len);
6fe0c3b4 1641 nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
b92fb77f
JK
1642 continue;
1643 }
1644
5cd4fbea
JK
1645 nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
1646 data_len);
1647
e524a6a9
JK
1648 if (!dp->chained_metadata_format) {
1649 nfp_net_set_hash_desc(dp->netdev, &meta,
1650 rxbuf->frag + meta_off, rxd);
1651 } else if (meta_len) {
1652 void *end;
1653
1654 end = nfp_net_parse_meta(dp->netdev, &meta,
1655 rxbuf->frag + meta_off,
1656 meta_len);
1657 if (unlikely(end != rxbuf->frag + pkt_off)) {
1658 nn_dp_warn(dp, "invalid RX packet metadata\n");
1659 nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
1660 NULL);
1661 continue;
1662 }
1663 }
1664
6d677075 1665 if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
79c12a75 1666 dp->bpf_offload_xdp)) {
1abae319 1667 unsigned int dma_off;
6fe0c3b4 1668 void *hard_start;
ecd63a02
JK
1669 int act;
1670
6fe0c3b4 1671 hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
6fe0c3b4 1672
6fe0c3b4 1673 act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start,
5cd4fbea 1674 &pkt_off, &pkt_len);
ecd63a02
JK
1675 switch (act) {
1676 case XDP_PASS:
1677 break;
1678 case XDP_TX:
5cd4fbea 1679 dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
79c12a75 1680 if (unlikely(!nfp_net_tx_xdp_buf(dp, rx_ring,
a67edbf4 1681 tx_ring, rxbuf,
1abae319 1682 dma_off,
abeeec4a
JK
1683 pkt_len,
1684 &xdp_tx_cmpl)))
79c12a75
JK
1685 trace_xdp_exception(dp->netdev,
1686 xdp_prog, act);
ecd63a02
JK
1687 continue;
1688 default:
1689 bpf_warn_invalid_xdp_action(act);
1690 case XDP_ABORTED:
79c12a75 1691 trace_xdp_exception(dp->netdev, xdp_prog, act);
ecd63a02 1692 case XDP_DROP:
6fe0c3b4 1693 nfp_net_rx_give_one(dp, rx_ring, rxbuf->frag,
ecd63a02
JK
1694 rxbuf->dma_addr);
1695 continue;
1696 }
1697 }
1698
1699 skb = build_skb(rxbuf->frag, true_bufsz);
13df4c66 1700 if (unlikely(!skb)) {
6fe0c3b4 1701 nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
13df4c66
JK
1702 continue;
1703 }
c487e6b1 1704 new_frag = nfp_net_napi_alloc_one(dp, &new_dma_addr);
13df4c66 1705 if (unlikely(!new_frag)) {
6fe0c3b4 1706 nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
13df4c66
JK
1707 continue;
1708 }
1709
c487e6b1 1710 nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);
13df4c66 1711
6fe0c3b4 1712 nfp_net_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);
13df4c66 1713
5cd4fbea 1714 skb_reserve(skb, pkt_off);
13df4c66
JK
1715 skb_put(skb, pkt_len);
1716
e524a6a9
JK
1717 skb->mark = meta.mark;
1718 skb_set_hash(skb, meta.hash, meta.hash_type);
19d0f54e 1719
4c352362 1720 skb_record_rx_queue(skb, rx_ring->idx);
79c12a75 1721 skb->protocol = eth_type_trans(skb, dp->netdev);
4c352362 1722
ddb98d94 1723 nfp_net_rx_csum(dp, r_vec, rxd, &meta, skb);
4c352362
JK
1724
1725 if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
1726 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
1727 le16_to_cpu(rxd->rxd.vlan));
1728
1729 napi_gro_receive(&rx_ring->r_vec->napi, skb);
1730 }
1731
abeeec4a
JK
1732 if (xdp_prog) {
1733 if (tx_ring->wr_ptr_add)
1734 nfp_net_tx_xmit_more_flush(tx_ring);
1735 else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
1736 !xdp_tx_cmpl)
1737 if (!nfp_net_xdp_complete(tx_ring))
1738 pkts_polled = budget;
1739 }
ecd63a02
JK
1740 rcu_read_unlock();
1741
4c352362
JK
1742 return pkts_polled;
1743}
1744
1745/**
1747 * nfp_net_poll() - NAPI poll function
1747 * @napi: NAPI structure
1748 * @budget: NAPI budget
1749 *
1750 * Return: number of packets polled.
1751 */
1752static int nfp_net_poll(struct napi_struct *napi, int budget)
1753{
1754 struct nfp_net_r_vector *r_vec =
1755 container_of(napi, struct nfp_net_r_vector, napi);
cbeaf7aa 1756 unsigned int pkts_polled = 0;
4c352362 1757
cbeaf7aa
JK
1758 if (r_vec->tx_ring)
1759 nfp_net_tx_complete(r_vec->tx_ring);
abeeec4a 1760 if (r_vec->rx_ring)
cbeaf7aa 1761 pkts_polled = nfp_net_rx(r_vec->rx_ring, budget);
4c352362 1762
7de5f115
JK
1763 if (pkts_polled < budget)
1764 if (napi_complete_done(napi, pkts_polled))
1765 nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
4c352362
JK
1766
1767 return pkts_polled;
1768}
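/* [Editorial note: the poll function above follows the standard NAPI
 * contract -- it returns the number of RX packets processed and, only
 * when that number is below @budget and napi_complete_done() accepts
 * the completion, re-enables (unmasks) the ring vector's interrupt.]
 */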
1769
1770/* Setup and Configuration
1771 */
1772
1773/**
1774 * nfp_net_tx_ring_free() - Free resources allocated to a TX ring
1775 * @tx_ring: TX ring to free
1776 */
1777static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
1778{
1779 struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
79c12a75 1780 struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
4c352362 1781
4c352362
JK
1782 kfree(tx_ring->txbufs);
1783
1784 if (tx_ring->txds)
79c12a75 1785 dma_free_coherent(dp->dev, tx_ring->size,
4c352362
JK
1786 tx_ring->txds, tx_ring->dma);
1787
1788 tx_ring->cnt = 0;
4c352362
JK
1789 tx_ring->txbufs = NULL;
1790 tx_ring->txds = NULL;
1791 tx_ring->dma = 0;
1792 tx_ring->size = 0;
1793}
1794
1795/**
1796 * nfp_net_tx_ring_alloc() - Allocate resources for a TX ring
892a7f70 1797 * @dp: NFP Net data path struct
4c352362
JK
1798 * @tx_ring: TX Ring structure to allocate
1799 *
1800 * Return: 0 on success, negative errno otherwise.
1801 */
ecd63a02 1802static int
92e68195 1803nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
4c352362
JK
1804{
1805 struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
4c352362
JK
1806 int sz;
1807
892a7f70 1808 tx_ring->cnt = dp->txd_cnt;
4c352362
JK
1809
1810 tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt;
79c12a75 1811 tx_ring->txds = dma_zalloc_coherent(dp->dev, tx_ring->size,
4c352362
JK
1812 &tx_ring->dma, GFP_KERNEL);
1813 if (!tx_ring->txds)
1814 goto err_alloc;
1815
1816 sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt;
1817 tx_ring->txbufs = kzalloc(sz, GFP_KERNEL);
1818 if (!tx_ring->txbufs)
1819 goto err_alloc;
1820
92e68195 1821 if (!tx_ring->is_xdp)
79c12a75 1822 netif_set_xps_queue(dp->netdev, &r_vec->affinity_mask,
ecd63a02 1823 tx_ring->idx);
4c352362 1824
4c352362
JK
1825 return 0;
1826
1827err_alloc:
1828 nfp_net_tx_ring_free(tx_ring);
1829 return -ENOMEM;
1830}
1831
92e68195
JK
1832static void
1833nfp_net_tx_ring_bufs_free(struct nfp_net_dp *dp,
1834 struct nfp_net_tx_ring *tx_ring)
1835{
1836 unsigned int i;
1837
1838 if (!tx_ring->is_xdp)
1839 return;
1840
1841 for (i = 0; i < tx_ring->cnt; i++) {
1842 if (!tx_ring->txbufs[i].frag)
1843 return;
1844
1845 nfp_net_dma_unmap_rx(dp, tx_ring->txbufs[i].dma_addr);
1846 __free_page(virt_to_page(tx_ring->txbufs[i].frag));
1847 }
1848}
1849
1850static int
1851nfp_net_tx_ring_bufs_alloc(struct nfp_net_dp *dp,
1852 struct nfp_net_tx_ring *tx_ring)
1853{
1854 struct nfp_net_tx_buf *txbufs = tx_ring->txbufs;
1855 unsigned int i;
1856
1857 if (!tx_ring->is_xdp)
1858 return 0;
1859
1860 for (i = 0; i < tx_ring->cnt; i++) {
1861 txbufs[i].frag = nfp_net_rx_alloc_one(dp, &txbufs[i].dma_addr);
1862 if (!txbufs[i].frag) {
1863 nfp_net_tx_ring_bufs_free(dp, tx_ring);
1864 return -ENOMEM;
1865 }
1866 }
1867
1868 return 0;
1869}
1870
892a7f70 1871static int nfp_net_tx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
cc7c0333 1872{
cc7c0333
JK
1873 unsigned int r;
1874
892a7f70
JK
1875 dp->tx_rings = kcalloc(dp->num_tx_rings, sizeof(*dp->tx_rings),
1876 GFP_KERNEL);
1877 if (!dp->tx_rings)
1878 return -ENOMEM;
cc7c0333 1879
892a7f70 1880 for (r = 0; r < dp->num_tx_rings; r++) {
ecd63a02
JK
1881 int bias = 0;
1882
512e94dc
JK
1883 if (r >= dp->num_stack_tx_rings)
1884 bias = dp->num_stack_tx_rings;
cc7c0333 1885
892a7f70 1886 nfp_net_tx_ring_init(&dp->tx_rings[r], &nn->r_vecs[r - bias],
92e68195 1887 r, bias);
ecd63a02 1888
92e68195 1889 if (nfp_net_tx_ring_alloc(dp, &dp->tx_rings[r]))
cc7c0333 1890 goto err_free_prev;
92e68195
JK
1891
1892 if (nfp_net_tx_ring_bufs_alloc(dp, &dp->tx_rings[r]))
1893 goto err_free_ring;
cc7c0333
JK
1894 }
1895
892a7f70 1896 return 0;
cc7c0333
JK
1897
1898err_free_prev:
92e68195
JK
1899 while (r--) {
1900 nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]);
1901err_free_ring:
892a7f70 1902 nfp_net_tx_ring_free(&dp->tx_rings[r]);
92e68195 1903 }
892a7f70
JK
1904 kfree(dp->tx_rings);
1905 return -ENOMEM;
cc7c0333
JK
1906}
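/* [Editorial note: the unwind above relies on the err_free_ring label
 * sitting inside the while (r--) loop: a failure in
 * nfp_net_tx_ring_bufs_alloc() jumps there to free ring r's descriptor
 * memory first, then the loop keeps unwinding the fully initialised
 * rings below it, freeing both their buffers and descriptors.]
 */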
1907
892a7f70 1908static void nfp_net_tx_rings_free(struct nfp_net_dp *dp)
cc7c0333
JK
1909{
1910 unsigned int r;
1911
92e68195
JK
1912 for (r = 0; r < dp->num_tx_rings; r++) {
1913 nfp_net_tx_ring_bufs_free(dp, &dp->tx_rings[r]);
892a7f70 1914 nfp_net_tx_ring_free(&dp->tx_rings[r]);
92e68195 1915 }
cc7c0333 1916
892a7f70 1917 kfree(dp->tx_rings);
cc7c0333
JK
1918}
1919
4c352362
JK
1920/**
1921 * nfp_net_rx_ring_free() - Free resources allocated to a RX ring
1922 * @rx_ring: RX ring to free
1923 */
1924static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring)
1925{
1926 struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
79c12a75 1927 struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
4c352362 1928
4c352362
JK
1929 kfree(rx_ring->rxbufs);
1930
1931 if (rx_ring->rxds)
79c12a75 1932 dma_free_coherent(dp->dev, rx_ring->size,
4c352362
JK
1933 rx_ring->rxds, rx_ring->dma);
1934
1935 rx_ring->cnt = 0;
4c352362
JK
1936 rx_ring->rxbufs = NULL;
1937 rx_ring->rxds = NULL;
1938 rx_ring->dma = 0;
1939 rx_ring->size = 0;
1940}
1941
1942/**
1943 * nfp_net_rx_ring_alloc() - Allocate resources for an RX ring
2195c263 1944 * @dp: NFP Net data path struct
4c352362
JK
1945 * @rx_ring: RX ring to allocate
1946 *
1947 * Return: 0 on success, negative errno otherwise.
1948 */
30d21171 1949static int
892a7f70 1950nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring)
4c352362 1951{
4c352362
JK
1952 int sz;
1953
892a7f70 1954 rx_ring->cnt = dp->rxd_cnt;
4c352362 1955 rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt;
79c12a75 1956 rx_ring->rxds = dma_zalloc_coherent(dp->dev, rx_ring->size,
4c352362
JK
1957 &rx_ring->dma, GFP_KERNEL);
1958 if (!rx_ring->rxds)
1959 goto err_alloc;
1960
1961 sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt;
1962 rx_ring->rxbufs = kzalloc(sz, GFP_KERNEL);
1963 if (!rx_ring->rxbufs)
1964 goto err_alloc;
1965
4c352362
JK
1966 return 0;
1967
1968err_alloc:
1969 nfp_net_rx_ring_free(rx_ring);
1970 return -ENOMEM;
1971}
1972
892a7f70 1973static int nfp_net_rx_rings_prepare(struct nfp_net *nn, struct nfp_net_dp *dp)
36a857e4 1974{
36a857e4
JK
1975 unsigned int r;
1976
892a7f70
JK
1977 dp->rx_rings = kcalloc(dp->num_rx_rings, sizeof(*dp->rx_rings),
1978 GFP_KERNEL);
1979 if (!dp->rx_rings)
1980 return -ENOMEM;
36a857e4 1981
892a7f70
JK
1982 for (r = 0; r < dp->num_rx_rings; r++) {
1983 nfp_net_rx_ring_init(&dp->rx_rings[r], &nn->r_vecs[r], r);
36a857e4 1984
892a7f70 1985 if (nfp_net_rx_ring_alloc(dp, &dp->rx_rings[r]))
36a857e4
JK
1986 goto err_free_prev;
1987
892a7f70 1988 if (nfp_net_rx_ring_bufs_alloc(dp, &dp->rx_rings[r]))
36a857e4
JK
1989 goto err_free_ring;
1990 }
1991
892a7f70 1992 return 0;
36a857e4
JK
1993
1994err_free_prev:
1995 while (r--) {
892a7f70 1996 nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
36a857e4 1997err_free_ring:
892a7f70 1998 nfp_net_rx_ring_free(&dp->rx_rings[r]);
36a857e4 1999 }
892a7f70
JK
2000 kfree(dp->rx_rings);
2001 return -ENOMEM;
36a857e4
JK
2002}
2003
892a7f70 2004static void nfp_net_rx_rings_free(struct nfp_net_dp *dp)
36a857e4
JK
2005{
2006 unsigned int r;
2007
892a7f70
JK
2008 for (r = 0; r < dp->num_rx_rings; r++) {
2009 nfp_net_rx_ring_bufs_free(dp, &dp->rx_rings[r]);
2010 nfp_net_rx_ring_free(&dp->rx_rings[r]);
36a857e4
JK
2011 }
2012
892a7f70 2013 kfree(dp->rx_rings);
36a857e4
JK
2014}
2015
e31230f9 2016static void
79c12a75
JK
2017nfp_net_vector_assign_rings(struct nfp_net_dp *dp,
2018 struct nfp_net_r_vector *r_vec, int idx)
e31230f9 2019{
79c12a75 2020 r_vec->rx_ring = idx < dp->num_rx_rings ? &dp->rx_rings[idx] : NULL;
ecd63a02 2021 r_vec->tx_ring =
79c12a75 2022 idx < dp->num_stack_tx_rings ? &dp->tx_rings[idx] : NULL;
ecd63a02 2023
79c12a75
JK
2024 r_vec->xdp_ring = idx < dp->num_tx_rings - dp->num_stack_tx_rings ?
2025 &dp->tx_rings[dp->num_stack_tx_rings + idx] : NULL;
e31230f9
JK
2026}
2027
0afbfb18
JK
2028static int
2029nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
2030 int idx)
4c352362 2031{
0afbfb18 2032 int err;
4c352362 2033
164d1e9e 2034 /* Setup NAPI */
79c12a75 2035 netif_napi_add(nn->dp.netdev, &r_vec->napi,
164d1e9e
JK
2036 nfp_net_poll, NAPI_POLL_WEIGHT);
2037
0afbfb18 2038 snprintf(r_vec->name, sizeof(r_vec->name),
79c12a75 2039 "%s-rxtx-%d", nn->dp.netdev->name, idx);
fdace6c2
JK
2040 err = request_irq(r_vec->irq_vector, r_vec->handler, 0, r_vec->name,
2041 r_vec);
0afbfb18 2042 if (err) {
164d1e9e 2043 netif_napi_del(&r_vec->napi);
fdace6c2 2044 nn_err(nn, "Error requesting IRQ %d\n", r_vec->irq_vector);
0afbfb18
JK
2045 return err;
2046 }
fdace6c2 2047 disable_irq(r_vec->irq_vector);
4c352362 2048
fdace6c2 2049 irq_set_affinity_hint(r_vec->irq_vector, &r_vec->affinity_mask);
4c352362 2050
fdace6c2
JK
2051 nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, r_vec->irq_vector,
2052 r_vec->irq_entry);
4c352362 2053
0afbfb18 2054 return 0;
4c352362
JK
2055}
2056
0afbfb18
JK
2057static void
2058nfp_net_cleanup_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec)
4c352362 2059{
fdace6c2 2060 irq_set_affinity_hint(r_vec->irq_vector, NULL);
4c352362 2061 netif_napi_del(&r_vec->napi);
fdace6c2 2062 free_irq(r_vec->irq_vector, r_vec);
4c352362
JK
2063}
2064
2065/**
2066 * nfp_net_rss_write_itbl() - Write RSS indirection table to device
2067 * @nn: NFP Net device to reconfigure
2068 */
2069void nfp_net_rss_write_itbl(struct nfp_net *nn)
2070{
2071 int i;
2072
2073 for (i = 0; i < NFP_NET_CFG_RSS_ITBL_SZ; i += 4)
2074 nn_writel(nn, NFP_NET_CFG_RSS_ITBL + i,
2075 get_unaligned_le32(nn->rss_itbl + i));
2076}
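/* [Editorial note: standalone sketch, not driver source.  The loop above
 * writes the indirection table four entries at a time; the helper below
 * shows the equivalent little-endian packing done by get_unaligned_le32().]
 */
static inline unsigned int nfp_example_itbl_word(const unsigned char *itbl,
						 unsigned int i)
{
	return (unsigned int)itbl[i] |
	       ((unsigned int)itbl[i + 1] << 8) |
	       ((unsigned int)itbl[i + 2] << 16) |
	       ((unsigned int)itbl[i + 3] << 24);
}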
2077
2078/**
2079 * nfp_net_rss_write_key() - Write RSS hash key to device
2080 * @nn: NFP Net device to reconfigure
2081 */
2082void nfp_net_rss_write_key(struct nfp_net *nn)
2083{
2084 int i;
2085
9ff304bf 2086 for (i = 0; i < nfp_net_rss_key_sz(nn); i += 4)
4c352362
JK
2087 nn_writel(nn, NFP_NET_CFG_RSS_KEY + i,
2088 get_unaligned_le32(nn->rss_key + i));
2089}
2090
2091/**
2092 * nfp_net_coalesce_write_cfg() - Write irq coalescence configuration to HW
2093 * @nn: NFP Net device to reconfigure
2094 */
2095void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
2096{
2097 u8 i;
2098 u32 factor;
2099 u32 value;
2100
2101 /* Compute factor used to convert coalesce '_usecs' parameters to
2102 * ME timestamp ticks. There are 16 ME clock cycles for each timestamp
2103 * count.
2104 */
2105 factor = nn->me_freq_mhz / 16;
2106
2107 /* copy RX interrupt coalesce parameters */
2108 value = (nn->rx_coalesce_max_frames << 16) |
2109 (factor * nn->rx_coalesce_usecs);
79c12a75 2110 for (i = 0; i < nn->dp.num_rx_rings; i++)
4c352362
JK
2111 nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value);
2112
2113 /* copy TX interrupt coalesce parameters */
2114 value = (nn->tx_coalesce_max_frames << 16) |
2115 (factor * nn->tx_coalesce_usecs);
79c12a75 2116 for (i = 0; i < nn->dp.num_tx_rings; i++)
4c352362
JK
2117 nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value);
2118}
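/* [Editorial note: standalone sketch, not driver source.  Shows how the
 * coalescing register value above is composed; the 1200 MHz ME clock in
 * the usage comment below is only an assumed example figure.]
 */
static inline unsigned int nfp_example_irqmod_reg(unsigned int me_freq_mhz,
						  unsigned int usecs,
						  unsigned int max_frames)
{
	/* timestamp ticks per microsecond: 16 ME cycles per tick */
	unsigned int factor = me_freq_mhz / 16;

	/* frame threshold in the high half-word, time threshold below it */
	return (max_frames << 16) | (factor * usecs);
}
/* e.g. nfp_example_irqmod_reg(1200, 50, 64) == (64 << 16) | (75 * 50) */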
2119
2120/**
f642963b 2121 * nfp_net_write_mac_addr() - Write MAC address to the device control BAR
4c352362 2122 * @nn: NFP Net device to reconfigure
9d372759 2123 * @addr: MAC address to write
4c352362 2124 *
f642963b
JK
2125 * Writes the given MAC address to the device control BAR. Does not
2126 * perform the required reconfig. We do a bit of a byte-swapping dance
2127 * because the firmware is little-endian.
4c352362 2128 */
9d372759 2129static void nfp_net_write_mac_addr(struct nfp_net *nn, const u8 *addr)
4c352362 2130{
9d372759
PC
2131 nn_writel(nn, NFP_NET_CFG_MACADDR + 0, get_unaligned_be32(addr));
2132 nn_writew(nn, NFP_NET_CFG_MACADDR + 6, get_unaligned_be16(addr + 4));
4c352362
JK
2133}
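/* [Editorial note: worked example for the two writes above.  For an
 * illustrative address 00:15:4d:12:34:56 the registers receive:
 *   NFP_NET_CFG_MACADDR + 0 <- 0x00154d12   (bytes 0-3, big-endian)
 *   NFP_NET_CFG_MACADDR + 6 <- 0x3456       (bytes 4-5, big-endian)
 * Note the last two bytes land at offset 6, not 4.]
 */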
2134
ca40feab
JK
2135static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
2136{
2137 nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), 0);
2138 nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), 0);
2139 nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), 0);
2140
2141 nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), 0);
2142 nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), 0);
2143 nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), 0);
2144}
2145
4c352362
JK
2146/**
2147 * nfp_net_clear_config_and_disable() - Clear control BAR and disable NFP
2148 * @nn: NFP Net device to reconfigure
2149 */
2150static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
2151{
2152 u32 new_ctrl, update;
ca40feab 2153 unsigned int r;
4c352362
JK
2154 int err;
2155
79c12a75 2156 new_ctrl = nn->dp.ctrl;
4c352362
JK
2157 new_ctrl &= ~NFP_NET_CFG_CTRL_ENABLE;
2158 update = NFP_NET_CFG_UPDATE_GEN;
2159 update |= NFP_NET_CFG_UPDATE_MSIX;
2160 update |= NFP_NET_CFG_UPDATE_RING;
2161
2162 if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
2163 new_ctrl &= ~NFP_NET_CFG_CTRL_RINGCFG;
2164
2165 nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
2166 nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
2167
2168 nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2169 err = nfp_net_reconfig(nn, update);
aba52df8 2170 if (err)
4c352362 2171 nn_err(nn, "Could not disable device: %d\n", err);
4c352362 2172
79c12a75
JK
2173 for (r = 0; r < nn->dp.num_rx_rings; r++)
2174 nfp_net_rx_ring_reset(&nn->dp.rx_rings[r]);
2175 for (r = 0; r < nn->dp.num_tx_rings; r++)
2176 nfp_net_tx_ring_reset(&nn->dp, &nn->dp.tx_rings[r]);
2177 for (r = 0; r < nn->dp.num_r_vecs; r++)
ca40feab
JK
2178 nfp_net_vec_clear_ring_data(nn, r);
2179
79c12a75 2180 nn->dp.ctrl = new_ctrl;
4c352362
JK
2181}
2182
ca40feab 2183static void
cbeaf7aa
JK
2184nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn,
2185 struct nfp_net_rx_ring *rx_ring, unsigned int idx)
ca40feab
JK
2186{
2187 /* Write the DMA address, size and MSI-X info to the device */
cbeaf7aa
JK
2188 nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma);
2189 nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt));
fdace6c2 2190 nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_entry);
cbeaf7aa 2191}
ca40feab 2192
cbeaf7aa
JK
2193static void
2194nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
2195 struct nfp_net_tx_ring *tx_ring, unsigned int idx)
2196{
2197 nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
2198 nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
fdace6c2 2199 nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_entry);
ca40feab
JK
2200}
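/* [Editorial note: both hw_cfg_write helpers above report the ring size
 * as its base-2 logarithm, e.g. ilog2(4096) == 12, which presumes the
 * descriptor counts are powers of two.]
 */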
2201
ac0488ef
JK
2202/**
2203 * nfp_net_set_config_and_enable() - Write control BAR and enable NFP
2204 * @nn: NFP Net device to reconfigure
2205 */
2206static int nfp_net_set_config_and_enable(struct nfp_net *nn)
1cd0cfc4 2207{
ee200a73 2208 u32 bufsz, new_ctrl, update = 0;
1cd0cfc4
JK
2209 unsigned int r;
2210 int err;
2211
79c12a75 2212 new_ctrl = nn->dp.ctrl;
1cd0cfc4 2213
611bdd49 2214 if (nn->dp.ctrl & NFP_NET_CFG_CTRL_RSS_ANY) {
1cd0cfc4
JK
2215 nfp_net_rss_write_key(nn);
2216 nfp_net_rss_write_itbl(nn);
2217 nn_writel(nn, NFP_NET_CFG_RSS_CTRL, nn->rss_cfg);
2218 update |= NFP_NET_CFG_UPDATE_RSS;
2219 }
2220
ad50451e 2221 if (nn->dp.ctrl & NFP_NET_CFG_CTRL_IRQMOD) {
1cd0cfc4 2222 nfp_net_coalesce_write_cfg(nn);
1cd0cfc4
JK
2223 update |= NFP_NET_CFG_UPDATE_IRQMOD;
2224 }
2225
79c12a75
JK
2226 for (r = 0; r < nn->dp.num_tx_rings; r++)
2227 nfp_net_tx_ring_hw_cfg_write(nn, &nn->dp.tx_rings[r], r);
2228 for (r = 0; r < nn->dp.num_rx_rings; r++)
2229 nfp_net_rx_ring_hw_cfg_write(nn, &nn->dp.rx_rings[r], r);
1cd0cfc4 2230
79c12a75
JK
2231 nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->dp.num_tx_rings == 64 ?
2232 0xffffffffffffffffULL : ((u64)1 << nn->dp.num_tx_rings) - 1);
1cd0cfc4 2233
79c12a75
JK
2234 nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, nn->dp.num_rx_rings == 64 ?
2235 0xffffffffffffffffULL : ((u64)1 << nn->dp.num_rx_rings) - 1);
1cd0cfc4 2236
9d372759 2237 nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
1cd0cfc4 2238
79c12a75 2239 nn_writel(nn, NFP_NET_CFG_MTU, nn->dp.netdev->mtu);
ee200a73
JK
2240
2241 bufsz = nn->dp.fl_bufsz - nn->dp.rx_dma_off - NFP_NET_RX_BUF_NON_DATA;
2242 nn_writel(nn, NFP_NET_CFG_FLBUFSZ, bufsz);
1cd0cfc4
JK
2243
2244 /* Enable device */
2245 new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
2246 update |= NFP_NET_CFG_UPDATE_GEN;
2247 update |= NFP_NET_CFG_UPDATE_MSIX;
2248 update |= NFP_NET_CFG_UPDATE_RING;
2249 if (nn->cap & NFP_NET_CFG_CTRL_RINGCFG)
2250 new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG;
2251
2252 nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2253 err = nfp_net_reconfig(nn, update);
ac0488ef
JK
2254 if (err) {
2255 nfp_net_clear_config_and_disable(nn);
2256 return err;
2257 }
1cd0cfc4 2258
79c12a75 2259 nn->dp.ctrl = new_ctrl;
1cd0cfc4 2260
79c12a75 2261 for (r = 0; r < nn->dp.num_rx_rings; r++)
6fe0c3b4 2262 nfp_net_rx_ring_fill_freelist(&nn->dp, &nn->dp.rx_rings[r]);
aba52df8 2263
1cd0cfc4
JK
2264 /* Since reconfiguration requests while NFP is down are ignored we
2265 * have to wipe the entire VXLAN configuration and reinitialize it.
2266 */
79c12a75 2267 if (nn->dp.ctrl & NFP_NET_CFG_CTRL_VXLAN) {
1cd0cfc4
JK
2268 memset(&nn->vxlan_ports, 0, sizeof(nn->vxlan_ports));
2269 memset(&nn->vxlan_usecnt, 0, sizeof(nn->vxlan_usecnt));
79c12a75 2270 udp_tunnel_get_rx_info(nn->dp.netdev);
1cd0cfc4
JK
2271 }
2272
ac0488ef 2273 return 0;
1cd0cfc4
JK
2274}
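/* [Editorial note: standalone sketch, not driver source.  Equivalent of
 * the TXRS/RXRS enable masks written above; the 64-ring special case
 * avoids shifting a 64-bit value by 64, which is undefined behaviour.]
 */
static inline unsigned long long nfp_example_ring_mask(unsigned int num_rings)
{
	return num_rings == 64 ? 0xffffffffffffffffULL :
				 ((unsigned long long)1 << num_rings) - 1;
}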
2275
1cd0cfc4
JK
2276/**
2277 * nfp_net_open_stack() - Start the device from the stack's perspective
2278 * @nn: NFP Net device to reconfigure
2279 */
2280static void nfp_net_open_stack(struct nfp_net *nn)
2281{
2282 unsigned int r;
2283
79c12a75 2284 for (r = 0; r < nn->dp.num_r_vecs; r++) {
aba52df8 2285 napi_enable(&nn->r_vecs[r].napi);
fdace6c2 2286 enable_irq(nn->r_vecs[r].irq_vector);
aba52df8 2287 }
1cd0cfc4 2288
79c12a75 2289 netif_tx_wake_all_queues(nn->dp.netdev);
1cd0cfc4 2290
ce449ba7 2291 enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
1cd0cfc4
JK
2292 nfp_net_read_link_status(nn);
2293}
2294
4c352362
JK
2295static int nfp_net_netdev_open(struct net_device *netdev)
2296{
2297 struct nfp_net *nn = netdev_priv(netdev);
2298 int err, r;
4c352362 2299
4c352362
JK
2300 /* Step 1: Allocate resources for rings and the like
2301 * - Request interrupts
2302 * - Allocate RX and TX ring resources
2303 * - Setup initial RSS table
2304 */
2305 err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_EXN, "%s-exn",
2306 nn->exn_name, sizeof(nn->exn_name),
2307 NFP_NET_IRQ_EXN_IDX, nn->exn_handler);
2308 if (err)
2309 return err;
0ba40af9
JK
2310 err = nfp_net_aux_irq_request(nn, NFP_NET_CFG_LSC, "%s-lsc",
2311 nn->lsc_name, sizeof(nn->lsc_name),
2312 NFP_NET_IRQ_LSC_IDX, nn->lsc_handler);
2313 if (err)
2314 goto err_free_exn;
ce449ba7 2315 disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
4c352362 2316
79c12a75 2317 for (r = 0; r < nn->dp.num_r_vecs; r++) {
0afbfb18
JK
2318 err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
2319 if (err)
cbeaf7aa
JK
2320 goto err_cleanup_vec_p;
2321 }
a10b563d 2322
892a7f70
JK
2323 err = nfp_net_rx_rings_prepare(nn, &nn->dp);
2324 if (err)
a10b563d 2325 goto err_cleanup_vec;
114bdef0 2326
892a7f70
JK
2327 err = nfp_net_tx_rings_prepare(nn, &nn->dp);
2328 if (err)
a10b563d 2329 goto err_free_rx_rings;
4c352362 2330
e31230f9 2331 for (r = 0; r < nn->max_r_vecs; r++)
79c12a75 2332 nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
e31230f9 2333
79c12a75 2334 err = netif_set_real_num_tx_queues(netdev, nn->dp.num_stack_tx_rings);
4c352362
JK
2335 if (err)
2336 goto err_free_rings;
2337
79c12a75 2338 err = netif_set_real_num_rx_queues(netdev, nn->dp.num_rx_rings);
4c352362
JK
2339 if (err)
2340 goto err_free_rings;
2341
4c352362
JK
2342 /* Step 2: Configure the NFP
2343 * - Enable rings from 0 to tx_rings/rx_rings - 1.
2344 * - Write MAC address (in case it changed)
2345 * - Set the MTU
2346 * - Set the Freelist buffer size
2347 * - Enable the FW
2348 */
1cd0cfc4 2349 err = nfp_net_set_config_and_enable(nn);
4c352362 2350 if (err)
1cd0cfc4 2351 goto err_free_rings;
4c352362
JK
2352
2353 /* Step 3: Enable for kernel
2354 * - put some freelist descriptors on each RX ring
2355 * - enable NAPI on each ring
2356 * - enable all TX queues
2357 * - set link state
2358 */
1cd0cfc4 2359 nfp_net_open_stack(nn);
4c352362
JK
2360
2361 return 0;
2362
4c352362 2363err_free_rings:
892a7f70 2364 nfp_net_tx_rings_free(&nn->dp);
a10b563d 2365err_free_rx_rings:
892a7f70 2366 nfp_net_rx_rings_free(&nn->dp);
a10b563d 2367err_cleanup_vec:
79c12a75 2368 r = nn->dp.num_r_vecs;
0afbfb18 2369err_cleanup_vec_p:
cbeaf7aa 2370 while (r--)
0afbfb18 2371 nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
0ba40af9 2372 nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
4c352362
JK
2373err_free_exn:
2374 nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
2375 return err;
2376}
2377
2378/**
1cd0cfc4
JK
2379 * nfp_net_close_stack() - Quiesce the stack (part of close)
2380 * @nn: NFP Net device to reconfigure
4c352362 2381 */
1cd0cfc4 2382static void nfp_net_close_stack(struct nfp_net *nn)
4c352362 2383{
1cd0cfc4 2384 unsigned int r;
4c352362 2385
ce449ba7 2386 disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
79c12a75 2387 netif_carrier_off(nn->dp.netdev);
4c352362
JK
2388 nn->link_up = false;
2389
79c12a75 2390 for (r = 0; r < nn->dp.num_r_vecs; r++) {
fdace6c2 2391 disable_irq(nn->r_vecs[r].irq_vector);
4c352362 2392 napi_disable(&nn->r_vecs[r].napi);
aba52df8 2393 }
4c352362 2394
79c12a75 2395 netif_tx_disable(nn->dp.netdev);
1cd0cfc4 2396}
4c352362 2397
1cd0cfc4
JK
2398/**
2399 * nfp_net_close_free_all() - Free all runtime resources
2400 * @nn: NFP Net device to reconfigure
2401 */
2402static void nfp_net_close_free_all(struct nfp_net *nn)
2403{
2404 unsigned int r;
4c352362 2405
79c12a75 2406 for (r = 0; r < nn->dp.num_rx_rings; r++) {
9dc6b116 2407 nfp_net_rx_ring_bufs_free(&nn->dp, &nn->dp.rx_rings[r]);
79c12a75 2408 nfp_net_rx_ring_free(&nn->dp.rx_rings[r]);
cbeaf7aa 2409 }
92e68195
JK
2410 for (r = 0; r < nn->dp.num_tx_rings; r++) {
2411 nfp_net_tx_ring_bufs_free(&nn->dp, &nn->dp.tx_rings[r]);
79c12a75 2412 nfp_net_tx_ring_free(&nn->dp.tx_rings[r]);
92e68195 2413 }
79c12a75 2414 for (r = 0; r < nn->dp.num_r_vecs; r++)
0afbfb18 2415 nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
4c352362 2416
79c12a75
JK
2417 kfree(nn->dp.rx_rings);
2418 kfree(nn->dp.tx_rings);
73725d9d 2419
0ba40af9 2420 nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
4c352362 2421 nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
1cd0cfc4
JK
2422}
2423
2424/**
2425 * nfp_net_netdev_close() - Called when the device is downed
2426 * @netdev: netdev structure
2427 */
2428static int nfp_net_netdev_close(struct net_device *netdev)
2429{
2430 struct nfp_net *nn = netdev_priv(netdev);
2431
1cd0cfc4
JK
2432 /* Step 1: Disable RX and TX rings from the Linux kernel perspective
2433 */
2434 nfp_net_close_stack(nn);
2435
2436 /* Step 2: Tell NFP
2437 */
2438 nfp_net_clear_config_and_disable(nn);
2439
2440 /* Step 3: Free resources
2441 */
2442 nfp_net_close_free_all(nn);
4c352362
JK
2443
2444 nn_dbg(nn, "%s down", netdev->name);
2445 return 0;
2446}
2447
2448static void nfp_net_set_rx_mode(struct net_device *netdev)
2449{
2450 struct nfp_net *nn = netdev_priv(netdev);
2451 u32 new_ctrl;
2452
79c12a75 2453 new_ctrl = nn->dp.ctrl;
4c352362
JK
2454
2455 if (netdev->flags & IFF_PROMISC) {
2456 if (nn->cap & NFP_NET_CFG_CTRL_PROMISC)
2457 new_ctrl |= NFP_NET_CFG_CTRL_PROMISC;
2458 else
2459 nn_warn(nn, "FW does not support promiscuous mode\n");
2460 } else {
2461 new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC;
2462 }
2463
79c12a75 2464 if (new_ctrl == nn->dp.ctrl)
4c352362
JK
2465 return;
2466
2467 nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
3d780b92 2468 nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
4c352362 2469
79c12a75 2470 nn->dp.ctrl = new_ctrl;
4c352362
JK
2471}
2472
1e9e10d0
JK
2473static void nfp_net_rss_init_itbl(struct nfp_net *nn)
2474{
2475 int i;
2476
2477 for (i = 0; i < sizeof(nn->rss_itbl); i++)
2478 nn->rss_itbl[i] =
79c12a75 2479 ethtool_rxfh_indir_default(i, nn->dp.num_rx_rings);
1e9e10d0
JK
2480}
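/* [Editorial note: ethtool_rxfh_indir_default() spreads table entries
 * round-robin, i.e. entry i maps to RX ring i % num_rx_rings, so the
 * default indirection table balances flows evenly across the enabled
 * rings.]
 */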
2481
512e94dc
JK
2482static void nfp_net_dp_swap(struct nfp_net *nn, struct nfp_net_dp *dp)
2483{
2484 struct nfp_net_dp new_dp = *dp;
2485
2486 *dp = nn->dp;
2487 nn->dp = new_dp;
76e1e1a8
JK
2488
2489 nn->dp.netdev->mtu = new_dp.mtu;
892a7f70
JK
2490
2491 if (!netif_is_rxfh_configured(nn->dp.netdev))
2492 nfp_net_rss_init_itbl(nn);
512e94dc
JK
2493}
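/* [Editorial note: nfp_net_dp_swap() exchanges the candidate data path
 * with the live nn->dp, so after the call *dp holds the previous
 * configuration -- which is what lets the ring reconfig path roll back
 * to, or free, the old rings later.]
 */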
2494
892a7f70 2495static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp)
4c352362 2496{
e31230f9 2497 unsigned int r;
164d1e9e 2498 int err;
e31230f9 2499
892a7f70 2500 nfp_net_dp_swap(nn, dp);
164d1e9e 2501
e31230f9 2502 for (r = 0; r < nn->max_r_vecs; r++)
79c12a75 2503 nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r);
e31230f9 2504
79c12a75 2505 err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings);
08348995
AB
2506 if (err)
2507 return err;
164d1e9e 2508
79c12a75
JK
2509 if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) {
2510 err = netif_set_real_num_tx_queues(nn->dp.netdev,
2511 nn->dp.num_stack_tx_rings);
164d1e9e
JK
2512 if (err)
2513 return err;
2514 }
2515
ac0488ef 2516 return nfp_net_set_config_and_enable(nn);
68453c7a 2517}
36a857e4 2518
783496b0
JK
2519struct nfp_net_dp *nfp_net_clone_dp(struct nfp_net *nn)
2520{
2521 struct nfp_net_dp *new;
2522
2523 new = kmalloc(sizeof(*new), GFP_KERNEL);
2524 if (!new)
2525 return NULL;
2526
2527 *new = nn->dp;
2528
2529 /* Clear things which need to be recomputed */
2530 new->fl_bufsz = 0;
2531 new->tx_rings = NULL;
2532 new->rx_rings = NULL;
2533 new->num_r_vecs = 0;
2534 new->num_stack_tx_rings = 0;
2535
2536 return new;
2537}
2538
d957c0f7
JK
2539static int
2540nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp,
2541 struct netlink_ext_ack *extack)
ecd63a02
JK
2542{
2543 /* XDP-enabled tests */
9dc6b116 2544 if (!dp->xdp_prog)
ecd63a02 2545 return 0;
2195c263 2546 if (dp->fl_bufsz > PAGE_SIZE) {
4d463c4d 2547 NL_SET_ERR_MSG_MOD(extack, "MTU too large w/ XDP enabled");
ecd63a02
JK
2548 return -EINVAL;
2549 }
892a7f70 2550 if (dp->num_tx_rings > nn->max_tx_rings) {
4d463c4d 2551 NL_SET_ERR_MSG_MOD(extack, "Insufficient number of TX rings w/ XDP enabled");
ecd63a02
JK
2552 return -EINVAL;
2553 }
2554
2555 return 0;
2556}
2557
d957c0f7
JK
2558int nfp_net_ring_reconfig(struct nfp_net *nn, struct nfp_net_dp *dp,
2559 struct netlink_ext_ack *extack)
cc7c0333 2560{
512e94dc 2561 int r, err;
cc7c0333 2562
76e1e1a8 2563 dp->fl_bufsz = nfp_net_calc_fl_bufsz(dp);
2195c263 2564
892a7f70 2565 dp->num_stack_tx_rings = dp->num_tx_rings;
9dc6b116 2566 if (dp->xdp_prog)
892a7f70 2567 dp->num_stack_tx_rings -= dp->num_rx_rings;
ecd63a02 2568
892a7f70 2569 dp->num_r_vecs = max(dp->num_rx_rings, dp->num_stack_tx_rings);
ecd63a02 2570
d957c0f7 2571 err = nfp_net_check_config(nn, dp, extack);
ecd63a02 2572 if (err)
783496b0 2573 goto exit_free_dp;
164d1e9e 2574
783496b0 2575 if (!netif_running(dp->netdev)) {
892a7f70 2576 nfp_net_dp_swap(nn, dp);
783496b0
JK
2577 err = 0;
2578 goto exit_free_dp;
cc7c0333
JK
2579 }
2580
cc7c0333 2581 /* Prepare new rings */
512e94dc 2582 for (r = nn->dp.num_r_vecs; r < dp->num_r_vecs; r++) {
164d1e9e
JK
2583 err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
2584 if (err) {
512e94dc 2585 dp->num_r_vecs = r;
164d1e9e
JK
2586 goto err_cleanup_vecs;
2587 }
2588 }
892a7f70
JK
2589
2590 err = nfp_net_rx_rings_prepare(nn, dp);
2591 if (err)
2592 goto err_cleanup_vecs;
2593
2594 err = nfp_net_tx_rings_prepare(nn, dp);
2595 if (err)
2596 goto err_free_rx;
cc7c0333
JK
2597
2598 /* Stop device, swap in new rings, try to start the firmware */
2599 nfp_net_close_stack(nn);
2600 nfp_net_clear_config_and_disable(nn);
2601
892a7f70 2602 err = nfp_net_dp_swap_enable(nn, dp);
cc7c0333 2603 if (err) {
68453c7a 2604 int err2;
cc7c0333 2605
68453c7a 2606 nfp_net_clear_config_and_disable(nn);
cc7c0333 2607
68453c7a 2608 /* Try with old configuration and old rings */
892a7f70 2609 err2 = nfp_net_dp_swap_enable(nn, dp);
68453c7a 2610 if (err2)
cc7c0333 2611 nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
68453c7a 2612 err, err2);
cc7c0333 2613 }
512e94dc 2614 for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
164d1e9e 2615 nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
cc7c0333 2616
892a7f70
JK
2617 nfp_net_rx_rings_free(dp);
2618 nfp_net_tx_rings_free(dp);
cc7c0333
JK
2619
2620 nfp_net_open_stack(nn);
783496b0
JK
2621exit_free_dp:
2622 kfree(dp);
cc7c0333
JK
2623
2624 return err;
68453c7a
JK
2625
2626err_free_rx:
892a7f70 2627 nfp_net_rx_rings_free(dp);
164d1e9e 2628err_cleanup_vecs:
512e94dc 2629 for (r = dp->num_r_vecs - 1; r >= nn->dp.num_r_vecs; r--)
164d1e9e 2630 nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
783496b0 2631 kfree(dp);
68453c7a
JK
2632 return err;
2633}
2634
2635static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
2636{
2637 struct nfp_net *nn = netdev_priv(netdev);
783496b0
JK
2638 struct nfp_net_dp *dp;
2639
2640 dp = nfp_net_clone_dp(nn);
2641 if (!dp)
2642 return -ENOMEM;
68453c7a 2643
76e1e1a8
JK
2644 dp->mtu = new_mtu;
2645
d957c0f7 2646 return nfp_net_ring_reconfig(nn, dp, NULL);
cc7c0333
JK
2647}
2648
bc1f4470 2649static void nfp_net_stat64(struct net_device *netdev,
2650 struct rtnl_link_stats64 *stats)
4c352362
JK
2651{
2652 struct nfp_net *nn = netdev_priv(netdev);
2653 int r;
2654
79c12a75 2655 for (r = 0; r < nn->dp.num_r_vecs; r++) {
4c352362
JK
2656 struct nfp_net_r_vector *r_vec = &nn->r_vecs[r];
2657 u64 data[3];
2658 unsigned int start;
2659
2660 do {
2661 start = u64_stats_fetch_begin(&r_vec->rx_sync);
2662 data[0] = r_vec->rx_pkts;
2663 data[1] = r_vec->rx_bytes;
2664 data[2] = r_vec->rx_drops;
2665 } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));
2666 stats->rx_packets += data[0];
2667 stats->rx_bytes += data[1];
2668 stats->rx_dropped += data[2];
2669
2670 do {
2671 start = u64_stats_fetch_begin(&r_vec->tx_sync);
2672 data[0] = r_vec->tx_pkts;
2673 data[1] = r_vec->tx_bytes;
2674 data[2] = r_vec->tx_errors;
2675 } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));
2676 stats->tx_packets += data[0];
2677 stats->tx_bytes += data[1];
2678 stats->tx_errors += data[2];
2679 }
4c352362
JK
2680}
2681
7533fdc0
JK
2682static bool nfp_net_ebpf_capable(struct nfp_net *nn)
2683{
2684 if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
2685 nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
2686 return true;
2687 return false;
2688}
2689
2690static int
2691nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
2692 struct tc_to_netdev *tc)
2693{
2694 struct nfp_net *nn = netdev_priv(netdev);
2695
2696 if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
46c50518 2697 return -EOPNOTSUPP;
7533fdc0 2698 if (proto != htons(ETH_P_ALL))
46c50518 2699 return -EOPNOTSUPP;
7533fdc0 2700
6d677075 2701 if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) {
79c12a75 2702 if (!nn->dp.bpf_offload_xdp)
6d677075
JK
2703 return nfp_net_bpf_offload(nn, tc->cls_bpf);
2704 else
2705 return -EBUSY;
2706 }
7533fdc0
JK
2707
2708 return -EINVAL;
2709}
2710
4c352362
JK
2711static int nfp_net_set_features(struct net_device *netdev,
2712 netdev_features_t features)
2713{
2714 netdev_features_t changed = netdev->features ^ features;
2715 struct nfp_net *nn = netdev_priv(netdev);
2716 u32 new_ctrl;
2717 int err;
2718
2719 /* Assume this is not called with features we have not advertised */
2720
79c12a75 2721 new_ctrl = nn->dp.ctrl;
4c352362
JK
2722
2723 if (changed & NETIF_F_RXCSUM) {
2724 if (features & NETIF_F_RXCSUM)
ddb98d94 2725 new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY;
4c352362 2726 else
ddb98d94 2727 new_ctrl &= ~NFP_NET_CFG_CTRL_RXCSUM_ANY;
4c352362
JK
2728 }
2729
2730 if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) {
2731 if (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
2732 new_ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
2733 else
2734 new_ctrl &= ~NFP_NET_CFG_CTRL_TXCSUM;
2735 }
2736
2737 if (changed & (NETIF_F_TSO | NETIF_F_TSO6)) {
2738 if (features & (NETIF_F_TSO | NETIF_F_TSO6))
28063be6
EP
2739 new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?:
2740 NFP_NET_CFG_CTRL_LSO;
4c352362 2741 else
28063be6 2742 new_ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY;
4c352362
JK
2743 }
2744
2745 if (changed & NETIF_F_HW_VLAN_CTAG_RX) {
2746 if (features & NETIF_F_HW_VLAN_CTAG_RX)
2747 new_ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
2748 else
2749 new_ctrl &= ~NFP_NET_CFG_CTRL_RXVLAN;
2750 }
2751
2752 if (changed & NETIF_F_HW_VLAN_CTAG_TX) {
2753 if (features & NETIF_F_HW_VLAN_CTAG_TX)
2754 new_ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
2755 else
2756 new_ctrl &= ~NFP_NET_CFG_CTRL_TXVLAN;
2757 }
2758
2759 if (changed & NETIF_F_SG) {
2760 if (features & NETIF_F_SG)
2761 new_ctrl |= NFP_NET_CFG_CTRL_GATHER;
2762 else
2763 new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
2764 }
2765
79c12a75 2766 if (changed & NETIF_F_HW_TC && nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) {
7533fdc0
JK
2767 nn_err(nn, "Cannot disable HW TC offload while in use\n");
2768 return -EBUSY;
2769 }
2770
4c352362
JK
2771 nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
2772 netdev->features, features, changed);
2773
79c12a75 2774 if (new_ctrl == nn->dp.ctrl)
4c352362
JK
2775 return 0;
2776
79c12a75 2777 nn_dbg(nn, "NIC ctrl: 0x%x -> 0x%x\n", nn->dp.ctrl, new_ctrl);
4c352362
JK
2778 nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
2779 err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
2780 if (err)
2781 return err;
2782
79c12a75 2783 nn->dp.ctrl = new_ctrl;
4c352362
JK
2784
2785 return 0;
2786}
2787
2788static netdev_features_t
2789nfp_net_features_check(struct sk_buff *skb, struct net_device *dev,
2790 netdev_features_t features)
2791{
2792 u8 l4_hdr;
2793
2794 /* We can't do TSO over double tagged packets (802.1AD) */
2795 features &= vlan_features_check(skb, features);
2796
2797 if (!skb->encapsulation)
2798 return features;
2799
2800 /* Ensure that inner L4 header offset fits into TX descriptor field */
2801 if (skb_is_gso(skb)) {
2802 u32 hdrlen;
2803
2804 hdrlen = skb_inner_transport_header(skb) - skb->data +
2805 inner_tcp_hdrlen(skb);
2806
2807 if (unlikely(hdrlen > NFP_NET_LSO_MAX_HDR_SZ))
2808 features &= ~NETIF_F_GSO_MASK;
2809 }
2810
2811 /* VXLAN/GRE check */
2812 switch (vlan_get_protocol(skb)) {
2813 case htons(ETH_P_IP):
2814 l4_hdr = ip_hdr(skb)->protocol;
2815 break;
2816 case htons(ETH_P_IPV6):
2817 l4_hdr = ipv6_hdr(skb)->nexthdr;
2818 break;
2819 default:
a188222b 2820 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4c352362
JK
2821 }
2822
2823 if (skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
2824 skb->inner_protocol != htons(ETH_P_TEB) ||
2825 (l4_hdr != IPPROTO_UDP && l4_hdr != IPPROTO_GRE) ||
2826 (l4_hdr == IPPROTO_UDP &&
2827 (skb_inner_mac_header(skb) - skb_transport_header(skb) !=
2828 sizeof(struct udphdr) + sizeof(struct vxlanhdr))))
a188222b 2829 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4c352362
JK
2830
2831 return features;
2832}
2833
2834/**
2835 * nfp_net_set_vxlan_port() - set vxlan port in SW and reconfigure HW
2836 * @nn: NFP Net device to reconfigure
2837 * @idx: Index into the port table where new port should be written
2838 * @port: UDP port to configure (pass zero to remove VXLAN port)
2839 */
2840static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port)
2841{
2842 int i;
2843
2844 nn->vxlan_ports[idx] = port;
2845
79c12a75 2846 if (!(nn->dp.ctrl & NFP_NET_CFG_CTRL_VXLAN))
4c352362
JK
2847 return;
2848
2849 BUILD_BUG_ON(NFP_NET_N_VXLAN_PORTS & 1);
2850 for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i += 2)
2851 nn_writel(nn, NFP_NET_CFG_VXLAN_PORT + i * sizeof(port),
2852 be16_to_cpu(nn->vxlan_ports[i + 1]) << 16 |
2853 be16_to_cpu(nn->vxlan_ports[i]));
2854
3d780b92 2855 nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_VXLAN);
4c352362
JK
2856}
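/* [Editorial note: standalone sketch, not driver source.  Each 32-bit
 * VXLAN register written above carries two UDP ports in host byte
 * order: the odd-indexed table entry in the upper half-word, the even
 * one in the lower half-word.]
 */
static inline unsigned int nfp_example_vxlan_word(unsigned short port_even,
						  unsigned short port_odd)
{
	return ((unsigned int)port_odd << 16) | port_even;
}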
2857
2858/**
2859 * nfp_net_find_vxlan_idx() - find table entry of the port or a free one
2860 * @nn: NFP Network structure
2861 * @port: UDP port to look for
2862 *
2863 * Return: if the port is already in the table -- its position;
2864 * if the port is not in the table -- free position to use;
2865 * if the table is full -- -ENOSPC.
2866 */
2867static int nfp_net_find_vxlan_idx(struct nfp_net *nn, __be16 port)
2868{
2869 int i, free_idx = -ENOSPC;
2870
2871 for (i = 0; i < NFP_NET_N_VXLAN_PORTS; i++) {
2872 if (nn->vxlan_ports[i] == port)
2873 return i;
2874 if (!nn->vxlan_usecnt[i])
2875 free_idx = i;
2876 }
2877
2878 return free_idx;
2879}
2880
2881static void nfp_net_add_vxlan_port(struct net_device *netdev,
3ab68837 2882 struct udp_tunnel_info *ti)
4c352362
JK
2883{
2884 struct nfp_net *nn = netdev_priv(netdev);
2885 int idx;
2886
3ab68837
AD
2887 if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
2888 return;
2889
2890 idx = nfp_net_find_vxlan_idx(nn, ti->port);
4c352362
JK
2891 if (idx == -ENOSPC)
2892 return;
2893
2894 if (!nn->vxlan_usecnt[idx]++)
3ab68837 2895 nfp_net_set_vxlan_port(nn, idx, ti->port);
4c352362
JK
2896}
2897
2898static void nfp_net_del_vxlan_port(struct net_device *netdev,
3ab68837 2899 struct udp_tunnel_info *ti)
4c352362
JK
2900{
2901 struct nfp_net *nn = netdev_priv(netdev);
2902 int idx;
2903
3ab68837
AD
2904 if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
2905 return;
2906
2907 idx = nfp_net_find_vxlan_idx(nn, ti->port);
f50cef6f 2908 if (idx == -ENOSPC || !nn->vxlan_usecnt[idx])
4c352362
JK
2909 return;
2910
2911 if (!--nn->vxlan_usecnt[idx])
2912 nfp_net_set_vxlan_port(nn, idx, 0);
2913}
2914
6d677075
JK
2915static int nfp_net_xdp_offload(struct nfp_net *nn, struct bpf_prog *prog)
2916{
2917 struct tc_cls_bpf_offload cmd = {
2918 .prog = prog,
2919 };
2920 int ret;
2921
2922 if (!nfp_net_ebpf_capable(nn))
2923 return -EINVAL;
2924
79c12a75
JK
2925 if (nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) {
2926 if (!nn->dp.bpf_offload_xdp)
6d677075
JK
2927 return prog ? -EBUSY : 0;
2928 cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY;
2929 } else {
2930 if (!prog)
2931 return 0;
2932 cmd.command = TC_CLSBPF_ADD;
2933 }
2934
2935 ret = nfp_net_bpf_offload(nn, &cmd);
2936 /* Stop offload if replace not possible */
2937 if (ret && cmd.command == TC_CLSBPF_REPLACE)
2938 nfp_net_xdp_offload(nn, NULL);
79c12a75 2939 nn->dp.bpf_offload_xdp = prog && !ret;
6d677075
JK
2940 return ret;
2941}
2942
d957c0f7 2943static int nfp_net_xdp_setup(struct nfp_net *nn, struct netdev_xdp *xdp)
ecd63a02 2944{
9dc6b116 2945 struct bpf_prog *old_prog = nn->dp.xdp_prog;
d957c0f7 2946 struct bpf_prog *prog = xdp->prog;
783496b0 2947 struct nfp_net_dp *dp;
ecd63a02
JK
2948 int err;
2949
79c12a75 2950 if (!prog && !nn->dp.xdp_prog)
ecd63a02 2951 return 0;
79c12a75
JK
2952 if (prog && nn->dp.xdp_prog) {
2953 prog = xchg(&nn->dp.xdp_prog, prog);
ecd63a02 2954 bpf_prog_put(prog);
79c12a75 2955 nfp_net_xdp_offload(nn, nn->dp.xdp_prog);
ecd63a02
JK
2956 return 0;
2957 }
2958
783496b0
JK
2959 dp = nfp_net_clone_dp(nn);
2960 if (!dp)
2961 return -ENOMEM;
2962
9dc6b116 2963 dp->xdp_prog = prog;
892a7f70 2964 dp->num_tx_rings += prog ? nn->dp.num_rx_rings : -nn->dp.num_rx_rings;
c487e6b1 2965 dp->rx_dma_dir = prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
dbf637ff 2966 dp->rx_dma_off = prog ? XDP_PACKET_HEADROOM - nn->dp.rx_offset : 0;
ecd63a02
JK
2967
2968 /* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
d957c0f7 2969 err = nfp_net_ring_reconfig(nn, dp, xdp->extack);
ecd63a02
JK
2970 if (err)
2971 return err;
2972
9dc6b116
JK
2973 if (old_prog)
2974 bpf_prog_put(old_prog);
ecd63a02 2975
79c12a75 2976 nfp_net_xdp_offload(nn, nn->dp.xdp_prog);
6d677075 2977
ecd63a02
JK
2978 return 0;
2979}
2980
2981static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
2982{
2983 struct nfp_net *nn = netdev_priv(netdev);
2984
2985 switch (xdp->command) {
2986 case XDP_SETUP_PROG:
d957c0f7 2987 return nfp_net_xdp_setup(nn, xdp);
ecd63a02 2988 case XDP_QUERY_PROG:
79c12a75 2989 xdp->prog_attached = !!nn->dp.xdp_prog;
ecd63a02
JK
2990 return 0;
2991 default:
2992 return -EINVAL;
2993 }
2994}
2995
9d372759
PC
2996static int nfp_net_set_mac_address(struct net_device *netdev, void *addr)
2997{
2998 struct nfp_net *nn = netdev_priv(netdev);
2999 struct sockaddr *saddr = addr;
3000 int err;
3001
3002 err = eth_prepare_mac_addr_change(netdev, addr);
3003 if (err)
3004 return err;
3005
3006 nfp_net_write_mac_addr(nn, saddr->sa_data);
3007
3008 err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_MACADDR);
3009 if (err)
3010 return err;
3011
3012 eth_commit_mac_addr_change(netdev, addr);
3013
3014 return 0;
3015}
3016
eb488c26 3017const struct net_device_ops nfp_net_netdev_ops = {
4c352362
JK
3018 .ndo_open = nfp_net_netdev_open,
3019 .ndo_stop = nfp_net_netdev_close,
3020 .ndo_start_xmit = nfp_net_tx,
3021 .ndo_get_stats64 = nfp_net_stat64,
7533fdc0 3022 .ndo_setup_tc = nfp_net_setup_tc,
4c352362
JK
3023 .ndo_tx_timeout = nfp_net_tx_timeout,
3024 .ndo_set_rx_mode = nfp_net_set_rx_mode,
3025 .ndo_change_mtu = nfp_net_change_mtu,
9d372759 3026 .ndo_set_mac_address = nfp_net_set_mac_address,
4c352362
JK
3027 .ndo_set_features = nfp_net_set_features,
3028 .ndo_features_check = nfp_net_features_check,
eb488c26 3029 .ndo_get_phys_port_name = nfp_port_get_phys_port_name,
3ab68837
AD
3030 .ndo_udp_tunnel_add = nfp_net_add_vxlan_port,
3031 .ndo_udp_tunnel_del = nfp_net_del_vxlan_port,
ecd63a02 3032 .ndo_xdp = nfp_net_xdp,
4c352362
JK
3033};
3034
3035/**
3036 * nfp_net_info() - Print general info about the NIC
3037 * @nn: NFP Net device to print information about
3038 */
3039void nfp_net_info(struct nfp_net *nn)
3040{
416db5c1 3041 nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
79c12a75
JK
3042 nn->dp.is_vf ? "VF " : "",
3043 nn->dp.num_tx_rings, nn->max_tx_rings,
3044 nn->dp.num_rx_rings, nn->max_rx_rings);
4c352362
JK
3045 nn_info(nn, "VER: %d.%d.%d.%d, Maximum supported MTU: %d\n",
3046 nn->fw_ver.resv, nn->fw_ver.class,
3047 nn->fw_ver.major, nn->fw_ver.minor,
3048 nn->max_mtu);
9d372759 3049 nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
4c352362
JK
3050 nn->cap,
3051 nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "",
3052 nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "",
3053 nn->cap & NFP_NET_CFG_CTRL_L2MC ? "L2MCFILT " : "",
3054 nn->cap & NFP_NET_CFG_CTRL_RXCSUM ? "RXCSUM " : "",
3055 nn->cap & NFP_NET_CFG_CTRL_TXCSUM ? "TXCSUM " : "",
3056 nn->cap & NFP_NET_CFG_CTRL_RXVLAN ? "RXVLAN " : "",
3057 nn->cap & NFP_NET_CFG_CTRL_TXVLAN ? "TXVLAN " : "",
3058 nn->cap & NFP_NET_CFG_CTRL_SCATTER ? "SCATTER " : "",
3059 nn->cap & NFP_NET_CFG_CTRL_GATHER ? "GATHER " : "",
28063be6
EP
3060 nn->cap & NFP_NET_CFG_CTRL_LSO ? "TSO1 " : "",
3061 nn->cap & NFP_NET_CFG_CTRL_LSO2 ? "TSO2 " : "",
611bdd49
EP
3062 nn->cap & NFP_NET_CFG_CTRL_RSS ? "RSS1 " : "",
3063 nn->cap & NFP_NET_CFG_CTRL_RSS2 ? "RSS2 " : "",
4c352362
JK
3064 nn->cap & NFP_NET_CFG_CTRL_L2SWITCH ? "L2SWITCH " : "",
3065 nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
3066 nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "",
3067 nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "",
7533fdc0 3068 nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "",
ddb98d94
JK
3069 nfp_net_ebpf_capable(nn) ? "BPF " : "",
3070 nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ?
9d372759
PC
3071 "RXCSUM_COMPLETE " : "",
3072 nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR " : "");
4c352362
JK
3073}
3074
3075/**
beba69ca 3076 * nfp_net_alloc() - Allocate netdev and related structure
4c352362
JK
3077 * @pdev: PCI device
3078 * @max_tx_rings: Maximum number of TX rings supported by device
3079 * @max_rx_rings: Maximum number of RX rings supported by device
3080 *
3081 * This function allocates a netdev device and fills in the initial
3082 * part of the @struct nfp_net structure.
3083 *
3084 * Return: NFP Net device structure, or ERR_PTR on error.
3085 */
beba69ca
JK
3086struct nfp_net *nfp_net_alloc(struct pci_dev *pdev,
3087 unsigned int max_tx_rings,
3088 unsigned int max_rx_rings)
4c352362
JK
3089{
3090 struct net_device *netdev;
3091 struct nfp_net *nn;
4c352362
JK
3092
3093 netdev = alloc_etherdev_mqs(sizeof(struct nfp_net),
3094 max_tx_rings, max_rx_rings);
3095 if (!netdev)
3096 return ERR_PTR(-ENOMEM);
3097
3098 SET_NETDEV_DEV(netdev, &pdev->dev);
3099 nn = netdev_priv(netdev);
3100
79c12a75
JK
3101 nn->dp.netdev = netdev;
3102 nn->dp.dev = &pdev->dev;
4c352362
JK
3103 nn->pdev = pdev;
3104
3105 nn->max_tx_rings = max_tx_rings;
3106 nn->max_rx_rings = max_rx_rings;
3107
79c12a75
JK
3108 nn->dp.num_tx_rings = min_t(unsigned int,
3109 max_tx_rings, num_online_cpus());
3110 nn->dp.num_rx_rings = min_t(unsigned int, max_rx_rings,
cbeaf7aa 3111 netif_get_num_default_rss_queues());
4c352362 3112
79c12a75
JK
3113 nn->dp.num_r_vecs = max(nn->dp.num_tx_rings, nn->dp.num_rx_rings);
3114 nn->dp.num_r_vecs = min_t(unsigned int,
3115 nn->dp.num_r_vecs, num_online_cpus());
4b27a1eb 3116
79c12a75
JK
3117 nn->dp.txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
3118 nn->dp.rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
4c352362
JK
3119
3120 spin_lock_init(&nn->reconfig_lock);
66860beb 3121 spin_lock_init(&nn->rx_filter_lock);
4c352362
JK
3122 spin_lock_init(&nn->link_status_lock);
3123
3d780b92
JK
3124 setup_timer(&nn->reconfig_timer,
3125 nfp_net_reconfig_timer, (unsigned long)nn);
66860beb
JK
3126 setup_timer(&nn->rx_filter_stats_timer,
3127 nfp_net_filter_stats_timer, (unsigned long)nn);
3d780b92 3128
4c352362
JK
3129 return nn;
3130}
3131
3132/**
beba69ca 3133 * nfp_net_free() - Undo what @nfp_net_alloc() did
4c352362
JK
3134 * @nn: NFP Net device to free
3135 */
beba69ca 3136void nfp_net_free(struct nfp_net *nn)
4c352362 3137{
79c12a75 3138 free_netdev(nn->dp.netdev);
4c352362
JK
3139}
3140
9ff304bf
JK
3141/**
3142 * nfp_net_rss_key_sz() - Get current size of the RSS key
3143 * @nn: NFP Net device instance
3144 *
3145 * Return: size of the RSS key for currently selected hash function.
3146 */
3147unsigned int nfp_net_rss_key_sz(struct nfp_net *nn)
3148{
3149 switch (nn->rss_hfunc) {
3150 case ETH_RSS_HASH_TOP:
3151 return NFP_NET_CFG_RSS_KEY_SZ;
3152 case ETH_RSS_HASH_XOR:
3153 return 0;
3154 case ETH_RSS_HASH_CRC32:
3155 return 4;
3156 }
3157
3158 nn_warn(nn, "Unknown hash function: %u\n", nn->rss_hfunc);
3159 return 0;
3160}
3161
4c352362
JK
3162/**
3163 * nfp_net_rss_init() - Set the initial RSS parameters
3164 * @nn: NFP Net device to reconfigure
3165 */
3166static void nfp_net_rss_init(struct nfp_net *nn)
3167{
9ff304bf
JK
3168 unsigned long func_bit, rss_cap_hfunc;
3169 u32 reg;
3170
3171 /* Read the RSS function capability and select first supported func */
3172 reg = nn_readl(nn, NFP_NET_CFG_RSS_CAP);
3173 rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC, reg);
3174 if (!rss_cap_hfunc)
3175 rss_cap_hfunc = FIELD_GET(NFP_NET_CFG_RSS_CAP_HFUNC,
3176 NFP_NET_CFG_RSS_TOEPLITZ);
3177
3178 func_bit = find_first_bit(&rss_cap_hfunc, NFP_NET_CFG_RSS_HFUNCS);
3179 if (func_bit == NFP_NET_CFG_RSS_HFUNCS) {
79c12a75 3180 dev_warn(nn->dp.dev,
9ff304bf
JK
3181 "Bad RSS config, defaulting to Toeplitz hash\n");
3182 func_bit = ETH_RSS_HASH_TOP_BIT;
3183 }
3184 nn->rss_hfunc = 1 << func_bit;
3185
3186 netdev_rss_key_fill(nn->rss_key, nfp_net_rss_key_sz(nn));
4c352362 3187
1e9e10d0 3188 nfp_net_rss_init_itbl(nn);
4c352362
JK
3189
3190 /* Enable IPv4/IPv6 TCP by default */
3191 nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
3192 NFP_NET_CFG_RSS_IPV6_TCP |
9ff304bf 3193 FIELD_PREP(NFP_NET_CFG_RSS_HFUNC, nn->rss_hfunc) |
4c352362
JK
3194 NFP_NET_CFG_RSS_MASK;
3195}
3196
3197/**
3198 * nfp_net_irqmod_init() - Set the initial IRQ moderation parameters
3199 * @nn: NFP Net device to reconfigure
3200 */
3201static void nfp_net_irqmod_init(struct nfp_net *nn)
3202{
3203 nn->rx_coalesce_usecs = 50;
3204 nn->rx_coalesce_max_frames = 64;
3205 nn->tx_coalesce_usecs = 50;
3206 nn->tx_coalesce_max_frames = 64;
3207}
3208
3209/**
beba69ca
JK
3210 * nfp_net_init() - Initialise/finalise the nfp_net structure
3211 * @nn: NFP Net device structure
4c352362
JK
3212 *
3213 * Return: 0 on success or negative errno on error.
3214 */
beba69ca 3215int nfp_net_init(struct nfp_net *nn)
4c352362 3216{
beba69ca 3217 struct net_device *netdev = nn->dp.netdev;
4c352362
JK
3218 int err;
3219
c487e6b1
JK
3220 nn->dp.rx_dma_dir = DMA_FROM_DEVICE;
3221
4c352362
JK
3222 /* Get some of the read-only fields from the BAR */
3223 nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
3224 nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
3225
611bdd49
EP
3226 /* Chained metadata is signalled by capabilities except in version 4 */
3227 nn->dp.chained_metadata_format = nn->fw_ver.major == 4 ||
3228 nn->cap & NFP_NET_CFG_CTRL_CHAIN_META;
3229 if (nn->dp.chained_metadata_format && nn->fw_ver.major != 4)
3230 nn->cap &= ~NFP_NET_CFG_CTRL_RSS;
3231
9d372759 3232 nfp_net_write_mac_addr(nn, nn->dp.netdev->dev_addr);
4c352362 3233
bf187ea0 3234 /* Determine RX packet/metadata boundary offset */
97717aca
JK
3235 if (nn->fw_ver.major >= 2) {
3236 u32 reg;
3237
3238 reg = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
3239 if (reg > NFP_NET_MAX_PREPEND) {
3240 nn_err(nn, "Invalid rx offset: %d\n", reg);
3241 return -EINVAL;
3242 }
3243 nn->dp.rx_offset = reg;
3244 } else {
79c12a75 3245 nn->dp.rx_offset = NFP_NET_RX_OFFSET;
97717aca 3246 }
bf187ea0 3247
4c352362
JK
3248 /* Set default MTU and Freelist buffer size */
3249 if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
3250 netdev->mtu = nn->max_mtu;
3251 else
3252 netdev->mtu = NFP_NET_DEFAULT_MTU;
76e1e1a8
JK
3253 nn->dp.mtu = netdev->mtu;
3254 nn->dp.fl_bufsz = nfp_net_calc_fl_bufsz(&nn->dp);
4c352362
JK
3255
3256 /* Advertise/enable offloads based on capabilities
3257 *
3258 * Note: netdev->features show the currently enabled features
3259 * and netdev->hw_features advertises which features are
3260 * supported. By default we enable most features.
3261 */
9d372759
PC
3262 if (nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR)
3263 netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
3264
4c352362 3265 netdev->hw_features = NETIF_F_HIGHDMA;
ddb98d94 3266 if (nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY) {
4c352362 3267 netdev->hw_features |= NETIF_F_RXCSUM;
ddb98d94 3268 nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RXCSUM_ANY;
4c352362
JK
3269 }
3270 if (nn->cap & NFP_NET_CFG_CTRL_TXCSUM) {
3271 netdev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
79c12a75 3272 nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXCSUM;
4c352362
JK
3273 }
3274 if (nn->cap & NFP_NET_CFG_CTRL_GATHER) {
3275 netdev->hw_features |= NETIF_F_SG;
79c12a75 3276 nn->dp.ctrl |= NFP_NET_CFG_CTRL_GATHER;
4c352362 3277 }
28063be6
EP
3278 if ((nn->cap & NFP_NET_CFG_CTRL_LSO && nn->fw_ver.major > 2) ||
3279 nn->cap & NFP_NET_CFG_CTRL_LSO2) {
4c352362 3280 netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
28063be6
EP
3281 nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_LSO2 ?:
3282 NFP_NET_CFG_CTRL_LSO;
4c352362 3283 }
611bdd49 3284 if (nn->cap & NFP_NET_CFG_CTRL_RSS_ANY) {
4c352362
JK
3285 netdev->hw_features |= NETIF_F_RXHASH;
3286 nfp_net_rss_init(nn);
611bdd49
EP
3287 nn->dp.ctrl |= nn->cap & NFP_NET_CFG_CTRL_RSS2 ?:
3288 NFP_NET_CFG_CTRL_RSS;
4c352362
JK
3289 }
3290 if (nn->cap & NFP_NET_CFG_CTRL_VXLAN &&
3291 nn->cap & NFP_NET_CFG_CTRL_NVGRE) {
3292 if (nn->cap & NFP_NET_CFG_CTRL_LSO)
3293 netdev->hw_features |= NETIF_F_GSO_GRE |
3294 NETIF_F_GSO_UDP_TUNNEL;
79c12a75 3295 nn->dp.ctrl |= NFP_NET_CFG_CTRL_VXLAN | NFP_NET_CFG_CTRL_NVGRE;
4c352362
JK
3296
3297 netdev->hw_enc_features = netdev->hw_features;
3298 }
3299
3300 netdev->vlan_features = netdev->hw_features;
3301
3302 if (nn->cap & NFP_NET_CFG_CTRL_RXVLAN) {
3303 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
79c12a75 3304 nn->dp.ctrl |= NFP_NET_CFG_CTRL_RXVLAN;
4c352362
JK
3305 }
3306 if (nn->cap & NFP_NET_CFG_CTRL_TXVLAN) {
28063be6
EP
3307 if (nn->cap & NFP_NET_CFG_CTRL_LSO2) {
3308 nn_warn(nn, "Device advertises both TSO2 and TXVLAN. Refusing to enable TXVLAN.\n");
3309 } else {
3310 netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
3311 nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXVLAN;
3312 }
4c352362
JK
3313 }
3314
3315 netdev->features = netdev->hw_features;
3316
7533fdc0
JK
3317 if (nfp_net_ebpf_capable(nn))
3318 netdev->hw_features |= NETIF_F_HW_TC;
3319
4c352362
JK
3320 /* Advertise but disable TSO by default. */
3321 netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
28063be6 3322 nn->dp.ctrl &= ~NFP_NET_CFG_CTRL_LSO_ANY;
4c352362
JK
3323
3324 /* Allow L2 Broadcast and Multicast through by default, if supported */
3325 if (nn->cap & NFP_NET_CFG_CTRL_L2BC)
79c12a75 3326 nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2BC;
4c352362 3327 if (nn->cap & NFP_NET_CFG_CTRL_L2MC)
79c12a75 3328 nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2MC;
4c352362
JK
3329
3330 /* Allow IRQ moderation, if supported */
3331 if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) {
3332 nfp_net_irqmod_init(nn);
79c12a75 3333 nn->dp.ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
4c352362
JK
3334 }
3335
4c352362
JK
3336 /* Stash the re-configuration queue away. First odd queue in TX Bar */
3337 nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
3338
3339 /* Make sure the FW knows the netdev is supposed to be disabled here */
3340 nn_writel(nn, NFP_NET_CFG_CTRL, 0);
3341 nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0);
3342 nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0);
3343 err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING |
3344 NFP_NET_CFG_UPDATE_GEN);
3345 if (err)
3346 return err;
3347
3348 /* Finalise the netdev setup */
4c352362
JK
3349 netdev->netdev_ops = &nfp_net_netdev_ops;
3350 netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
44770e11
JW
3351
3352 /* MTU range: 68 - hw-specific max */
3353 netdev->min_mtu = ETH_MIN_MTU;
3354 netdev->max_mtu = nn->max_mtu;
3355
4b402d71 3356 netif_carrier_off(netdev);
4c352362
JK
3357
3358 nfp_net_set_ethtool_ops(netdev);
beba69ca 3359 nfp_net_vecs_init(nn);
4c352362
JK
3360
3361 return register_netdev(netdev);
3362}
3363
3364/**
beba69ca
JK
3365 * nfp_net_clean() - Undo what nfp_net_init() did.
3366 * @nn: NFP Net device structure
4c352362 3367 */
beba69ca 3368void nfp_net_clean(struct nfp_net *nn)
4c352362 3369{
6f14f443 3370 unregister_netdev(nn->dp.netdev);
c383bdd1 3371
79c12a75
JK
3372 if (nn->dp.xdp_prog)
3373 bpf_prog_put(nn->dp.xdp_prog);
3374 if (nn->dp.bpf_offload_xdp)
6d677075 3375 nfp_net_xdp_offload(nn, NULL);
4c352362 3376}