drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2 * Copyright (C) 2005 - 2016 Broadcom
3 * All rights reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License version 2
7 * as published by the Free Software Foundation. The full GNU General
8 * Public License is included in this distribution in the file called COPYING.
9 *
10 * Contact Information:
11 * linux-drivers@emulex.com
12 *
13 * Emulex
14 * 3333 Susan Street
15 * Costa Mesa, CA 92626
16 */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34 * Use sysfs method to enable/disable VFs.
35 */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, 0444);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, 0444);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45 * Each function schedules its own work request on this shared workq.
46 */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50 #ifdef CONFIG_BE2NET_BE2
51 { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
52 { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 #endif /* CONFIG_BE2NET_BE2 */
54 #ifdef CONFIG_BE2NET_BE3
55 { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
56 { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
57 #endif /* CONFIG_BE2NET_BE3 */
58 #ifdef CONFIG_BE2NET_LANCER
59 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
60 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
61 #endif /* CONFIG_BE2NET_LANCER */
62 #ifdef CONFIG_BE2NET_SKYHAWK
63 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
64 { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
65 #endif /* CONFIG_BE2NET_SKYHAWK */
66 { 0 }
67 };
68 MODULE_DEVICE_TABLE(pci, be_dev_ids);
69
70 /* Workqueue used by all functions for deferring cmd calls to the adapter */
71 static struct workqueue_struct *be_wq;
72
73 /* UE Status Low CSR */
74 static const char * const ue_status_low_desc[] = {
75 "CEV",
76 "CTX",
77 "DBUF",
78 "ERX",
79 "Host",
80 "MPU",
81 "NDMA",
82 "PTC ",
83 "RDMA ",
84 "RXF ",
85 "RXIPS ",
86 "RXULP0 ",
87 "RXULP1 ",
88 "RXULP2 ",
89 "TIM ",
90 "TPOST ",
91 "TPRE ",
92 "TXIPS ",
93 "TXULP0 ",
94 "TXULP1 ",
95 "UC ",
96 "WDMA ",
97 "TXULP2 ",
98 "HOST1 ",
99 "P0_OB_LINK ",
100 "P1_OB_LINK ",
101 "HOST_GPIO ",
102 "MBOX ",
103 "ERX2 ",
104 "SPARE ",
105 "JTAG ",
106 "MPU_INTPEND "
107 };
108
109 /* UE Status High CSR */
110 static const char * const ue_status_hi_desc[] = {
111 "LPCMEMHOST",
112 "MGMT_MAC",
113 "PCS0ONLINE",
114 "MPU_IRAM",
115 "PCS1ONLINE",
116 "PCTL0",
117 "PCTL1",
118 "PMEM",
119 "RR",
120 "TXPB",
121 "RXPP",
122 "XAUI",
123 "TXP",
124 "ARM",
125 "IPC",
126 "HOST2",
127 "HOST3",
128 "HOST4",
129 "HOST5",
130 "HOST6",
131 "HOST7",
132 "ECRC",
133 "Poison TLP",
134 "NETC",
135 "PERIPH",
136 "LLTXULP",
137 "D2P",
138 "RCON",
139 "LDMA",
140 "LLTXP",
141 "LLTXPB",
142 "Unknown"
143 };
144
145 #define BE_VF_IF_EN_FLAGS (BE_IF_FLAGS_UNTAGGED | \
146 BE_IF_FLAGS_BROADCAST | \
147 BE_IF_FLAGS_MULTICAST | \
148 BE_IF_FLAGS_PASS_L3L4_ERRORS)
149
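/* be_queue_alloc()/be_queue_free() manage the DMA-coherent ring memory backing a queue */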
150 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
151 {
152 struct be_dma_mem *mem = &q->dma_mem;
153
154 if (mem->va) {
155 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
156 mem->dma);
157 mem->va = NULL;
158 }
159 }
160
161 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
162 u16 len, u16 entry_size)
163 {
164 struct be_dma_mem *mem = &q->dma_mem;
165
166 memset(q, 0, sizeof(*q));
167 q->len = len;
168 q->entry_size = entry_size;
169 mem->size = len * entry_size;
170 mem->va = dma_alloc_coherent(&adapter->pdev->dev, mem->size,
171 &mem->dma, GFP_KERNEL);
172 if (!mem->va)
173 return -ENOMEM;
174 return 0;
175 }
176
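/* Enable/disable host interrupts via the HOSTINTR bit in the PCI config-space MEMBAR control register */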
177 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
178 {
179 u32 reg, enabled;
180
181 pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
182 &reg);
183 enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184
185 if (!enabled && enable)
186 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
187 else if (enabled && !enable)
188 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
189 else
190 return;
191
192 pci_write_config_dword(adapter->pdev,
193 PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
194 }
195
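/* Enable/disable interrupts via FW cmd; fall back to the config-space register if the cmd fails */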
196 static void be_intr_set(struct be_adapter *adapter, bool enable)
197 {
198 int status = 0;
199
200 /* On lancer interrupts can't be controlled via this register */
201 if (lancer_chip(adapter))
202 return;
203
204 if (be_check_error(adapter, BE_ERROR_EEH))
205 return;
206
207 status = be_cmd_intr_set(adapter, enable);
208 if (status)
209 be_reg_intr_set(adapter, enable);
210 }
211
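/* Ring the RX queue doorbell to notify HW of 'posted' newly added RX buffers */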
212 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
213 {
214 u32 val = 0;
215
216 if (be_check_error(adapter, BE_ERROR_HW))
217 return;
218
219 val |= qid & DB_RQ_RING_ID_MASK;
220 val |= posted << DB_RQ_NUM_POSTED_SHIFT;
221
222 wmb();
223 iowrite32(val, adapter->db + DB_RQ_OFFSET);
224 }
225
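/* Ring the TX queue doorbell to notify HW of 'posted' pending WRBs */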
226 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
227 u16 posted)
228 {
229 u32 val = 0;
230
231 if (be_check_error(adapter, BE_ERROR_HW))
232 return;
233
234 val |= txo->q.id & DB_TXULP_RING_ID_MASK;
235 val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
236
237 wmb();
238 iowrite32(val, adapter->db + txo->db_offset);
239 }
240
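/* Ring the EQ doorbell: optionally re-arm the EQ, clear the interrupt, and ack 'num_popped' events */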
241 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
242 bool arm, bool clear_int, u16 num_popped,
243 u32 eq_delay_mult_enc)
244 {
245 u32 val = 0;
246
247 val |= qid & DB_EQ_RING_ID_MASK;
248 val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
249
250 if (be_check_error(adapter, BE_ERROR_HW))
251 return;
252
253 if (arm)
254 val |= 1 << DB_EQ_REARM_SHIFT;
255 if (clear_int)
256 val |= 1 << DB_EQ_CLR_SHIFT;
257 val |= 1 << DB_EQ_EVNT_SHIFT;
258 val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
259 val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
260 iowrite32(val, adapter->db + DB_EQ_OFFSET);
261 }
262
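/* Ring the CQ doorbell: optionally re-arm the CQ and ack 'num_popped' completions */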
263 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
264 {
265 u32 val = 0;
266
267 val |= qid & DB_CQ_RING_ID_MASK;
268 val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
269 DB_CQ_RING_ID_EXT_MASK_SHIFT);
270
271 if (be_check_error(adapter, BE_ERROR_HW))
272 return;
273
274 if (arm)
275 val |= 1 << DB_CQ_REARM_SHIFT;
276 val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
277 iowrite32(val, adapter->db + DB_CQ_OFFSET);
278 }
279
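/* Program 'mac' as the primary MAC (pmac_id[0]); reuse the pmac_id if the MAC is already in the uc-list */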
280 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
281 {
282 int i;
283
284 /* Check if mac has already been added as part of uc-list */
285 for (i = 0; i < adapter->uc_macs; i++) {
286 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
287 /* mac already added, skip addition */
288 adapter->pmac_id[0] = adapter->pmac_id[i + 1];
289 return 0;
290 }
291 }
292
293 return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
294 &adapter->pmac_id[0], 0);
295 }
296
297 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
298 {
299 int i;
300
301 /* Skip deletion if the programmed mac is
302 * being used in uc-list
303 */
304 for (i = 0; i < adapter->uc_macs; i++) {
305 if (adapter->pmac_id[i + 1] == pmac_id)
306 return;
307 }
308 be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
309 }
310
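/* ndo_set_mac_address handler: program the new MAC and verify with FW that it became the active MAC */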
311 static int be_mac_addr_set(struct net_device *netdev, void *p)
312 {
313 struct be_adapter *adapter = netdev_priv(netdev);
314 struct device *dev = &adapter->pdev->dev;
315 struct sockaddr *addr = p;
316 int status;
317 u8 mac[ETH_ALEN];
318 u32 old_pmac_id = adapter->pmac_id[0];
319
320 if (!is_valid_ether_addr(addr->sa_data))
321 return -EADDRNOTAVAIL;
322
323 /* Proceed further only if the user-provided MAC is different
324 * from the active MAC
325 */
326 if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
327 return 0;
328
329 /* BE3 VFs without FILTMGMT privilege are not allowed to set their MAC
330 * address
331 */
332 if (BEx_chip(adapter) && be_virtfn(adapter) &&
333 !check_privilege(adapter, BE_PRIV_FILTMGMT))
334 return -EPERM;
335
336 /* if device is not running, copy MAC to netdev->dev_addr */
337 if (!netif_running(netdev))
338 goto done;
339
340 /* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT
341 * privilege or if the PF did not provision the new MAC address.
342 * On BE3, this cmd will always fail if the VF doesn't have the
343 * FILTMGMT privilege. This failure is OK only if the PF has
344 * programmed the MAC for the VF.
345 */
346 mutex_lock(&adapter->rx_filter_lock);
347 status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
348 if (!status) {
349
350 /* Delete the old programmed MAC. This call may fail if the
351 * old MAC was already deleted by the PF driver.
352 */
353 if (adapter->pmac_id[0] != old_pmac_id)
354 be_dev_mac_del(adapter, old_pmac_id);
355 }
356
357 mutex_unlock(&adapter->rx_filter_lock);
358 /* Decide if the new MAC is successfully activated only after
359 * querying the FW
360 */
361 status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
362 adapter->if_handle, true, 0);
363 if (status)
364 goto err;
365
366 /* The MAC change did not happen, either due to lack of privilege
367 * or PF didn't pre-provision.
368 */
369 if (!ether_addr_equal(addr->sa_data, mac)) {
370 status = -EPERM;
371 goto err;
372 }
373
374 /* Remember currently programmed MAC */
375 ether_addr_copy(adapter->dev_mac, addr->sa_data);
376 done:
377 ether_addr_copy(netdev->dev_addr, addr->sa_data);
378 dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
379 return 0;
380 err:
381 dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
382 return status;
383 }
384
385 /* BE2 supports only v0 cmd */
386 static void *hw_stats_from_cmd(struct be_adapter *adapter)
387 {
388 if (BE2_chip(adapter)) {
389 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
390
391 return &cmd->hw_stats;
392 } else if (BE3_chip(adapter)) {
393 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
394
395 return &cmd->hw_stats;
396 } else {
397 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
398
399 return &cmd->hw_stats;
400 }
401 }
402
403 /* BE2 supports only v0 cmd */
404 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
405 {
406 if (BE2_chip(adapter)) {
407 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408
409 return &hw_stats->erx;
410 } else if (BE3_chip(adapter)) {
411 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
412
413 return &hw_stats->erx;
414 } else {
415 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
416
417 return &hw_stats->erx;
418 }
419 }
420
421 static void populate_be_v0_stats(struct be_adapter *adapter)
422 {
423 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
424 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
425 struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
426 struct be_port_rxf_stats_v0 *port_stats =
427 &rxf_stats->port[adapter->port_num];
428 struct be_drv_stats *drvs = &adapter->drv_stats;
429
430 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
431 drvs->rx_pause_frames = port_stats->rx_pause_frames;
432 drvs->rx_crc_errors = port_stats->rx_crc_errors;
433 drvs->rx_control_frames = port_stats->rx_control_frames;
434 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
435 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
436 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
437 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
438 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
439 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
440 drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
441 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
442 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
443 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
444 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
445 drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
446 drvs->rx_dropped_header_too_small =
447 port_stats->rx_dropped_header_too_small;
448 drvs->rx_address_filtered =
449 port_stats->rx_address_filtered +
450 port_stats->rx_vlan_filtered;
451 drvs->rx_alignment_symbol_errors =
452 port_stats->rx_alignment_symbol_errors;
453
454 drvs->tx_pauseframes = port_stats->tx_pauseframes;
455 drvs->tx_controlframes = port_stats->tx_controlframes;
456
457 if (adapter->port_num)
458 drvs->jabber_events = rxf_stats->port1_jabber_events;
459 else
460 drvs->jabber_events = rxf_stats->port0_jabber_events;
461 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
462 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
463 drvs->forwarded_packets = rxf_stats->forwarded_packets;
464 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
465 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
466 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
467 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
468 }
469
470 static void populate_be_v1_stats(struct be_adapter *adapter)
471 {
472 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
473 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
474 struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
475 struct be_port_rxf_stats_v1 *port_stats =
476 &rxf_stats->port[adapter->port_num];
477 struct be_drv_stats *drvs = &adapter->drv_stats;
478
479 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
480 drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
481 drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
482 drvs->rx_pause_frames = port_stats->rx_pause_frames;
483 drvs->rx_crc_errors = port_stats->rx_crc_errors;
484 drvs->rx_control_frames = port_stats->rx_control_frames;
485 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
486 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
487 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
488 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
489 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
490 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
491 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
492 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
493 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
494 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
495 drvs->rx_dropped_header_too_small =
496 port_stats->rx_dropped_header_too_small;
497 drvs->rx_input_fifo_overflow_drop =
498 port_stats->rx_input_fifo_overflow_drop;
499 drvs->rx_address_filtered = port_stats->rx_address_filtered;
500 drvs->rx_alignment_symbol_errors =
501 port_stats->rx_alignment_symbol_errors;
502 drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
503 drvs->tx_pauseframes = port_stats->tx_pauseframes;
504 drvs->tx_controlframes = port_stats->tx_controlframes;
505 drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
506 drvs->jabber_events = port_stats->jabber_events;
507 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
508 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
509 drvs->forwarded_packets = rxf_stats->forwarded_packets;
510 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
511 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
512 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
513 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
514 }
515
516 static void populate_be_v2_stats(struct be_adapter *adapter)
517 {
518 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
519 struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
520 struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
521 struct be_port_rxf_stats_v2 *port_stats =
522 &rxf_stats->port[adapter->port_num];
523 struct be_drv_stats *drvs = &adapter->drv_stats;
524
525 be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
526 drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
527 drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
528 drvs->rx_pause_frames = port_stats->rx_pause_frames;
529 drvs->rx_crc_errors = port_stats->rx_crc_errors;
530 drvs->rx_control_frames = port_stats->rx_control_frames;
531 drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
532 drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
533 drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
534 drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
535 drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
536 drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
537 drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
538 drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
539 drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
540 drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
541 drvs->rx_dropped_header_too_small =
542 port_stats->rx_dropped_header_too_small;
543 drvs->rx_input_fifo_overflow_drop =
544 port_stats->rx_input_fifo_overflow_drop;
545 drvs->rx_address_filtered = port_stats->rx_address_filtered;
546 drvs->rx_alignment_symbol_errors =
547 port_stats->rx_alignment_symbol_errors;
548 drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
549 drvs->tx_pauseframes = port_stats->tx_pauseframes;
550 drvs->tx_controlframes = port_stats->tx_controlframes;
551 drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
552 drvs->jabber_events = port_stats->jabber_events;
553 drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
554 drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
555 drvs->forwarded_packets = rxf_stats->forwarded_packets;
556 drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
557 drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
558 drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
559 adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
560 if (be_roce_supported(adapter)) {
561 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
562 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
563 drvs->rx_roce_frames = port_stats->roce_frames_received;
564 drvs->roce_drops_crc = port_stats->roce_drops_crc;
565 drvs->roce_drops_payload_len =
566 port_stats->roce_drops_payload_len;
567 }
568 }
569
570 static void populate_lancer_stats(struct be_adapter *adapter)
571 {
572 struct be_drv_stats *drvs = &adapter->drv_stats;
573 struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
574
575 be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
576 drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
577 drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
578 drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
579 drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
580 drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
581 drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
582 drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
583 drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
584 drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
585 drvs->rx_dropped_tcp_length =
586 pport_stats->rx_dropped_invalid_tcp_length;
587 drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
588 drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
589 drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
590 drvs->rx_dropped_header_too_small =
591 pport_stats->rx_dropped_header_too_small;
592 drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593 drvs->rx_address_filtered =
594 pport_stats->rx_address_filtered +
595 pport_stats->rx_vlan_filtered;
596 drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
597 drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
598 drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
599 drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
600 drvs->jabber_events = pport_stats->rx_jabbers;
601 drvs->forwarded_packets = pport_stats->num_forwards_lo;
602 drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
603 drvs->rx_drops_too_many_frags =
604 pport_stats->rx_drops_too_many_frags_lo;
605 }
606
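/* Accumulate a 16-bit HW counter that wraps at 65535 into a 32-bit SW counter */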
607 static void accumulate_16bit_val(u32 *acc, u16 val)
608 {
609 #define lo(x) (x & 0xFFFF)
610 #define hi(x) (x & 0xFFFF0000)
611 bool wrapped = val < lo(*acc);
612 u32 newacc = hi(*acc) + val;
613
614 if (wrapped)
615 newacc += 65536;
616 WRITE_ONCE(*acc, newacc);
617 }
618
619 static void populate_erx_stats(struct be_adapter *adapter,
620 struct be_rx_obj *rxo, u32 erx_stat)
621 {
622 if (!BEx_chip(adapter))
623 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
624 else
625 /* The erx HW counter below can wrap around after
626 * 65535; the driver accumulates it into a 32-bit value
627 */
628 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
629 (u16)erx_stat);
630 }
631
632 void be_parse_stats(struct be_adapter *adapter)
633 {
634 struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
635 struct be_rx_obj *rxo;
636 int i;
637 u32 erx_stat;
638
639 if (lancer_chip(adapter)) {
640 populate_lancer_stats(adapter);
641 } else {
642 if (BE2_chip(adapter))
643 populate_be_v0_stats(adapter);
644 else if (BE3_chip(adapter))
645 /* for BE3 */
646 populate_be_v1_stats(adapter);
647 else
648 populate_be_v2_stats(adapter);
649
650 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
651 for_all_rx_queues(adapter, rxo, i) {
652 erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
653 populate_erx_stats(adapter, rxo, erx_stat);
654 }
655 }
656 }
657
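/* ndo_get_stats64 handler: aggregate per-queue SW counters and HW error stats into rtnl_link_stats64 */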
658 static void be_get_stats64(struct net_device *netdev,
659 struct rtnl_link_stats64 *stats)
660 {
661 struct be_adapter *adapter = netdev_priv(netdev);
662 struct be_drv_stats *drvs = &adapter->drv_stats;
663 struct be_rx_obj *rxo;
664 struct be_tx_obj *txo;
665 u64 pkts, bytes;
666 unsigned int start;
667 int i;
668
669 for_all_rx_queues(adapter, rxo, i) {
670 const struct be_rx_stats *rx_stats = rx_stats(rxo);
671
672 do {
673 start = u64_stats_fetch_begin_irq(&rx_stats->sync);
674 pkts = rx_stats(rxo)->rx_pkts;
675 bytes = rx_stats(rxo)->rx_bytes;
676 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
677 stats->rx_packets += pkts;
678 stats->rx_bytes += bytes;
679 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
680 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
681 rx_stats(rxo)->rx_drops_no_frags;
682 }
683
684 for_all_tx_queues(adapter, txo, i) {
685 const struct be_tx_stats *tx_stats = tx_stats(txo);
686
687 do {
688 start = u64_stats_fetch_begin_irq(&tx_stats->sync);
689 pkts = tx_stats(txo)->tx_pkts;
690 bytes = tx_stats(txo)->tx_bytes;
691 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
692 stats->tx_packets += pkts;
693 stats->tx_bytes += bytes;
694 }
695
696 /* bad pkts received */
697 stats->rx_errors = drvs->rx_crc_errors +
698 drvs->rx_alignment_symbol_errors +
699 drvs->rx_in_range_errors +
700 drvs->rx_out_range_errors +
701 drvs->rx_frame_too_long +
702 drvs->rx_dropped_too_small +
703 drvs->rx_dropped_too_short +
704 drvs->rx_dropped_header_too_small +
705 drvs->rx_dropped_tcp_length +
706 drvs->rx_dropped_runt;
707
708 /* detailed rx errors */
709 stats->rx_length_errors = drvs->rx_in_range_errors +
710 drvs->rx_out_range_errors +
711 drvs->rx_frame_too_long;
712
713 stats->rx_crc_errors = drvs->rx_crc_errors;
714
715 /* frame alignment errors */
716 stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
717
718 /* receiver fifo overrun */
719 /* drops_no_pbuf is not per i/f, it's per BE card */
720 stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
721 drvs->rx_input_fifo_overflow_drop +
722 drvs->rx_drops_no_pbuf;
723 }
724
725 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
726 {
727 struct net_device *netdev = adapter->netdev;
728
729 if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
730 netif_carrier_off(netdev);
731 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
732 }
733
734 if (link_status)
735 netif_carrier_on(netdev);
736 else
737 netif_carrier_off(netdev);
738
739 netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
740 }
741
742 static int be_gso_hdr_len(struct sk_buff *skb)
743 {
744 if (skb->encapsulation)
745 return skb_inner_transport_offset(skb) +
746 inner_tcp_hdrlen(skb);
747 return skb_transport_offset(skb) + tcp_hdrlen(skb);
748 }
749
750 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
751 {
752 struct be_tx_stats *stats = tx_stats(txo);
753 u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
754 /* Account for headers which get duplicated in TSO pkt */
755 u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
756
757 u64_stats_update_begin(&stats->sync);
758 stats->tx_reqs++;
759 stats->tx_bytes += skb->len + dup_hdr_len;
760 stats->tx_pkts += tx_pkts;
761 if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
762 stats->tx_vxlan_offload_pkts += tx_pkts;
763 u64_stats_update_end(&stats->sync);
764 }
765
766 /* Returns number of WRBs needed for the skb */
767 static u32 skb_wrb_cnt(struct sk_buff *skb)
768 {
769 /* +1 for the header wrb */
770 return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
771 }
772
773 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
774 {
775 wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
776 wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
777 wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
778 wrb->rsvd0 = 0;
779 }
780
781 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
782 * to avoid the swap and shift/mask operations in wrb_fill().
783 */
784 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
785 {
786 wrb->frag_pa_hi = 0;
787 wrb->frag_pa_lo = 0;
788 wrb->frag_len = 0;
789 wrb->rsvd0 = 0;
790 }
791
792 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
793 struct sk_buff *skb)
794 {
795 u8 vlan_prio;
796 u16 vlan_tag;
797
798 vlan_tag = skb_vlan_tag_get(skb);
799 vlan_prio = skb_vlan_tag_get_prio(skb);
800 /* If vlan priority provided by OS is NOT in available bmap */
801 if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
802 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
803 adapter->recommended_prio_bits;
804
805 return vlan_tag;
806 }
807
808 /* Used only for IP tunnel packets */
809 static u16 skb_inner_ip_proto(struct sk_buff *skb)
810 {
811 return (inner_ip_hdr(skb)->version == 4) ?
812 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
813 }
814
815 static u16 skb_ip_proto(struct sk_buff *skb)
816 {
817 return (ip_hdr(skb)->version == 4) ?
818 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
819 }
820
821 static inline bool be_is_txq_full(struct be_tx_obj *txo)
822 {
823 return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
824 }
825
826 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
827 {
828 return atomic_read(&txo->q.used) < txo->q.len / 2;
829 }
830
831 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
832 {
833 return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
834 }
835
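/* Derive TX WRB offload parameters (LSO, checksums, VLAN) from the skb */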
836 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
837 struct sk_buff *skb,
838 struct be_wrb_params *wrb_params)
839 {
840 u16 proto;
841
842 if (skb_is_gso(skb)) {
843 BE_WRB_F_SET(wrb_params->features, LSO, 1);
844 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
845 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
846 BE_WRB_F_SET(wrb_params->features, LSO6, 1);
847 } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
848 if (skb->encapsulation) {
849 BE_WRB_F_SET(wrb_params->features, IPCS, 1);
850 proto = skb_inner_ip_proto(skb);
851 } else {
852 proto = skb_ip_proto(skb);
853 }
854 if (proto == IPPROTO_TCP)
855 BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
856 else if (proto == IPPROTO_UDP)
857 BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
858 }
859
860 if (skb_vlan_tag_present(skb)) {
861 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
862 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
863 }
864
865 BE_WRB_F_SET(wrb_params->features, CRC, 1);
866 }
867
868 static void wrb_fill_hdr(struct be_adapter *adapter,
869 struct be_eth_hdr_wrb *hdr,
870 struct be_wrb_params *wrb_params,
871 struct sk_buff *skb)
872 {
873 memset(hdr, 0, sizeof(*hdr));
874
875 SET_TX_WRB_HDR_BITS(crc, hdr,
876 BE_WRB_F_GET(wrb_params->features, CRC));
877 SET_TX_WRB_HDR_BITS(ipcs, hdr,
878 BE_WRB_F_GET(wrb_params->features, IPCS));
879 SET_TX_WRB_HDR_BITS(tcpcs, hdr,
880 BE_WRB_F_GET(wrb_params->features, TCPCS));
881 SET_TX_WRB_HDR_BITS(udpcs, hdr,
882 BE_WRB_F_GET(wrb_params->features, UDPCS));
883
884 SET_TX_WRB_HDR_BITS(lso, hdr,
885 BE_WRB_F_GET(wrb_params->features, LSO));
886 SET_TX_WRB_HDR_BITS(lso6, hdr,
887 BE_WRB_F_GET(wrb_params->features, LSO6));
888 SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
889
890 /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
891 * hack is not needed, the evt bit is set while ringing DB.
892 */
893 SET_TX_WRB_HDR_BITS(event, hdr,
894 BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
895 SET_TX_WRB_HDR_BITS(vlan, hdr,
896 BE_WRB_F_GET(wrb_params->features, VLAN));
897 SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
898
899 SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
900 SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
901 SET_TX_WRB_HDR_BITS(mgmt, hdr,
902 BE_WRB_F_GET(wrb_params->features, OS2BMC));
903 }
904
905 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
906 bool unmap_single)
907 {
908 dma_addr_t dma;
909 u32 frag_len = le32_to_cpu(wrb->frag_len);
910
912 dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
913 (u64)le32_to_cpu(wrb->frag_pa_lo);
914 if (frag_len) {
915 if (unmap_single)
916 dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
917 else
918 dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
919 }
920 }
921
922 /* Grab a WRB header for xmit */
923 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
924 {
925 u32 head = txo->q.head;
926
927 queue_head_inc(&txo->q);
928 return head;
929 }
930
931 /* Set up the WRB header for xmit */
932 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
933 struct be_tx_obj *txo,
934 struct be_wrb_params *wrb_params,
935 struct sk_buff *skb, u16 head)
936 {
937 u32 num_frags = skb_wrb_cnt(skb);
938 struct be_queue_info *txq = &txo->q;
939 struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
940
941 wrb_fill_hdr(adapter, hdr, wrb_params, skb);
942 be_dws_cpu_to_le(hdr, sizeof(*hdr));
943
944 BUG_ON(txo->sent_skb_list[head]);
945 txo->sent_skb_list[head] = skb;
946 txo->last_req_hdr = head;
947 atomic_add(num_frags, &txq->used);
948 txo->last_req_wrb_cnt = num_frags;
949 txo->pend_wrb_cnt += num_frags;
950 }
951
952 /* Setup a WRB fragment (buffer descriptor) for xmit */
953 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
954 int len)
955 {
956 struct be_eth_wrb *wrb;
957 struct be_queue_info *txq = &txo->q;
958
959 wrb = queue_head_node(txq);
960 wrb_fill(wrb, busaddr, len);
961 queue_head_inc(txq);
962 }
963
964 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
965 * was invoked. The producer index is restored to the previous packet and the
966 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
967 */
968 static void be_xmit_restore(struct be_adapter *adapter,
969 struct be_tx_obj *txo, u32 head, bool map_single,
970 u32 copied)
971 {
972 struct device *dev;
973 struct be_eth_wrb *wrb;
974 struct be_queue_info *txq = &txo->q;
975
976 dev = &adapter->pdev->dev;
977 txq->head = head;
978
979 /* skip the first wrb (hdr); it's not mapped */
980 queue_head_inc(txq);
981 while (copied) {
982 wrb = queue_head_node(txq);
983 unmap_tx_frag(dev, wrb, map_single);
984 map_single = false;
985 copied -= le32_to_cpu(wrb->frag_len);
986 queue_head_inc(txq);
987 }
988
989 txq->head = head;
990 }
991
992 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
993 * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
994 * of WRBs used up by the packet.
995 */
996 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
997 struct sk_buff *skb,
998 struct be_wrb_params *wrb_params)
999 {
1000 u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
1001 struct device *dev = &adapter->pdev->dev;
1002 bool map_single = false;
1003 u32 head;
1004 dma_addr_t busaddr;
1005 int len;
1006
1007 head = be_tx_get_wrb_hdr(txo);
1008
1009 if (skb->len > skb->data_len) {
1010 len = skb_headlen(skb);
1011
1012 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1013 if (dma_mapping_error(dev, busaddr))
1014 goto dma_err;
1015 map_single = true;
1016 be_tx_setup_wrb_frag(txo, busaddr, len);
1017 copied += len;
1018 }
1019
1020 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1021 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1022 len = skb_frag_size(frag);
1023
1024 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1025 if (dma_mapping_error(dev, busaddr))
1026 goto dma_err;
1027 be_tx_setup_wrb_frag(txo, busaddr, len);
1028 copied += len;
1029 }
1030
1031 be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1032
1033 be_tx_stats_update(txo, skb);
1034 return wrb_cnt;
1035
1036 dma_err:
1037 adapter->drv_stats.dma_map_errors++;
1038 be_xmit_restore(adapter, txo, head, map_single, copied);
1039 return 0;
1040 }
1041
1042 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1043 {
1044 return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1045 }
1046
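/* Insert the VLAN tag (and the outer QnQ tag, if any) into the packet itself when HW tagging must be skipped */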
1047 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1048 struct sk_buff *skb,
1049 struct be_wrb_params
1050 *wrb_params)
1051 {
1052 bool insert_vlan = false;
1053 u16 vlan_tag = 0;
1054
1055 skb = skb_share_check(skb, GFP_ATOMIC);
1056 if (unlikely(!skb))
1057 return skb;
1058
1059 if (skb_vlan_tag_present(skb)) {
1060 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1061 insert_vlan = true;
1062 }
1063
1064 if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1065 if (!insert_vlan) {
1066 vlan_tag = adapter->pvid;
1067 insert_vlan = true;
1068 }
1069 /* FW workaround: setting skip_hw_vlan = 1 informs the FW to
1070 * skip VLAN insertion
1071 */
1072 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1073 }
1074
1075 if (insert_vlan) {
1076 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1077 vlan_tag);
1078 if (unlikely(!skb))
1079 return skb;
1080 __vlan_hwaccel_clear_tag(skb);
1081 }
1082
1083 /* Insert the outer VLAN, if any */
1084 if (adapter->qnq_vid) {
1085 vlan_tag = adapter->qnq_vid;
1086 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1087 vlan_tag);
1088 if (unlikely(!skb))
1089 return skb;
1090 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1091 }
1092
1093 return skb;
1094 }
1095
1096 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1097 {
1098 struct ethhdr *eh = (struct ethhdr *)skb->data;
1099 u16 offset = ETH_HLEN;
1100
1101 if (eh->h_proto == htons(ETH_P_IPV6)) {
1102 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1103
1104 offset += sizeof(struct ipv6hdr);
1105 if (ip6h->nexthdr != NEXTHDR_TCP &&
1106 ip6h->nexthdr != NEXTHDR_UDP) {
1107 struct ipv6_opt_hdr *ehdr =
1108 (struct ipv6_opt_hdr *)(skb->data + offset);
1109
1110 /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1111 if (ehdr->hdrlen == 0xff)
1112 return true;
1113 }
1114 }
1115 return false;
1116 }
1117
1118 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119 {
1120 return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1121 }
1122
1123 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1124 {
1125 return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1126 }
1127
1128 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1129 struct sk_buff *skb,
1130 struct be_wrb_params
1131 *wrb_params)
1132 {
1133 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1134 unsigned int eth_hdr_len;
1135 struct iphdr *ip;
1136
1137 /* For padded packets, BE HW modifies tot_len field in IP header
1138 * incorrectly when a VLAN tag is inserted by HW.
1139 * For padded packets, Lancer computes incorrect checksum.
1140 */
1141 eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1142 VLAN_ETH_HLEN : ETH_HLEN;
1143 if (skb->len <= 60 &&
1144 (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1145 is_ipv4_pkt(skb)) {
1146 ip = (struct iphdr *)ip_hdr(skb);
1147 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1148 }
1149
1150 /* If vlan tag is already inlined in the packet, skip HW VLAN
1151 * tagging in pvid-tagging mode
1152 */
1153 if (be_pvid_tagging_enabled(adapter) &&
1154 veh->h_vlan_proto == htons(ETH_P_8021Q))
1155 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1156
1157 /* HW has a bug wherein it will calculate CSUM for VLAN
1158 * pkts even when checksum offload is disabled.
1159 * Manually insert the VLAN in the pkt.
1160 */
1161 if (skb->ip_summed != CHECKSUM_PARTIAL &&
1162 skb_vlan_tag_present(skb)) {
1163 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1164 if (unlikely(!skb))
1165 goto err;
1166 }
1167
1168 /* HW may lockup when VLAN HW tagging is requested on
1169 * certain ipv6 packets. Drop such pkts if the HW workaround to
1170 * skip HW tagging is not enabled by FW.
1171 */
1172 if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1173 (adapter->pvid || adapter->qnq_vid) &&
1174 !qnq_async_evt_rcvd(adapter)))
1175 goto tx_drop;
1176
1177 /* Manual VLAN tag insertion to prevent:
1178 * ASIC lockup when the ASIC inserts VLAN tag into
1179 * certain ipv6 packets. Insert VLAN tags in driver,
1180 * and set event, completion, vlan bits accordingly
1181 * in the Tx WRB.
1182 */
1183 if (be_ipv6_tx_stall_chk(adapter, skb) &&
1184 be_vlan_tag_tx_chk(adapter, skb)) {
1185 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1186 if (unlikely(!skb))
1187 goto err;
1188 }
1189
1190 return skb;
1191 tx_drop:
1192 dev_kfree_skb_any(skb);
1193 err:
1194 return NULL;
1195 }
1196
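/* Apply chip-specific TX workarounds (runt padding, SW VLAN insertion, GSO length trimming) before enqueue */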
1197 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1198 struct sk_buff *skb,
1199 struct be_wrb_params *wrb_params)
1200 {
1201 int err;
1202
1203 /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1204 * packets that are 32 bytes or less may cause a transmit stall
1205 * on that port. The workaround is to pad such packets
1206 * (len <= 32 bytes) to a minimum length of 36 bytes.
1207 */
1208 if (skb->len <= 32) {
1209 if (skb_put_padto(skb, 36))
1210 return NULL;
1211 }
1212
1213 if (BEx_chip(adapter) || lancer_chip(adapter)) {
1214 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1215 if (!skb)
1216 return NULL;
1217 }
1218
1219 /* The stack can send us skbs with length greater than
1220 * what the HW can handle. Trim the extra bytes.
1221 */
1222 WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1223 err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1224 WARN_ON(err);
1225
1226 return skb;
1227 }
1228
1229 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1230 {
1231 struct be_queue_info *txq = &txo->q;
1232 struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1233
1234 /* Mark the last request eventable if it hasn't been marked already */
1235 if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1236 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1237
1238 /* compose a dummy wrb if there are odd set of wrbs to notify */
1239 if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1240 wrb_fill_dummy(queue_head_node(txq));
1241 queue_head_inc(txq);
1242 atomic_inc(&txq->used);
1243 txo->pend_wrb_cnt++;
1244 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1245 TX_HDR_WRB_NUM_SHIFT);
1246 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1247 TX_HDR_WRB_NUM_SHIFT);
1248 }
1249 be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1250 txo->pend_wrb_cnt = 0;
1251 }
1252
1253 /* OS2BMC related */
1254
1255 #define DHCP_CLIENT_PORT 68
1256 #define DHCP_SERVER_PORT 67
1257 #define NET_BIOS_PORT1 137
1258 #define NET_BIOS_PORT2 138
1259 #define DHCPV6_RAS_PORT 547
1260
1261 #define is_mc_allowed_on_bmc(adapter, eh) \
1262 (!is_multicast_filt_enabled(adapter) && \
1263 is_multicast_ether_addr(eh->h_dest) && \
1264 !is_broadcast_ether_addr(eh->h_dest))
1265
1266 #define is_bc_allowed_on_bmc(adapter, eh) \
1267 (!is_broadcast_filt_enabled(adapter) && \
1268 is_broadcast_ether_addr(eh->h_dest))
1269
1270 #define is_arp_allowed_on_bmc(adapter, skb) \
1271 (is_arp(skb) && is_arp_filt_enabled(adapter))
1272
1273 #define is_broadcast_packet(eh, adapter) \
1274 (is_multicast_ether_addr(eh->h_dest) && \
1275 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1276
1277 #define is_arp(skb) (skb->protocol == htons(ETH_P_ARP))
1278
1279 #define is_arp_filt_enabled(adapter) \
1280 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1281
1282 #define is_dhcp_client_filt_enabled(adapter) \
1283 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1284
1285 #define is_dhcp_srvr_filt_enabled(adapter) \
1286 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1287
1288 #define is_nbios_filt_enabled(adapter) \
1289 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1290
1291 #define is_ipv6_na_filt_enabled(adapter) \
1292 (adapter->bmc_filt_mask & \
1293 BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1294
1295 #define is_ipv6_ra_filt_enabled(adapter) \
1296 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1297
1298 #define is_ipv6_ras_filt_enabled(adapter) \
1299 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1300
1301 #define is_broadcast_filt_enabled(adapter) \
1302 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1303
1304 #define is_multicast_filt_enabled(adapter) \
1305 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1306
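/* Decide whether the packet should also be sent to the BMC (OS2BMC), based on the BMC filter mask */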
1307 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1308 struct sk_buff **skb)
1309 {
1310 struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1311 bool os2bmc = false;
1312
1313 if (!be_is_os2bmc_enabled(adapter))
1314 goto done;
1315
1316 if (!is_multicast_ether_addr(eh->h_dest))
1317 goto done;
1318
1319 if (is_mc_allowed_on_bmc(adapter, eh) ||
1320 is_bc_allowed_on_bmc(adapter, eh) ||
1321 is_arp_allowed_on_bmc(adapter, (*skb))) {
1322 os2bmc = true;
1323 goto done;
1324 }
1325
1326 if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1327 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1328 u8 nexthdr = hdr->nexthdr;
1329
1330 if (nexthdr == IPPROTO_ICMPV6) {
1331 struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1332
1333 switch (icmp6->icmp6_type) {
1334 case NDISC_ROUTER_ADVERTISEMENT:
1335 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1336 goto done;
1337 case NDISC_NEIGHBOUR_ADVERTISEMENT:
1338 os2bmc = is_ipv6_na_filt_enabled(adapter);
1339 goto done;
1340 default:
1341 break;
1342 }
1343 }
1344 }
1345
1346 if (is_udp_pkt((*skb))) {
1347 struct udphdr *udp = udp_hdr((*skb));
1348
1349 switch (ntohs(udp->dest)) {
1350 case DHCP_CLIENT_PORT:
1351 os2bmc = is_dhcp_client_filt_enabled(adapter);
1352 goto done;
1353 case DHCP_SERVER_PORT:
1354 os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1355 goto done;
1356 case NET_BIOS_PORT1:
1357 case NET_BIOS_PORT2:
1358 os2bmc = is_nbios_filt_enabled(adapter);
1359 goto done;
1360 case DHCPV6_RAS_PORT:
1361 os2bmc = is_ipv6_ras_filt_enabled(adapter);
1362 goto done;
1363 default:
1364 break;
1365 }
1366 }
1367 done:
1368 /* For VLAN packets destined to the BMC, the ASIC expects
1369 * the VLAN tag to be inline in the packet.
1370 */
1371 if (os2bmc)
1372 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1373
1374 return os2bmc;
1375 }
1376
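/* ndo_start_xmit handler: apply workarounds, enqueue WRBs (a 2nd time if the pkt is also destined to the BMC) and ring the TX doorbell */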
1377 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1378 {
1379 struct be_adapter *adapter = netdev_priv(netdev);
1380 u16 q_idx = skb_get_queue_mapping(skb);
1381 struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1382 struct be_wrb_params wrb_params = { 0 };
1383 bool flush = !skb->xmit_more;
1384 u16 wrb_cnt;
1385
1386 skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1387 if (unlikely(!skb))
1388 goto drop;
1389
1390 be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1391
1392 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1393 if (unlikely(!wrb_cnt)) {
1394 dev_kfree_skb_any(skb);
1395 goto drop;
1396 }
1397
1398 /* If OS2BMC is enabled and the pkt is destined to the BMC,
1399 * enqueue the pkt a 2nd time with the mgmt bit set.
1400 */
1401 if (be_send_pkt_to_bmc(adapter, &skb)) {
1402 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1403 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1404 if (unlikely(!wrb_cnt))
1405 goto drop;
1406 else
1407 skb_get(skb);
1408 }
1409
1410 if (be_is_txq_full(txo)) {
1411 netif_stop_subqueue(netdev, q_idx);
1412 tx_stats(txo)->tx_stops++;
1413 }
1414
1415 if (flush || __netif_subqueue_stopped(netdev, q_idx))
1416 be_xmit_flush(adapter, txo);
1417
1418 return NETDEV_TX_OK;
1419 drop:
1420 tx_stats(txo)->tx_drv_drops++;
1421 /* Flush the already enqueued tx requests */
1422 if (flush && txo->pend_wrb_cnt)
1423 be_xmit_flush(adapter, txo);
1424
1425 return NETDEV_TX_OK;
1426 }
1427
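/* ndo_tx_timeout handler: dump TX queue/CQ state and pending skbs; on Lancer, initiate a FW reset */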
1428 static void be_tx_timeout(struct net_device *netdev)
1429 {
1430 struct be_adapter *adapter = netdev_priv(netdev);
1431 struct device *dev = &adapter->pdev->dev;
1432 struct be_tx_obj *txo;
1433 struct sk_buff *skb;
1434 struct tcphdr *tcphdr;
1435 struct udphdr *udphdr;
1436 u32 *entry;
1437 int status;
1438 int i, j;
1439
1440 for_all_tx_queues(adapter, txo, i) {
1441 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1442 i, txo->q.head, txo->q.tail,
1443 atomic_read(&txo->q.used), txo->q.id);
1444
1445 entry = txo->q.dma_mem.va;
1446 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1447 if (entry[j] != 0 || entry[j + 1] != 0 ||
1448 entry[j + 2] != 0 || entry[j + 3] != 0) {
1449 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1450 j, entry[j], entry[j + 1],
1451 entry[j + 2], entry[j + 3]);
1452 }
1453 }
1454
1455 entry = txo->cq.dma_mem.va;
1456 dev_info(dev, "TXCQ Dump: %d H: %d T: %d used: %d\n",
1457 i, txo->cq.head, txo->cq.tail,
1458 atomic_read(&txo->cq.used));
1459 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1460 if (entry[j] != 0 || entry[j + 1] != 0 ||
1461 entry[j + 2] != 0 || entry[j + 3] != 0) {
1462 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1463 j, entry[j], entry[j + 1],
1464 entry[j + 2], entry[j + 3]);
1465 }
1466 }
1467
1468 for (j = 0; j < TX_Q_LEN; j++) {
1469 if (txo->sent_skb_list[j]) {
1470 skb = txo->sent_skb_list[j];
1471 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1472 tcphdr = tcp_hdr(skb);
1473 dev_info(dev, "TCP source port %d\n",
1474 ntohs(tcphdr->source));
1475 dev_info(dev, "TCP dest port %d\n",
1476 ntohs(tcphdr->dest));
1477 dev_info(dev, "TCP sequence num %d\n",
1478 ntohs(tcphdr->seq));
1479 dev_info(dev, "TCP ack_seq %d\n",
1480 ntohs(tcphdr->ack_seq));
1481 } else if (ip_hdr(skb)->protocol ==
1482 IPPROTO_UDP) {
1483 udphdr = udp_hdr(skb);
1484 dev_info(dev, "UDP source port %d\n",
1485 ntohs(udphdr->source));
1486 dev_info(dev, "UDP dest port %d\n",
1487 ntohs(udphdr->dest));
1488 }
1489 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1490 j, skb, skb->len, skb->protocol);
1491 }
1492 }
1493 }
1494
1495 if (lancer_chip(adapter)) {
1496 dev_info(dev, "Initiating reset due to tx timeout\n");
1497 dev_info(dev, "Resetting adapter\n");
1498 status = lancer_physdev_ctrl(adapter,
1499 PHYSDEV_CONTROL_FW_RESET_MASK);
1500 if (status)
1501 dev_err(dev, "Reset failed .. Reboot server\n");
1502 }
1503 }
1504
1505 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1506 {
1507 return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1508 BE_IF_FLAGS_ALL_PROMISCUOUS;
1509 }
1510
1511 static int be_set_vlan_promisc(struct be_adapter *adapter)
1512 {
1513 struct device *dev = &adapter->pdev->dev;
1514 int status;
1515
1516 if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1517 return 0;
1518
1519 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1520 if (!status) {
1521 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1522 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1523 } else {
1524 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1525 }
1526 return status;
1527 }
1528
1529 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1530 {
1531 struct device *dev = &adapter->pdev->dev;
1532 int status;
1533
1534 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1535 if (!status) {
1536 dev_info(dev, "Disabling VLAN promiscuous mode\n");
1537 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1538 }
1539 return status;
1540 }
1541
1542 /*
1543 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1544 * If the user configures more, place BE in vlan promiscuous mode.
1545 */
1546 static int be_vid_config(struct be_adapter *adapter)
1547 {
1548 struct device *dev = &adapter->pdev->dev;
1549 u16 vids[BE_NUM_VLANS_SUPPORTED];
1550 u16 num = 0, i = 0;
1551 int status = 0;
1552
1553 /* No need to change the VLAN state if the I/F is in promiscuous */
1554 if (adapter->netdev->flags & IFF_PROMISC)
1555 return 0;
1556
1557 if (adapter->vlans_added > be_max_vlans(adapter))
1558 return be_set_vlan_promisc(adapter);
1559
1560 if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1561 status = be_clear_vlan_promisc(adapter);
1562 if (status)
1563 return status;
1564 }
1565 /* Construct VLAN Table to give to HW */
1566 for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1567 vids[num++] = cpu_to_le16(i);
1568
1569 status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1570 if (status) {
1571 dev_err(dev, "Setting HW VLAN filtering failed\n");
1572 /* Set to VLAN promisc mode as setting VLAN filter failed */
1573 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1574 addl_status(status) ==
1575 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1576 return be_set_vlan_promisc(adapter);
1577 }
1578 return status;
1579 }
1580
1581 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1582 {
1583 struct be_adapter *adapter = netdev_priv(netdev);
1584 int status = 0;
1585
1586 mutex_lock(&adapter->rx_filter_lock);
1587
1588 /* Packets with VID 0 are always received by Lancer by default */
1589 if (lancer_chip(adapter) && vid == 0)
1590 goto done;
1591
1592 if (test_bit(vid, adapter->vids))
1593 goto done;
1594
1595 set_bit(vid, adapter->vids);
1596 adapter->vlans_added++;
1597
1598 status = be_vid_config(adapter);
1599 done:
1600 mutex_unlock(&adapter->rx_filter_lock);
1601 return status;
1602 }
1603
1604 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1605 {
1606 struct be_adapter *adapter = netdev_priv(netdev);
1607 int status = 0;
1608
1609 mutex_lock(&adapter->rx_filter_lock);
1610
1611 /* Packets with VID 0 are always received by Lancer by default */
1612 if (lancer_chip(adapter) && vid == 0)
1613 goto done;
1614
1615 if (!test_bit(vid, adapter->vids))
1616 goto done;
1617
1618 clear_bit(vid, adapter->vids);
1619 adapter->vlans_added--;
1620
1621 status = be_vid_config(adapter);
1622 done:
1623 mutex_unlock(&adapter->rx_filter_lock);
1624 return status;
1625 }
1626
1627 static void be_set_all_promisc(struct be_adapter *adapter)
1628 {
1629 be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1630 adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1631 }
1632
1633 static void be_set_mc_promisc(struct be_adapter *adapter)
1634 {
1635 int status;
1636
1637 if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1638 return;
1639
1640 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1641 if (!status)
1642 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1643 }
1644
1645 static void be_set_uc_promisc(struct be_adapter *adapter)
1646 {
1647 int status;
1648
1649 if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1650 return;
1651
1652 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1653 if (!status)
1654 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1655 }
1656
1657 static void be_clear_uc_promisc(struct be_adapter *adapter)
1658 {
1659 int status;
1660
1661 if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1662 return;
1663
1664 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1665 if (!status)
1666 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1667 }
1668
1669 /* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync().
1670 * We use a single callback function for both sync and unsync. We really don't
1671 * add/remove addresses through this callback. But, we use it to detect changes
1672 * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1673 */
1674 static int be_uc_list_update(struct net_device *netdev,
1675 const unsigned char *addr)
1676 {
1677 struct be_adapter *adapter = netdev_priv(netdev);
1678
1679 adapter->update_uc_list = true;
1680 return 0;
1681 }
1682
1683 static int be_mc_list_update(struct net_device *netdev,
1684 const unsigned char *addr)
1685 {
1686 struct be_adapter *adapter = netdev_priv(netdev);
1687
1688 adapter->update_mc_list = true;
1689 return 0;
1690 }
1691
1692 static void be_set_mc_list(struct be_adapter *adapter)
1693 {
1694 struct net_device *netdev = adapter->netdev;
1695 struct netdev_hw_addr *ha;
1696 bool mc_promisc = false;
1697 int status;
1698
1699 netif_addr_lock_bh(netdev);
1700 __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1701
1702 if (netdev->flags & IFF_PROMISC) {
1703 adapter->update_mc_list = false;
1704 } else if (netdev->flags & IFF_ALLMULTI ||
1705 netdev_mc_count(netdev) > be_max_mc(adapter)) {
1706 /* Enable multicast promisc if num configured exceeds
1707 * what we support
1708 */
1709 mc_promisc = true;
1710 adapter->update_mc_list = false;
1711 } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1712 /* Update mc-list unconditionally if the iface was previously
1713 * in mc-promisc mode and now is out of that mode.
1714 */
1715 adapter->update_mc_list = true;
1716 }
1717
1718 if (adapter->update_mc_list) {
1719 int i = 0;
1720
1721 /* cache the mc-list in adapter */
1722 netdev_for_each_mc_addr(ha, netdev) {
1723 ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1724 i++;
1725 }
1726 adapter->mc_count = netdev_mc_count(netdev);
1727 }
1728 netif_addr_unlock_bh(netdev);
1729
1730 if (mc_promisc) {
1731 be_set_mc_promisc(adapter);
1732 } else if (adapter->update_mc_list) {
1733 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1734 if (!status)
1735 adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1736 else
1737 be_set_mc_promisc(adapter);
1738
1739 adapter->update_mc_list = false;
1740 }
1741 }
1742
1743 static void be_clear_mc_list(struct be_adapter *adapter)
1744 {
1745 struct net_device *netdev = adapter->netdev;
1746
1747 __dev_mc_unsync(netdev, NULL);
1748 be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1749 adapter->mc_count = 0;
1750 }
1751
1752 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1753 {
1754 if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1755 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1756 return 0;
1757 }
1758
1759 return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1760 adapter->if_handle,
1761 &adapter->pmac_id[uc_idx + 1], 0);
1762 }
1763
1764 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1765 {
1766 if (pmac_id == adapter->pmac_id[0])
1767 return;
1768
1769 be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1770 }
1771
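/* Program the HW uc-list from the netdev UC address list, or fall back to uc-promisc when it doesn't fit */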
1772 static void be_set_uc_list(struct be_adapter *adapter)
1773 {
1774 struct net_device *netdev = adapter->netdev;
1775 struct netdev_hw_addr *ha;
1776 bool uc_promisc = false;
1777 int curr_uc_macs = 0, i;
1778
1779 netif_addr_lock_bh(netdev);
1780 __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1781
1782 if (netdev->flags & IFF_PROMISC) {
1783 adapter->update_uc_list = false;
1784 } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1785 uc_promisc = true;
1786 adapter->update_uc_list = false;
1787 } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1788 /* Update uc-list unconditionally if the iface was previously
1789 * in uc-promisc mode and now is out of that mode.
1790 */
1791 adapter->update_uc_list = true;
1792 }
1793
1794 if (adapter->update_uc_list) {
1795 /* cache the uc-list in adapter array */
1796 i = 0;
1797 netdev_for_each_uc_addr(ha, netdev) {
1798 ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1799 i++;
1800 }
1801 curr_uc_macs = netdev_uc_count(netdev);
1802 }
1803 netif_addr_unlock_bh(netdev);
1804
1805 if (uc_promisc) {
1806 be_set_uc_promisc(adapter);
1807 } else if (adapter->update_uc_list) {
1808 be_clear_uc_promisc(adapter);
1809
1810 for (i = 0; i < adapter->uc_macs; i++)
1811 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1812
1813 for (i = 0; i < curr_uc_macs; i++)
1814 be_uc_mac_add(adapter, i);
1815 adapter->uc_macs = curr_uc_macs;
1816 adapter->update_uc_list = false;
1817 }
1818 }
1819
1820 static void be_clear_uc_list(struct be_adapter *adapter)
1821 {
1822 struct net_device *netdev = adapter->netdev;
1823 int i;
1824
1825 __dev_uc_unsync(netdev, NULL);
1826 for (i = 0; i < adapter->uc_macs; i++)
1827 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1828
1829 adapter->uc_macs = 0;
1830 }
1831
1832 static void __be_set_rx_mode(struct be_adapter *adapter)
1833 {
1834 struct net_device *netdev = adapter->netdev;
1835
1836 mutex_lock(&adapter->rx_filter_lock);
1837
1838 if (netdev->flags & IFF_PROMISC) {
1839 if (!be_in_all_promisc(adapter))
1840 be_set_all_promisc(adapter);
1841 } else if (be_in_all_promisc(adapter)) {
1842 /* We need to re-program the vlan-list or clear
1843 * vlan-promisc mode (if needed) when the interface
1844 * comes out of promisc mode.
1845 */
1846 be_vid_config(adapter);
1847 }
1848
1849 be_set_uc_list(adapter);
1850 be_set_mc_list(adapter);
1851
1852 mutex_unlock(&adapter->rx_filter_lock);
1853 }
1854
1855 static void be_work_set_rx_mode(struct work_struct *work)
1856 {
1857 struct be_cmd_work *cmd_work =
1858 container_of(work, struct be_cmd_work, work);
1859
1860 __be_set_rx_mode(cmd_work->adapter);
1861 kfree(cmd_work);
1862 }
1863
1864 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1865 {
1866 struct be_adapter *adapter = netdev_priv(netdev);
1867 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1868 int status;
1869
1870 if (!sriov_enabled(adapter))
1871 return -EPERM;
1872
1873 if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1874 return -EINVAL;
1875
1876 /* Proceed further only if user provided MAC is different
1877 * from active MAC
1878 */
1879 if (ether_addr_equal(mac, vf_cfg->mac_addr))
1880 return 0;
1881
1882 if (BEx_chip(adapter)) {
1883 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1884 vf + 1);
1885
1886 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1887 &vf_cfg->pmac_id, vf + 1);
1888 } else {
1889 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1890 vf + 1);
1891 }
1892
1893 if (status) {
1894 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1895 mac, vf, status);
1896 return be_cmd_status(status);
1897 }
1898
1899 ether_addr_copy(vf_cfg->mac_addr, mac);
1900
1901 return 0;
1902 }
1903
1904 static int be_get_vf_config(struct net_device *netdev, int vf,
1905 struct ifla_vf_info *vi)
1906 {
1907 struct be_adapter *adapter = netdev_priv(netdev);
1908 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1909
1910 if (!sriov_enabled(adapter))
1911 return -EPERM;
1912
1913 if (vf >= adapter->num_vfs)
1914 return -EINVAL;
1915
1916 vi->vf = vf;
1917 vi->max_tx_rate = vf_cfg->tx_rate;
1918 vi->min_tx_rate = 0;
1919 vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1920 vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1921 memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1922 vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1923 vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1924
1925 return 0;
1926 }
1927
1928 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1929 {
1930 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1931 u16 vids[BE_NUM_VLANS_SUPPORTED];
1932 int vf_if_id = vf_cfg->if_handle;
1933 int status;
1934
1935 /* Enable Transparent VLAN Tagging */
1936 status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1937 if (status)
1938 return status;
1939
1940 /* Clear any pre-programmed VLAN filters on the VF once TVT is enabled */
1941 vids[0] = 0;
1942 status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1943 if (!status)
1944 dev_info(&adapter->pdev->dev,
1945 "Cleared guest VLANs on VF%d", vf);
1946
1947 /* After TVT is enabled, disallow VFs from programming VLAN filters */
1948 if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1949 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1950 ~BE_PRIV_FILTMGMT, vf + 1);
1951 if (!status)
1952 vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1953 }
1954 return 0;
1955 }
1956
1957 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1958 {
1959 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1960 struct device *dev = &adapter->pdev->dev;
1961 int status;
1962
1963 /* Reset Transparent VLAN Tagging. */
1964 status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1965 vf_cfg->if_handle, 0, 0);
1966 if (status)
1967 return status;
1968
1969 /* Allow VFs to program VLAN filtering */
1970 if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1971 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1972 BE_PRIV_FILTMGMT, vf + 1);
1973 if (!status) {
1974 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1975 dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1976 }
1977 }
1978
1979 dev_info(dev,
1980 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1981 return 0;
1982 }
1983
1984 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1985 __be16 vlan_proto)
1986 {
1987 struct be_adapter *adapter = netdev_priv(netdev);
1988 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1989 int status;
1990
1991 if (!sriov_enabled(adapter))
1992 return -EPERM;
1993
1994 if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1995 return -EINVAL;
1996
1997 if (vlan_proto != htons(ETH_P_8021Q))
1998 return -EPROTONOSUPPORT;
1999
2000 if (vlan || qos) {
2001 vlan |= qos << VLAN_PRIO_SHIFT;
2002 status = be_set_vf_tvt(adapter, vf, vlan);
2003 } else {
2004 status = be_clear_vf_tvt(adapter, vf);
2005 }
2006
2007 if (status) {
2008 dev_err(&adapter->pdev->dev,
2009 "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2010 status);
2011 return be_cmd_status(status);
2012 }
2013
2014 vf_cfg->vlan_tag = vlan;
2015 return 0;
2016 }
2017
2018 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2019 int min_tx_rate, int max_tx_rate)
2020 {
2021 struct be_adapter *adapter = netdev_priv(netdev);
2022 struct device *dev = &adapter->pdev->dev;
2023 int percent_rate, status = 0;
2024 u16 link_speed = 0;
2025 u8 link_status;
2026
2027 if (!sriov_enabled(adapter))
2028 return -EPERM;
2029
2030 if (vf >= adapter->num_vfs)
2031 return -EINVAL;
2032
2033 if (min_tx_rate)
2034 return -EINVAL;
2035
2036 if (!max_tx_rate)
2037 goto config_qos;
2038
2039 status = be_cmd_link_status_query(adapter, &link_speed,
2040 &link_status, 0);
2041 if (status)
2042 goto err;
2043
2044 if (!link_status) {
2045 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2046 status = -ENETDOWN;
2047 goto err;
2048 }
2049
2050 if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2051 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2052 link_speed);
2053 status = -EINVAL;
2054 goto err;
2055 }
2056
2057 /* On Skyhawk the QOS setting must be done only as a % value */
2058 percent_rate = link_speed / 100;
2059 if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2060 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2061 percent_rate);
2062 status = -EINVAL;
2063 goto err;
2064 }
2065
2066 config_qos:
2067 status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2068 if (status)
2069 goto err;
2070
2071 adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2072 return 0;
2073
2074 err:
2075 dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2076 max_tx_rate, vf);
2077 return be_cmd_status(status);
2078 }
2079
2080 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2081 int link_state)
2082 {
2083 struct be_adapter *adapter = netdev_priv(netdev);
2084 int status;
2085
2086 if (!sriov_enabled(adapter))
2087 return -EPERM;
2088
2089 if (vf >= adapter->num_vfs)
2090 return -EINVAL;
2091
2092 status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2093 if (status) {
2094 dev_err(&adapter->pdev->dev,
2095 "Link state change on VF %d failed: %#x\n", vf, status);
2096 return be_cmd_status(status);
2097 }
2098
2099 adapter->vf_cfg[vf].plink_tracking = link_state;
2100
2101 return 0;
2102 }
2103
2104 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2105 {
2106 struct be_adapter *adapter = netdev_priv(netdev);
2107 struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2108 u8 spoofchk;
2109 int status;
2110
2111 if (!sriov_enabled(adapter))
2112 return -EPERM;
2113
2114 if (vf >= adapter->num_vfs)
2115 return -EINVAL;
2116
2117 if (BEx_chip(adapter))
2118 return -EOPNOTSUPP;
2119
2120 if (enable == vf_cfg->spoofchk)
2121 return 0;
2122
2123 spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2124
2125 status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2126 0, spoofchk);
2127 if (status) {
2128 dev_err(&adapter->pdev->dev,
2129 "Spoofchk change on VF %d failed: %#x\n", vf, status);
2130 return be_cmd_status(status);
2131 }
2132
2133 vf_cfg->spoofchk = enable;
2134 return 0;
2135 }
2136
2137 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2138 ulong now)
2139 {
2140 aic->rx_pkts_prev = rx_pkts;
2141 aic->tx_reqs_prev = tx_pkts;
2142 aic->jiffies = now;
2143 }
2144
2145 static int be_get_new_eqd(struct be_eq_obj *eqo)
2146 {
2147 struct be_adapter *adapter = eqo->adapter;
2148 int eqd, start;
2149 struct be_aic_obj *aic;
2150 struct be_rx_obj *rxo;
2151 struct be_tx_obj *txo;
2152 u64 rx_pkts = 0, tx_pkts = 0;
2153 ulong now;
2154 u32 pps, delta;
2155 int i;
2156
2157 aic = &adapter->aic_obj[eqo->idx];
2158 if (!aic->enable) {
2159 if (aic->jiffies)
2160 aic->jiffies = 0;
2161 eqd = aic->et_eqd;
2162 return eqd;
2163 }
2164
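/* Snapshot the per-queue packet counters under their u64_stats
 * seqcounts so the 64-bit reads are consistent on 32-bit hosts.
 */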
2165 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2166 do {
2167 start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2168 rx_pkts += rxo->stats.rx_pkts;
2169 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2170 }
2171
2172 for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2173 do {
2174 start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2175 tx_pkts += txo->stats.tx_reqs;
2176 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2177 }
2178
2179 /* Skip if jiffies or the packet counters wrapped around, or on the first calculation */
2180 now = jiffies;
2181 if (!aic->jiffies || time_before(now, aic->jiffies) ||
2182 rx_pkts < aic->rx_pkts_prev ||
2183 tx_pkts < aic->tx_reqs_prev) {
2184 be_aic_update(aic, rx_pkts, tx_pkts, now);
2185 return aic->prev_eqd;
2186 }
2187
2188 delta = jiffies_to_msecs(now - aic->jiffies);
2189 if (delta == 0)
2190 return aic->prev_eqd;
2191
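/* Derive an EQ delay value from the combined RX+TX packet rate:
 * eqd scales as (pps / 15000) << 2, e.g. ~300K pkts/sec gives
 * (300000 / 15000) << 2 = 80. Rates low enough to yield eqd < 8
 * disable the delay entirely, and the result is clamped to the
 * AIC's [min_eqd, max_eqd] range.
 */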
2192 pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2193 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2194 eqd = (pps / 15000) << 2;
2195
2196 if (eqd < 8)
2197 eqd = 0;
2198 eqd = min_t(u32, eqd, aic->max_eqd);
2199 eqd = max_t(u32, eqd, aic->min_eqd);
2200
2201 be_aic_update(aic, rx_pkts, tx_pkts, now);
2202
2203 return eqd;
2204 }
2205
2206 /* For Skyhawk-R only */
2207 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2208 {
2209 struct be_adapter *adapter = eqo->adapter;
2210 struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2211 ulong now = jiffies;
2212 int eqd;
2213 u32 mult_enc;
2214
2215 if (!aic->enable)
2216 return 0;
2217
2218 if (jiffies_to_msecs(now - aic->jiffies) < 1)
2219 eqd = aic->prev_eqd;
2220 else
2221 eqd = be_get_new_eqd(eqo);
2222
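/* Map the delay to one of the four coarse R2I delay encodings
 * used when re-arming the Skyhawk EQ doorbell.
 */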
2223 if (eqd > 100)
2224 mult_enc = R2I_DLY_ENC_1;
2225 else if (eqd > 60)
2226 mult_enc = R2I_DLY_ENC_2;
2227 else if (eqd > 20)
2228 mult_enc = R2I_DLY_ENC_3;
2229 else
2230 mult_enc = R2I_DLY_ENC_0;
2231
2232 aic->prev_eqd = eqd;
2233
2234 return mult_enc;
2235 }
2236
2237 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2238 {
2239 struct be_set_eqd set_eqd[MAX_EVT_QS];
2240 struct be_aic_obj *aic;
2241 struct be_eq_obj *eqo;
2242 int i, num = 0, eqd;
2243
2244 for_all_evt_queues(adapter, eqo, i) {
2245 aic = &adapter->aic_obj[eqo->idx];
2246 eqd = be_get_new_eqd(eqo);
2247 if (force_update || eqd != aic->prev_eqd) {
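/* FW takes the delay as a multiplier; scale the computed eqd
 * by 65/100 and batch it for the be_cmd_modify_eqd() call below.
 */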
2248 set_eqd[num].delay_multiplier = (eqd * 65)/100;
2249 set_eqd[num].eq_id = eqo->q.id;
2250 aic->prev_eqd = eqd;
2251 num++;
2252 }
2253 }
2254
2255 if (num)
2256 be_cmd_modify_eqd(adapter, set_eqd, num);
2257 }
2258
2259 static void be_rx_stats_update(struct be_rx_obj *rxo,
2260 struct be_rx_compl_info *rxcp)
2261 {
2262 struct be_rx_stats *stats = rx_stats(rxo);
2263
2264 u64_stats_update_begin(&stats->sync);
2265 stats->rx_compl++;
2266 stats->rx_bytes += rxcp->pkt_size;
2267 stats->rx_pkts++;
2268 if (rxcp->tunneled)
2269 stats->rx_vxlan_offload_pkts++;
2270 if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2271 stats->rx_mcast_pkts++;
2272 if (rxcp->err)
2273 stats->rx_compl_err++;
2274 u64_stats_update_end(&stats->sync);
2275 }
2276
2277 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2278 {
2279 /* L4 checksum is not reliable for non TCP/UDP packets.
2280 * Also ignore ipcksm for ipv6 pkts
2281 */
2282 return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2283 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2284 }
2285
2286 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2287 {
2288 struct be_adapter *adapter = rxo->adapter;
2289 struct be_rx_page_info *rx_page_info;
2290 struct be_queue_info *rxq = &rxo->q;
2291 u32 frag_idx = rxq->tail;
2292
2293 rx_page_info = &rxo->page_info_tbl[frag_idx];
2294 BUG_ON(!rx_page_info->page);
2295
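/* The receive page is DMA-mapped once, for its full size, when
 * posted. Unmap it only when its last fragment is consumed;
 * otherwise just sync this fragment for CPU access.
 */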
2296 if (rx_page_info->last_frag) {
2297 dma_unmap_page(&adapter->pdev->dev,
2298 dma_unmap_addr(rx_page_info, bus),
2299 adapter->big_page_size, DMA_FROM_DEVICE);
2300 rx_page_info->last_frag = false;
2301 } else {
2302 dma_sync_single_for_cpu(&adapter->pdev->dev,
2303 dma_unmap_addr(rx_page_info, bus),
2304 rx_frag_size, DMA_FROM_DEVICE);
2305 }
2306
2307 queue_tail_inc(rxq);
2308 atomic_dec(&rxq->used);
2309 return rx_page_info;
2310 }
2311
2312 /* Throw away the data in the Rx completion */
2313 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2314 struct be_rx_compl_info *rxcp)
2315 {
2316 struct be_rx_page_info *page_info;
2317 u16 i, num_rcvd = rxcp->num_rcvd;
2318
2319 for (i = 0; i < num_rcvd; i++) {
2320 page_info = get_rx_page_info(rxo);
2321 put_page(page_info->page);
2322 memset(page_info, 0, sizeof(*page_info));
2323 }
2324 }
2325
2326 /*
2327 * skb_fill_rx_data forms a complete skb for an ether frame
2328 * indicated by rxcp.
2329 */
2330 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2331 struct be_rx_compl_info *rxcp)
2332 {
2333 struct be_rx_page_info *page_info;
2334 u16 i, j;
2335 u16 hdr_len, curr_frag_len, remaining;
2336 u8 *start;
2337
2338 page_info = get_rx_page_info(rxo);
2339 start = page_address(page_info->page) + page_info->page_offset;
2340 prefetch(start);
2341
2342 /* Copy data in the first descriptor of this completion */
2343 curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2344
2345 skb->len = curr_frag_len;
2346 if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2347 memcpy(skb->data, start, curr_frag_len);
2348 /* Complete packet has now been moved to data */
2349 put_page(page_info->page);
2350 skb->data_len = 0;
2351 skb->tail += curr_frag_len;
2352 } else {
2353 hdr_len = ETH_HLEN;
2354 memcpy(skb->data, start, hdr_len);
2355 skb_shinfo(skb)->nr_frags = 1;
2356 skb_frag_set_page(skb, 0, page_info->page);
2357 skb_shinfo(skb)->frags[0].page_offset =
2358 page_info->page_offset + hdr_len;
2359 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2360 curr_frag_len - hdr_len);
2361 skb->data_len = curr_frag_len - hdr_len;
2362 skb->truesize += rx_frag_size;
2363 skb->tail += hdr_len;
2364 }
2365 page_info->page = NULL;
2366
2367 if (rxcp->pkt_size <= rx_frag_size) {
2368 BUG_ON(rxcp->num_rcvd != 1);
2369 return;
2370 }
2371
2372 /* More frags present for this completion */
2373 remaining = rxcp->pkt_size - curr_frag_len;
2374 for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2375 page_info = get_rx_page_info(rxo);
2376 curr_frag_len = min(remaining, rx_frag_size);
2377
2378 /* Coalesce all frags from the same physical page in one slot */
2379 if (page_info->page_offset == 0) {
2380 /* Fresh page */
2381 j++;
2382 skb_frag_set_page(skb, j, page_info->page);
2383 skb_shinfo(skb)->frags[j].page_offset =
2384 page_info->page_offset;
2385 skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2386 skb_shinfo(skb)->nr_frags++;
2387 } else {
2388 put_page(page_info->page);
2389 }
2390
2391 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2392 skb->len += curr_frag_len;
2393 skb->data_len += curr_frag_len;
2394 skb->truesize += rx_frag_size;
2395 remaining -= curr_frag_len;
2396 page_info->page = NULL;
2397 }
2398 BUG_ON(j > MAX_SKB_FRAGS);
2399 }
2400
2401 /* Process the RX completion indicated by rxcp when GRO is disabled */
2402 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2403 struct be_rx_compl_info *rxcp)
2404 {
2405 struct be_adapter *adapter = rxo->adapter;
2406 struct net_device *netdev = adapter->netdev;
2407 struct sk_buff *skb;
2408
2409 skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2410 if (unlikely(!skb)) {
2411 rx_stats(rxo)->rx_drops_no_skbs++;
2412 be_rx_compl_discard(rxo, rxcp);
2413 return;
2414 }
2415
2416 skb_fill_rx_data(rxo, skb, rxcp);
2417
2418 if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2419 skb->ip_summed = CHECKSUM_UNNECESSARY;
2420 else
2421 skb_checksum_none_assert(skb);
2422
2423 skb->protocol = eth_type_trans(skb, netdev);
2424 skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2425 if (netdev->features & NETIF_F_RXHASH)
2426 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2427
2428 skb->csum_level = rxcp->tunneled;
2429 skb_mark_napi_id(skb, napi);
2430
2431 if (rxcp->vlanf)
2432 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2433
2434 netif_receive_skb(skb);
2435 }
2436
2437 /* Process the RX completion indicated by rxcp when GRO is enabled */
2438 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2439 struct napi_struct *napi,
2440 struct be_rx_compl_info *rxcp)
2441 {
2442 struct be_adapter *adapter = rxo->adapter;
2443 struct be_rx_page_info *page_info;
2444 struct sk_buff *skb = NULL;
2445 u16 remaining, curr_frag_len;
2446 u16 i, j;
2447
2448 skb = napi_get_frags(napi);
2449 if (!skb) {
2450 be_rx_compl_discard(rxo, rxcp);
2451 return;
2452 }
2453
2454 remaining = rxcp->pkt_size;
2455 for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2456 page_info = get_rx_page_info(rxo);
2457
2458 curr_frag_len = min(remaining, rx_frag_size);
2459
2460 /* Coalesce all frags from the same physical page in one slot */
2461 if (i == 0 || page_info->page_offset == 0) {
2462 /* First frag or Fresh page */
2463 j++;
2464 skb_frag_set_page(skb, j, page_info->page);
2465 skb_shinfo(skb)->frags[j].page_offset =
2466 page_info->page_offset;
2467 skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2468 } else {
2469 put_page(page_info->page);
2470 }
2471 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2472 skb->truesize += rx_frag_size;
2473 remaining -= curr_frag_len;
2474 memset(page_info, 0, sizeof(*page_info));
2475 }
2476 BUG_ON(j > MAX_SKB_FRAGS);
2477
2478 skb_shinfo(skb)->nr_frags = j + 1;
2479 skb->len = rxcp->pkt_size;
2480 skb->data_len = rxcp->pkt_size;
2481 skb->ip_summed = CHECKSUM_UNNECESSARY;
2482 skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2483 if (adapter->netdev->features & NETIF_F_RXHASH)
2484 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2485
2486 skb->csum_level = rxcp->tunneled;
2487
2488 if (rxcp->vlanf)
2489 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2490
2491 napi_gro_frags(napi);
2492 }
2493
2494 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2495 struct be_rx_compl_info *rxcp)
2496 {
2497 rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2498 rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2499 rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2500 rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2501 rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2502 rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2503 rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2504 rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2505 rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2506 rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2507 rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2508 if (rxcp->vlanf) {
2509 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2510 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2511 }
2512 rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2513 rxcp->tunneled =
2514 GET_RX_COMPL_V1_BITS(tunneled, compl);
2515 }
2516
2517 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2518 struct be_rx_compl_info *rxcp)
2519 {
2520 rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2521 rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2522 rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2523 rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2524 rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2525 rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2526 rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2527 rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2528 rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2529 rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2530 rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2531 if (rxcp->vlanf) {
2532 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2533 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2534 }
2535 rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2536 rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2537 }
2538
2539 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2540 {
2541 struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2542 struct be_rx_compl_info *rxcp = &rxo->rxcp;
2543 struct be_adapter *adapter = rxo->adapter;
2544
2545 /* For checking the valid bit it is Ok to use either definition as the
2546 * valid bit is at the same position in both v0 and v1 Rx compl */
2547 if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2548 return NULL;
2549
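/* Read the rest of the completion only after the valid bit has
 * been observed; same ordering requirement as in the TX path.
 */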
2550 rmb();
2551 be_dws_le_to_cpu(compl, sizeof(*compl));
2552
2553 if (adapter->be3_native)
2554 be_parse_rx_compl_v1(compl, rxcp);
2555 else
2556 be_parse_rx_compl_v0(compl, rxcp);
2557
2558 if (rxcp->ip_frag)
2559 rxcp->l4_csum = 0;
2560
2561 if (rxcp->vlanf) {
2562 /* In QNQ modes, if qnq bit is not set, then the packet was
2563 * tagged only with the transparent outer vlan-tag and must
2564 * not be treated as a vlan packet by host
2565 */
2566 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2567 rxcp->vlanf = 0;
2568
2569 if (!lancer_chip(adapter))
2570 rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2571
2572 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2573 !test_bit(rxcp->vlan_tag, adapter->vids))
2574 rxcp->vlanf = 0;
2575 }
2576
2577 /* As the compl has been parsed, reset it; we won't touch it again */
2578 compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2579
2580 queue_tail_inc(&rxo->cq);
2581 return rxcp;
2582 }
2583
2584 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2585 {
2586 u32 order = get_order(size);
2587
2588 if (order > 0)
2589 gfp |= __GFP_COMP;
2590 return alloc_pages(gfp, order);
2591 }
2592
2593 /*
2594 * Allocate a page, split it into fragments of size rx_frag_size and post as
2595 * receive buffers to BE
2596 */
2597 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2598 {
2599 struct be_adapter *adapter = rxo->adapter;
2600 struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2601 struct be_queue_info *rxq = &rxo->q;
2602 struct page *pagep = NULL;
2603 struct device *dev = &adapter->pdev->dev;
2604 struct be_eth_rx_d *rxd;
2605 u64 page_dmaaddr = 0, frag_dmaaddr;
2606 u32 posted, page_offset = 0, notify = 0;
2607
2608 page_info = &rxo->page_info_tbl[rxq->head];
2609 for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2610 if (!pagep) {
2611 pagep = be_alloc_pages(adapter->big_page_size, gfp);
2612 if (unlikely(!pagep)) {
2613 rx_stats(rxo)->rx_post_fail++;
2614 break;
2615 }
2616 page_dmaaddr = dma_map_page(dev, pagep, 0,
2617 adapter->big_page_size,
2618 DMA_FROM_DEVICE);
2619 if (dma_mapping_error(dev, page_dmaaddr)) {
2620 put_page(pagep);
2621 pagep = NULL;
2622 adapter->drv_stats.dma_map_errors++;
2623 break;
2624 }
2625 page_offset = 0;
2626 } else {
2627 get_page(pagep);
2628 page_offset += rx_frag_size;
2629 }
2630 page_info->page_offset = page_offset;
2631 page_info->page = pagep;
2632
2633 rxd = queue_head_node(rxq);
2634 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2635 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2636 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2637
2638 /* Any space left in the current big page for another frag? */
2639 if ((page_offset + rx_frag_size + rx_frag_size) >
2640 adapter->big_page_size) {
2641 pagep = NULL;
2642 page_info->last_frag = true;
2643 dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2644 } else {
2645 dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2646 }
2647
2648 prev_page_info = page_info;
2649 queue_head_inc(rxq);
2650 page_info = &rxo->page_info_tbl[rxq->head];
2651 }
2652
2653 /* Mark the last frag of a page when we break out of the above loop
2654 * with no more slots available in the RXQ
2655 */
2656 if (pagep) {
2657 prev_page_info->last_frag = true;
2658 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2659 }
2660
2661 if (posted) {
2662 atomic_add(posted, &rxq->used);
2663 if (rxo->rx_post_starved)
2664 rxo->rx_post_starved = false;
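/* Ring the RX doorbell in chunks; a single notify covers at most
 * MAX_NUM_POST_ERX_DB of the newly posted fragments.
 */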
2665 do {
2666 notify = min(MAX_NUM_POST_ERX_DB, posted);
2667 be_rxq_notify(adapter, rxq->id, notify);
2668 posted -= notify;
2669 } while (posted);
2670 } else if (atomic_read(&rxq->used) == 0) {
2671 /* Let be_worker replenish when memory is available */
2672 rxo->rx_post_starved = true;
2673 }
2674 }
2675
2676 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2677 {
2678 switch (status) {
2679 case BE_TX_COMP_HDR_PARSE_ERR:
2680 tx_stats(txo)->tx_hdr_parse_err++;
2681 break;
2682 case BE_TX_COMP_NDMA_ERR:
2683 tx_stats(txo)->tx_dma_err++;
2684 break;
2685 case BE_TX_COMP_ACL_ERR:
2686 tx_stats(txo)->tx_spoof_check_err++;
2687 break;
2688 }
2689 }
2690
2691 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2692 {
2693 switch (status) {
2694 case LANCER_TX_COMP_LSO_ERR:
2695 tx_stats(txo)->tx_tso_err++;
2696 break;
2697 case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2698 case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2699 tx_stats(txo)->tx_spoof_check_err++;
2700 break;
2701 case LANCER_TX_COMP_QINQ_ERR:
2702 tx_stats(txo)->tx_qinq_err++;
2703 break;
2704 case LANCER_TX_COMP_PARITY_ERR:
2705 tx_stats(txo)->tx_internal_parity_err++;
2706 break;
2707 case LANCER_TX_COMP_DMA_ERR:
2708 tx_stats(txo)->tx_dma_err++;
2709 break;
2710 case LANCER_TX_COMP_SGE_ERR:
2711 tx_stats(txo)->tx_sge_err++;
2712 break;
2713 }
2714 }
2715
2716 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2717 struct be_tx_obj *txo)
2718 {
2719 struct be_queue_info *tx_cq = &txo->cq;
2720 struct be_tx_compl_info *txcp = &txo->txcp;
2721 struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2722
2723 if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2724 return NULL;
2725
2726 /* Ensure load ordering of valid bit dword and other dwords below */
2727 rmb();
2728 be_dws_le_to_cpu(compl, sizeof(*compl));
2729
2730 txcp->status = GET_TX_COMPL_BITS(status, compl);
2731 txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2732
2733 if (txcp->status) {
2734 if (lancer_chip(adapter)) {
2735 lancer_update_tx_err(txo, txcp->status);
2736 /* Reset the adapter in case of TSO,
2737 * SGE or Parity error
2738 */
2739 if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2740 txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2741 txcp->status == LANCER_TX_COMP_SGE_ERR)
2742 be_set_error(adapter, BE_ERROR_TX);
2743 } else {
2744 be_update_tx_err(txo, txcp->status);
2745 }
2746 }
2747
2748 if (be_check_error(adapter, BE_ERROR_TX))
2749 return NULL;
2750
2751 compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2752 queue_tail_inc(tx_cq);
2753 return txcp;
2754 }
2755
2756 static u16 be_tx_compl_process(struct be_adapter *adapter,
2757 struct be_tx_obj *txo, u16 last_index)
2758 {
2759 struct sk_buff **sent_skbs = txo->sent_skb_list;
2760 struct be_queue_info *txq = &txo->q;
2761 struct sk_buff *skb = NULL;
2762 bool unmap_skb_hdr = false;
2763 struct be_eth_wrb *wrb;
2764 u16 num_wrbs = 0;
2765 u32 frag_index;
2766
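/* Each TX request begins with a header WRB (whose slot holds the
 * skb pointer in sent_skb_list[]) followed by one WRB per data
 * fragment. Walk the ring from the tail, unmapping fragments,
 * until the completion's last_index is reached.
 */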
2767 do {
2768 if (sent_skbs[txq->tail]) {
2769 /* Free skb from prev req */
2770 if (skb)
2771 dev_consume_skb_any(skb);
2772 skb = sent_skbs[txq->tail];
2773 sent_skbs[txq->tail] = NULL;
2774 queue_tail_inc(txq); /* skip hdr wrb */
2775 num_wrbs++;
2776 unmap_skb_hdr = true;
2777 }
2778 wrb = queue_tail_node(txq);
2779 frag_index = txq->tail;
2780 unmap_tx_frag(&adapter->pdev->dev, wrb,
2781 (unmap_skb_hdr && skb_headlen(skb)));
2782 unmap_skb_hdr = false;
2783 queue_tail_inc(txq);
2784 num_wrbs++;
2785 } while (frag_index != last_index);
2786 dev_consume_skb_any(skb);
2787
2788 return num_wrbs;
2789 }
2790
2791 /* Return the number of events in the event queue */
2792 static inline int events_get(struct be_eq_obj *eqo)
2793 {
2794 struct be_eq_entry *eqe;
2795 int num = 0;
2796
2797 do {
2798 eqe = queue_tail_node(&eqo->q);
2799 if (eqe->evt == 0)
2800 break;
2801
2802 rmb();
2803 eqe->evt = 0;
2804 num++;
2805 queue_tail_inc(&eqo->q);
2806 } while (true);
2807
2808 return num;
2809 }
2810
2811 /* Leaves the EQ in disarmed state */
2812 static void be_eq_clean(struct be_eq_obj *eqo)
2813 {
2814 int num = events_get(eqo);
2815
2816 be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2817 }
2818
2819 /* Free posted rx buffers that were not used */
2820 static void be_rxq_clean(struct be_rx_obj *rxo)
2821 {
2822 struct be_queue_info *rxq = &rxo->q;
2823 struct be_rx_page_info *page_info;
2824
2825 while (atomic_read(&rxq->used) > 0) {
2826 page_info = get_rx_page_info(rxo);
2827 put_page(page_info->page);
2828 memset(page_info, 0, sizeof(*page_info));
2829 }
2830 BUG_ON(atomic_read(&rxq->used));
2831 rxq->tail = 0;
2832 rxq->head = 0;
2833 }
2834
2835 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2836 {
2837 struct be_queue_info *rx_cq = &rxo->cq;
2838 struct be_rx_compl_info *rxcp;
2839 struct be_adapter *adapter = rxo->adapter;
2840 int flush_wait = 0;
2841
2842 /* Consume pending rx completions.
2843 * Wait for the flush completion (identified by zero num_rcvd)
2844 * to arrive. Notify CQ even when there are no more CQ entries
2845 * for HW to flush partially coalesced CQ entries.
2846 * In Lancer, there is no need to wait for flush compl.
2847 */
2848 for (;;) {
2849 rxcp = be_rx_compl_get(rxo);
2850 if (!rxcp) {
2851 if (lancer_chip(adapter))
2852 break;
2853
2854 if (flush_wait++ > 50 ||
2855 be_check_error(adapter,
2856 BE_ERROR_HW)) {
2857 dev_warn(&adapter->pdev->dev,
2858 "did not receive flush compl\n");
2859 break;
2860 }
2861 be_cq_notify(adapter, rx_cq->id, true, 0);
2862 mdelay(1);
2863 } else {
2864 be_rx_compl_discard(rxo, rxcp);
2865 be_cq_notify(adapter, rx_cq->id, false, 1);
2866 if (rxcp->num_rcvd == 0)
2867 break;
2868 }
2869 }
2870
2871 /* After cleanup, leave the CQ in unarmed state */
2872 be_cq_notify(adapter, rx_cq->id, false, 0);
2873 }
2874
2875 static void be_tx_compl_clean(struct be_adapter *adapter)
2876 {
2877 struct device *dev = &adapter->pdev->dev;
2878 u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2879 struct be_tx_compl_info *txcp;
2880 struct be_queue_info *txq;
2881 u32 end_idx, notified_idx;
2882 struct be_tx_obj *txo;
2883 int i, pending_txqs;
2884
2885 /* Stop polling for compls when HW has been silent for 10ms */
2886 do {
2887 pending_txqs = adapter->num_tx_qs;
2888
2889 for_all_tx_queues(adapter, txo, i) {
2890 cmpl = 0;
2891 num_wrbs = 0;
2892 txq = &txo->q;
2893 while ((txcp = be_tx_compl_get(adapter, txo))) {
2894 num_wrbs +=
2895 be_tx_compl_process(adapter, txo,
2896 txcp->end_index);
2897 cmpl++;
2898 }
2899 if (cmpl) {
2900 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2901 atomic_sub(num_wrbs, &txq->used);
2902 timeo = 0;
2903 }
2904 if (!be_is_tx_compl_pending(txo))
2905 pending_txqs--;
2906 }
2907
2908 if (pending_txqs == 0 || ++timeo > 10 ||
2909 be_check_error(adapter, BE_ERROR_HW))
2910 break;
2911
2912 mdelay(1);
2913 } while (true);
2914
2915 /* Free enqueued TX that was never notified to HW */
2916 for_all_tx_queues(adapter, txo, i) {
2917 txq = &txo->q;
2918
2919 if (atomic_read(&txq->used)) {
2920 dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2921 i, atomic_read(&txq->used));
2922 notified_idx = txq->tail;
2923 end_idx = txq->tail;
2924 index_adv(&end_idx, atomic_read(&txq->used) - 1,
2925 txq->len);
2926 /* Use the tx-compl process logic to handle requests
2927 * that were not sent to the HW.
2928 */
2929 num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2930 atomic_sub(num_wrbs, &txq->used);
2931 BUG_ON(atomic_read(&txq->used));
2932 txo->pend_wrb_cnt = 0;
2933 /* Since hw was never notified of these requests,
2934 * reset TXQ indices
2935 */
2936 txq->head = notified_idx;
2937 txq->tail = notified_idx;
2938 }
2939 }
2940 }
2941
2942 static void be_evt_queues_destroy(struct be_adapter *adapter)
2943 {
2944 struct be_eq_obj *eqo;
2945 int i;
2946
2947 for_all_evt_queues(adapter, eqo, i) {
2948 if (eqo->q.created) {
2949 be_eq_clean(eqo);
2950 be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2951 netif_napi_del(&eqo->napi);
2952 free_cpumask_var(eqo->affinity_mask);
2953 }
2954 be_queue_free(adapter, &eqo->q);
2955 }
2956 }
2957
2958 static int be_evt_queues_create(struct be_adapter *adapter)
2959 {
2960 struct be_queue_info *eq;
2961 struct be_eq_obj *eqo;
2962 struct be_aic_obj *aic;
2963 int i, rc;
2964
2965 /* need enough EQs to service both RX and TX queues */
2966 adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2967 max(adapter->cfg_num_rx_irqs,
2968 adapter->cfg_num_tx_irqs));
2969
2970 for_all_evt_queues(adapter, eqo, i) {
2971 int numa_node = dev_to_node(&adapter->pdev->dev);
2972
2973 aic = &adapter->aic_obj[i];
2974 eqo->adapter = adapter;
2975 eqo->idx = i;
2976 aic->max_eqd = BE_MAX_EQD;
2977 aic->enable = true;
2978
2979 eq = &eqo->q;
2980 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2981 sizeof(struct be_eq_entry));
2982 if (rc)
2983 return rc;
2984
2985 rc = be_cmd_eq_create(adapter, eqo);
2986 if (rc)
2987 return rc;
2988
2989 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2990 return -ENOMEM;
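/* Spread the EQ affinity hints across CPUs, preferring those
 * local to the adapter's NUMA node.
 */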
2991 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2992 eqo->affinity_mask);
2993 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2994 BE_NAPI_WEIGHT);
2995 }
2996 return 0;
2997 }
2998
2999 static void be_mcc_queues_destroy(struct be_adapter *adapter)
3000 {
3001 struct be_queue_info *q;
3002
3003 q = &adapter->mcc_obj.q;
3004 if (q->created)
3005 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
3006 be_queue_free(adapter, q);
3007
3008 q = &adapter->mcc_obj.cq;
3009 if (q->created)
3010 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3011 be_queue_free(adapter, q);
3012 }
3013
3014 /* Must be called only after TX qs are created as MCC shares TX EQ */
3015 static int be_mcc_queues_create(struct be_adapter *adapter)
3016 {
3017 struct be_queue_info *q, *cq;
3018
3019 cq = &adapter->mcc_obj.cq;
3020 if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3021 sizeof(struct be_mcc_compl)))
3022 goto err;
3023
3024 /* Use the default EQ for MCC completions */
3025 if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3026 goto mcc_cq_free;
3027
3028 q = &adapter->mcc_obj.q;
3029 if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3030 goto mcc_cq_destroy;
3031
3032 if (be_cmd_mccq_create(adapter, q, cq))
3033 goto mcc_q_free;
3034
3035 return 0;
3036
3037 mcc_q_free:
3038 be_queue_free(adapter, q);
3039 mcc_cq_destroy:
3040 be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3041 mcc_cq_free:
3042 be_queue_free(adapter, cq);
3043 err:
3044 return -1;
3045 }
3046
3047 static void be_tx_queues_destroy(struct be_adapter *adapter)
3048 {
3049 struct be_queue_info *q;
3050 struct be_tx_obj *txo;
3051 u8 i;
3052
3053 for_all_tx_queues(adapter, txo, i) {
3054 q = &txo->q;
3055 if (q->created)
3056 be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3057 be_queue_free(adapter, q);
3058
3059 q = &txo->cq;
3060 if (q->created)
3061 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3062 be_queue_free(adapter, q);
3063 }
3064 }
3065
3066 static int be_tx_qs_create(struct be_adapter *adapter)
3067 {
3068 struct be_queue_info *cq;
3069 struct be_tx_obj *txo;
3070 struct be_eq_obj *eqo;
3071 int status, i;
3072
3073 adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3074
3075 for_all_tx_queues(adapter, txo, i) {
3076 cq = &txo->cq;
3077 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3078 sizeof(struct be_eth_tx_compl));
3079 if (status)
3080 return status;
3081
3082 u64_stats_init(&txo->stats.sync);
3083 u64_stats_init(&txo->stats.sync_compl);
3084
3085 /* If num_evt_qs is less than num_tx_qs, then more than
3086 * one txq shares an eq
3087 */
3088 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3089 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3090 if (status)
3091 return status;
3092
3093 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3094 sizeof(struct be_eth_wrb));
3095 if (status)
3096 return status;
3097
3098 status = be_cmd_txq_create(adapter, txo);
3099 if (status)
3100 return status;
3101
3102 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3103 eqo->idx);
3104 }
3105
3106 dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3107 adapter->num_tx_qs);
3108 return 0;
3109 }
3110
3111 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3112 {
3113 struct be_queue_info *q;
3114 struct be_rx_obj *rxo;
3115 int i;
3116
3117 for_all_rx_queues(adapter, rxo, i) {
3118 q = &rxo->cq;
3119 if (q->created)
3120 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3121 be_queue_free(adapter, q);
3122 }
3123 }
3124
3125 static int be_rx_cqs_create(struct be_adapter *adapter)
3126 {
3127 struct be_queue_info *eq, *cq;
3128 struct be_rx_obj *rxo;
3129 int rc, i;
3130
3131 adapter->num_rss_qs =
3132 min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3133
3134 /* We'll use RSS only if at least 2 RSS rings are supported. */
3135 if (adapter->num_rss_qs < 2)
3136 adapter->num_rss_qs = 0;
3137
3138 adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3139
3140 /* When the interface is not capable of RSS rings (and there is no
3141 * need to create a default RXQ) we'll still need one RXQ
3142 */
3143 if (adapter->num_rx_qs == 0)
3144 adapter->num_rx_qs = 1;
3145
3146 adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3147 for_all_rx_queues(adapter, rxo, i) {
3148 rxo->adapter = adapter;
3149 cq = &rxo->cq;
3150 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3151 sizeof(struct be_eth_rx_compl));
3152 if (rc)
3153 return rc;
3154
3155 u64_stats_init(&rxo->stats.sync);
3156 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3157 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3158 if (rc)
3159 return rc;
3160 }
3161
3162 dev_info(&adapter->pdev->dev,
3163 "created %d RX queue(s)\n", adapter->num_rx_qs);
3164 return 0;
3165 }
3166
3167 static irqreturn_t be_intx(int irq, void *dev)
3168 {
3169 struct be_eq_obj *eqo = dev;
3170 struct be_adapter *adapter = eqo->adapter;
3171 int num_evts = 0;
3172
3173 /* IRQ is not expected when NAPI is scheduled as the EQ
3174 * will not be armed.
3175 * But, this can happen on Lancer INTx where it takes
3176 * a while to de-assert INTx or in BE2 where occasionally
3177 * an interrupt may be raised even when EQ is unarmed.
3178 * If NAPI is already scheduled, then counting & notifying
3179 * events will orphan them.
3180 */
3181 if (napi_schedule_prep(&eqo->napi)) {
3182 num_evts = events_get(eqo);
3183 __napi_schedule(&eqo->napi);
3184 if (num_evts)
3185 eqo->spurious_intr = 0;
3186 }
3187 be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3188
3189 /* Return IRQ_HANDLED only for the first spurious intr
3190 * after a valid intr to stop the kernel from branding
3191 * this irq as a bad one!
3192 */
3193 if (num_evts || eqo->spurious_intr++ == 0)
3194 return IRQ_HANDLED;
3195 else
3196 return IRQ_NONE;
3197 }
3198
3199 static irqreturn_t be_msix(int irq, void *dev)
3200 {
3201 struct be_eq_obj *eqo = dev;
3202
3203 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3204 napi_schedule(&eqo->napi);
3205 return IRQ_HANDLED;
3206 }
3207
3208 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3209 {
3210 return rxcp->tcpf && !rxcp->err && rxcp->l4_csum;
3211 }
3212
3213 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3214 int budget)
3215 {
3216 struct be_adapter *adapter = rxo->adapter;
3217 struct be_queue_info *rx_cq = &rxo->cq;
3218 struct be_rx_compl_info *rxcp;
3219 u32 work_done;
3220 u32 frags_consumed = 0;
3221
3222 for (work_done = 0; work_done < budget; work_done++) {
3223 rxcp = be_rx_compl_get(rxo);
3224 if (!rxcp)
3225 break;
3226
3227 /* Is it a flush compl that has no data */
3228 if (unlikely(rxcp->num_rcvd == 0))
3229 goto loop_continue;
3230
3231 /* Discard compl with partial DMA Lancer B0 */
3232 if (unlikely(!rxcp->pkt_size)) {
3233 be_rx_compl_discard(rxo, rxcp);
3234 goto loop_continue;
3235 }
3236
3237 /* On BE drop pkts that arrive due to imperfect filtering in
3238 * promiscuous mode on some SKUs
3239 */
3240 if (unlikely(rxcp->port != adapter->port_num &&
3241 !lancer_chip(adapter))) {
3242 be_rx_compl_discard(rxo, rxcp);
3243 goto loop_continue;
3244 }
3245
3246 if (do_gro(rxcp))
3247 be_rx_compl_process_gro(rxo, napi, rxcp);
3248 else
3249 be_rx_compl_process(rxo, napi, rxcp);
3250
3251 loop_continue:
3252 frags_consumed += rxcp->num_rcvd;
3253 be_rx_stats_update(rxo, rxcp);
3254 }
3255
3256 if (work_done) {
3257 be_cq_notify(adapter, rx_cq->id, true, work_done);
3258
3259 /* When an rx-obj gets into post_starved state, just
3260 * let be_worker do the posting.
3261 */
3262 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3263 !rxo->rx_post_starved)
3264 be_post_rx_frags(rxo, GFP_ATOMIC,
3265 max_t(u32, MAX_RX_POST,
3266 frags_consumed));
3267 }
3268
3269 return work_done;
3270 }
3271
3272
3273 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3274 int idx)
3275 {
3276 int num_wrbs = 0, work_done = 0;
3277 struct be_tx_compl_info *txcp;
3278
3279 while ((txcp = be_tx_compl_get(adapter, txo))) {
3280 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3281 work_done++;
3282 }
3283
3284 if (work_done) {
3285 be_cq_notify(adapter, txo->cq.id, true, work_done);
3286 atomic_sub(num_wrbs, &txo->q.used);
3287
3288 /* As Tx wrbs have been freed up, wake up netdev queue
3289 * if it was stopped due to lack of tx wrbs. */
3290 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3291 be_can_txq_wake(txo)) {
3292 netif_wake_subqueue(adapter->netdev, idx);
3293 }
3294
3295 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3296 tx_stats(txo)->tx_compl += work_done;
3297 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3298 }
3299 }
3300
3301 int be_poll(struct napi_struct *napi, int budget)
3302 {
3303 struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3304 struct be_adapter *adapter = eqo->adapter;
3305 int max_work = 0, work, i, num_evts;
3306 struct be_rx_obj *rxo;
3307 struct be_tx_obj *txo;
3308 u32 mult_enc = 0;
3309
3310 num_evts = events_get(eqo);
3311
3312 for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3313 be_process_tx(adapter, txo, i);
3314
3315 /* This loop will iterate twice for EQ0 in which
3316 * completions of the last RXQ (default one) are also processed.
3317 * For other EQs the loop iterates only once
3318 */
3319 for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3320 work = be_process_rx(rxo, napi, budget);
3321 max_work = max(work, max_work);
3322 }
3323
3324 if (is_mcc_eqo(eqo))
3325 be_process_mcc(adapter);
3326
3327 if (max_work < budget) {
3328 napi_complete_done(napi, max_work);
3329
3330 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3331 * delay via a delay multiplier encoding value
3332 */
3333 if (skyhawk_chip(adapter))
3334 mult_enc = be_get_eq_delay_mult_enc(eqo);
3335
3336 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3337 mult_enc);
3338 } else {
3339 /* As we'll continue in polling mode, count and clear events */
3340 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3341 }
3342 return max_work;
3343 }
3344
3345 void be_detect_error(struct be_adapter *adapter)
3346 {
3347 u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3348 u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3349 struct device *dev = &adapter->pdev->dev;
3350 u16 val;
3351 u32 i;
3352
3353 if (be_check_error(adapter, BE_ERROR_HW))
3354 return;
3355
3356 if (lancer_chip(adapter)) {
3357 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3358 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3359 be_set_error(adapter, BE_ERROR_UE);
3360 sliport_err1 = ioread32(adapter->db +
3361 SLIPORT_ERROR1_OFFSET);
3362 sliport_err2 = ioread32(adapter->db +
3363 SLIPORT_ERROR2_OFFSET);
3364 /* Do not log error messages if it's a FW reset */
3365 if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3366 sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3367 dev_info(dev, "Reset is in progress\n");
3368 } else {
3369 dev_err(dev, "Error detected in the card\n");
3370 dev_err(dev, "ERR: sliport status 0x%x\n",
3371 sliport_status);
3372 dev_err(dev, "ERR: sliport error1 0x%x\n",
3373 sliport_err1);
3374 dev_err(dev, "ERR: sliport error2 0x%x\n",
3375 sliport_err2);
3376 }
3377 }
3378 } else {
3379 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3380 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3381 ue_lo_mask = ioread32(adapter->pcicfg +
3382 PCICFG_UE_STATUS_LOW_MASK);
3383 ue_hi_mask = ioread32(adapter->pcicfg +
3384 PCICFG_UE_STATUS_HI_MASK);
3385
3386 ue_lo = (ue_lo & ~ue_lo_mask);
3387 ue_hi = (ue_hi & ~ue_hi_mask);
3388
3389 if (ue_lo || ue_hi) {
3390 /* On certain platforms BE3 hardware can indicate
3391 * spurious UEs. In case of a UE in the chip,
3392 * the POST register correctly reports either a
3393 * FAT_LOG_START state (FW is currently dumping
3394 * FAT log data) or an ARMFW_UE state. Check for the
3395 * above states to ascertain if the UE is valid or not.
3396 */
3397 if (BE3_chip(adapter)) {
3398 val = be_POST_stage_get(adapter);
3399 if ((val & POST_STAGE_FAT_LOG_START)
3400 != POST_STAGE_FAT_LOG_START &&
3401 (val & POST_STAGE_ARMFW_UE)
3402 != POST_STAGE_ARMFW_UE &&
3403 (val & POST_STAGE_RECOVERABLE_ERR)
3404 != POST_STAGE_RECOVERABLE_ERR)
3405 return;
3406 }
3407
3408 dev_err(dev, "Error detected in the adapter");
3409 be_set_error(adapter, BE_ERROR_UE);
3410
3411 for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3412 if (ue_lo & 1)
3413 dev_err(dev, "UE: %s bit set\n",
3414 ue_status_low_desc[i]);
3415 }
3416 for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3417 if (ue_hi & 1)
3418 dev_err(dev, "UE: %s bit set\n",
3419 ue_status_hi_desc[i]);
3420 }
3421 }
3422 }
3423 }
3424
3425 static void be_msix_disable(struct be_adapter *adapter)
3426 {
3427 if (msix_enabled(adapter)) {
3428 pci_disable_msix(adapter->pdev);
3429 adapter->num_msix_vec = 0;
3430 adapter->num_msix_roce_vec = 0;
3431 }
3432 }
3433
3434 static int be_msix_enable(struct be_adapter *adapter)
3435 {
3436 unsigned int i, max_roce_eqs;
3437 struct device *dev = &adapter->pdev->dev;
3438 int num_vec;
3439
3440 /* If RoCE is supported, program the max number of vectors that
3441 * could be used for NIC and RoCE, else, just program the number
3442 * we'll use initially.
3443 */
3444 if (be_roce_supported(adapter)) {
3445 max_roce_eqs =
3446 be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3447 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3448 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3449 } else {
3450 num_vec = max(adapter->cfg_num_rx_irqs,
3451 adapter->cfg_num_tx_irqs);
3452 }
3453
3454 for (i = 0; i < num_vec; i++)
3455 adapter->msix_entries[i].entry = i;
3456
3457 num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3458 MIN_MSIX_VECTORS, num_vec);
3459 if (num_vec < 0)
3460 goto fail;
3461
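/* When MSI-x vectors are shared with RoCE, give RoCE half of the
 * granted vectors; the NIC keeps the remainder.
 */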
3462 if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3463 adapter->num_msix_roce_vec = num_vec / 2;
3464 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3465 adapter->num_msix_roce_vec);
3466 }
3467
3468 adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3469
3470 dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3471 adapter->num_msix_vec);
3472 return 0;
3473
3474 fail:
3475 dev_warn(dev, "MSIx enable failed\n");
3476
3477 /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3478 if (be_virtfn(adapter))
3479 return num_vec;
3480 return 0;
3481 }
3482
3483 static inline int be_msix_vec_get(struct be_adapter *adapter,
3484 struct be_eq_obj *eqo)
3485 {
3486 return adapter->msix_entries[eqo->msix_idx].vector;
3487 }
3488
3489 static int be_msix_register(struct be_adapter *adapter)
3490 {
3491 struct net_device *netdev = adapter->netdev;
3492 struct be_eq_obj *eqo;
3493 int status, i, vec;
3494
3495 for_all_evt_queues(adapter, eqo, i) {
3496 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3497 vec = be_msix_vec_get(adapter, eqo);
3498 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3499 if (status)
3500 goto err_msix;
3501
3502 irq_set_affinity_hint(vec, eqo->affinity_mask);
3503 }
3504
3505 return 0;
3506 err_msix:
3507 for (i--; i >= 0; i--) {
3508 eqo = &adapter->eq_obj[i];
3509 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3510 }
3511 dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3512 status);
3513 be_msix_disable(adapter);
3514 return status;
3515 }
3516
3517 static int be_irq_register(struct be_adapter *adapter)
3518 {
3519 struct net_device *netdev = adapter->netdev;
3520 int status;
3521
3522 if (msix_enabled(adapter)) {
3523 status = be_msix_register(adapter);
3524 if (status == 0)
3525 goto done;
3526 /* INTx is not supported for VF */
3527 if (be_virtfn(adapter))
3528 return status;
3529 }
3530
3531 /* INTx: only the first EQ is used */
3532 netdev->irq = adapter->pdev->irq;
3533 status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3534 &adapter->eq_obj[0]);
3535 if (status) {
3536 dev_err(&adapter->pdev->dev,
3537 "INTx request IRQ failed - err %d\n", status);
3538 return status;
3539 }
3540 done:
3541 adapter->isr_registered = true;
3542 return 0;
3543 }
3544
3545 static void be_irq_unregister(struct be_adapter *adapter)
3546 {
3547 struct net_device *netdev = adapter->netdev;
3548 struct be_eq_obj *eqo;
3549 int i, vec;
3550
3551 if (!adapter->isr_registered)
3552 return;
3553
3554 /* INTx */
3555 if (!msix_enabled(adapter)) {
3556 free_irq(netdev->irq, &adapter->eq_obj[0]);
3557 goto done;
3558 }
3559
3560 /* MSIx */
3561 for_all_evt_queues(adapter, eqo, i) {
3562 vec = be_msix_vec_get(adapter, eqo);
3563 irq_set_affinity_hint(vec, NULL);
3564 free_irq(vec, eqo);
3565 }
3566
3567 done:
3568 adapter->isr_registered = false;
3569 }
3570
3571 static void be_rx_qs_destroy(struct be_adapter *adapter)
3572 {
3573 struct rss_info *rss = &adapter->rss_info;
3574 struct be_queue_info *q;
3575 struct be_rx_obj *rxo;
3576 int i;
3577
3578 for_all_rx_queues(adapter, rxo, i) {
3579 q = &rxo->q;
3580 if (q->created) {
3581 /* If RXQs are destroyed while in an "out of buffer"
3582 * state, there is a possibility of an HW stall on
3583 * Lancer. So, post 64 buffers to each queue to relieve
3584 * the "out of buffer" condition.
3585 * Make sure there's space in the RXQ before posting.
3586 */
3587 if (lancer_chip(adapter)) {
3588 be_rx_cq_clean(rxo);
3589 if (atomic_read(&q->used) == 0)
3590 be_post_rx_frags(rxo, GFP_KERNEL,
3591 MAX_RX_POST);
3592 }
3593
3594 be_cmd_rxq_destroy(adapter, q);
3595 be_rx_cq_clean(rxo);
3596 be_rxq_clean(rxo);
3597 }
3598 be_queue_free(adapter, q);
3599 }
3600
3601 if (rss->rss_flags) {
3602 rss->rss_flags = RSS_ENABLE_NONE;
3603 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3604 128, rss->rss_hkey);
3605 }
3606 }
3607
3608 static void be_disable_if_filters(struct be_adapter *adapter)
3609 {
3610 /* Don't delete MAC on BE3 VFs without FILTMGMT privilege */
3611 if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3612 check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3613 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3614 eth_zero_addr(adapter->dev_mac);
3615 }
3616
3617 be_clear_uc_list(adapter);
3618 be_clear_mc_list(adapter);
3619
3620 /* The IFACE flags are enabled in the open path and cleared
3621 * in the close path. When a VF gets detached from the host and
3622 * assigned to a VM the following happens:
3623 * - VF's IFACE flags get cleared in the detach path
3624 * - IFACE create is issued by the VF in the attach path
3625 * Due to a bug in the BE3/Skyhawk-R FW
3626 * (Lancer FW doesn't have the bug), the IFACE capability flags
3627 * specified along with the IFACE create cmd issued by a VF are not
3628 * honoured by FW. As a consequence, if a *new* driver
3629 * (that enables/disables IFACE flags in open/close)
3630 * is loaded in the host and an *old* driver is used by a VM/VF,
3631 * the IFACE gets created *without* the needed flags.
3632 * To avoid this, disable RX-filter flags only for Lancer.
3633 */
3634 if (lancer_chip(adapter)) {
3635 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3636 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3637 }
3638 }
3639
3640 static int be_close(struct net_device *netdev)
3641 {
3642 struct be_adapter *adapter = netdev_priv(netdev);
3643 struct be_eq_obj *eqo;
3644 int i;
3645
3646 /* This protection is needed as be_close() may be called even when the
3647 * adapter is in cleared state (after eeh perm failure)
3648 */
3649 if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3650 return 0;
3651
3652 /* Before attempting cleanup ensure all the pending cmds in the
3653 * config_wq have finished execution
3654 */
3655 flush_workqueue(be_wq);
3656
3657 be_disable_if_filters(adapter);
3658
3659 if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3660 for_all_evt_queues(adapter, eqo, i) {
3661 napi_disable(&eqo->napi);
3662 }
3663 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3664 }
3665
3666 be_async_mcc_disable(adapter);
3667
3668 /* Wait for all pending tx completions to arrive so that
3669 * all tx skbs are freed.
3670 */
3671 netif_tx_disable(netdev);
3672 be_tx_compl_clean(adapter);
3673
3674 be_rx_qs_destroy(adapter);
3675
3676 for_all_evt_queues(adapter, eqo, i) {
3677 if (msix_enabled(adapter))
3678 synchronize_irq(be_msix_vec_get(adapter, eqo));
3679 else
3680 synchronize_irq(netdev->irq);
3681 be_eq_clean(eqo);
3682 }
3683
3684 be_irq_unregister(adapter);
3685
3686 return 0;
3687 }
3688
3689 static int be_rx_qs_create(struct be_adapter *adapter)
3690 {
3691 struct rss_info *rss = &adapter->rss_info;
3692 u8 rss_key[RSS_HASH_KEY_LEN];
3693 struct be_rx_obj *rxo;
3694 int rc, i, j;
3695
3696 for_all_rx_queues(adapter, rxo, i) {
3697 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3698 sizeof(struct be_eth_rx_d));
3699 if (rc)
3700 return rc;
3701 }
3702
3703 if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3704 rxo = default_rxo(adapter);
3705 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706 rx_frag_size, adapter->if_handle,
3707 false, &rxo->rss_id);
3708 if (rc)
3709 return rc;
3710 }
3711
3712 for_all_rss_queues(adapter, rxo, i) {
3713 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3714 rx_frag_size, adapter->if_handle,
3715 true, &rxo->rss_id);
3716 if (rc)
3717 return rc;
3718 }
3719
3720 if (be_multi_rxq(adapter)) {
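/* Stripe the RSS ring ids round-robin across the entire
 * indirection table so flows spread evenly over the RSS rings.
 */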
3721 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3722 for_all_rss_queues(adapter, rxo, i) {
3723 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3724 break;
3725 rss->rsstable[j + i] = rxo->rss_id;
3726 rss->rss_queue[j + i] = i;
3727 }
3728 }
3729 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3730 RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3731
3732 if (!BEx_chip(adapter))
3733 rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3734 RSS_ENABLE_UDP_IPV6;
3735
3736 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3737 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3738 RSS_INDIR_TABLE_LEN, rss_key);
3739 if (rc) {
3740 rss->rss_flags = RSS_ENABLE_NONE;
3741 return rc;
3742 }
3743
3744 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3745 } else {
3746 /* Disable RSS, if only default RX Q is created */
3747 rss->rss_flags = RSS_ENABLE_NONE;
3748 }
3749
3750
3751 /* Post 1 less than RXQ-len to avoid head being equal to tail,
3752 * which is a queue empty condition
3753 */
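/* For a hypothetical ring of 8 entries, only 7 buffers are posted, so
 * head == tail can only mean "empty", never "full".
 */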
3754 for_all_rx_queues(adapter, rxo, i)
3755 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3756
3757 return 0;
3758 }
3759
3760 static int be_enable_if_filters(struct be_adapter *adapter)
3761 {
3762 int status;
3763
3764 status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3765 if (status)
3766 return status;
3767
3768 /* Normally this condition is true as the ->dev_mac is zeroed.
3769 * But on BE3 VFs the initial MAC is pre-programmed by the PF and
3770 * a subsequent be_dev_mac_add() can fail (after a fresh boot)
3771 */
3772 if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3773 int old_pmac_id = -1;
3774
3775 /* Remember old programmed MAC if any - can happen on BE3 VF */
3776 if (!is_zero_ether_addr(adapter->dev_mac))
3777 old_pmac_id = adapter->pmac_id[0];
3778
3779 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3780 if (status)
3781 return status;
3782
3783 /* Delete the old programmed MAC as we successfully programmed
3784 * a new MAC
3785 */
3786 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3787 be_dev_mac_del(adapter, old_pmac_id);
3788
3789 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3790 }
3791
3792 if (adapter->vlans_added)
3793 be_vid_config(adapter);
3794
3795 __be_set_rx_mode(adapter);
3796
3797 return 0;
3798 }
3799
3800 static int be_open(struct net_device *netdev)
3801 {
3802 struct be_adapter *adapter = netdev_priv(netdev);
3803 struct be_eq_obj *eqo;
3804 struct be_rx_obj *rxo;
3805 struct be_tx_obj *txo;
3806 u8 link_status;
3807 int status, i;
3808
3809 status = be_rx_qs_create(adapter);
3810 if (status)
3811 goto err;
3812
3813 status = be_enable_if_filters(adapter);
3814 if (status)
3815 goto err;
3816
3817 status = be_irq_register(adapter);
3818 if (status)
3819 goto err;
3820
3821 for_all_rx_queues(adapter, rxo, i)
3822 be_cq_notify(adapter, rxo->cq.id, true, 0);
3823
3824 for_all_tx_queues(adapter, txo, i)
3825 be_cq_notify(adapter, txo->cq.id, true, 0);
3826
3827 be_async_mcc_enable(adapter);
3828
3829 for_all_evt_queues(adapter, eqo, i) {
3830 napi_enable(&eqo->napi);
3831 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3832 }
3833 adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3834
3835 status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3836 if (!status)
3837 be_link_status_update(adapter, link_status);
3838
3839 netif_tx_start_all_queues(netdev);
3840 if (skyhawk_chip(adapter))
3841 udp_tunnel_get_rx_info(netdev);
3842
3843 return 0;
3844 err:
3845 be_close(adapter->netdev);
3846 return -EIO;
3847 }
3848
3849 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3850 {
3851 u32 addr;
3852
3853 addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3854
3855 mac[5] = (u8)(addr & 0xFF);
3856 mac[4] = (u8)((addr >> 8) & 0xFF);
3857 mac[3] = (u8)((addr >> 16) & 0xFF);
3858 /* Use the OUI from the current MAC address */
3859 memcpy(mac, adapter->netdev->dev_addr, 3);
3860 }
3861
3862 /*
3863 * Generate a seed MAC address from the PF MAC Address using jhash.
3864 * MAC addresses for VFs are assigned incrementally starting from the seed.
3865 * These addresses are programmed in the ASIC by the PF and the VF driver
3866 * queries for the MAC address during its probe.
3867 */
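/* Hypothetical illustration: if the PF MAC is 00:90:fa:aa:bb:cc, the seed
 * keeps the 00:90:fa OUI and derives the low three octets from the jhash
 * value; VF0 gets the seed and each following VF gets mac[5] incremented.
 */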
3868 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3869 {
3870 u32 vf;
3871 int status = 0;
3872 u8 mac[ETH_ALEN];
3873 struct be_vf_cfg *vf_cfg;
3874
3875 be_vf_eth_addr_generate(adapter, mac);
3876
3877 for_all_vfs(adapter, vf_cfg, vf) {
3878 if (BEx_chip(adapter))
3879 status = be_cmd_pmac_add(adapter, mac,
3880 vf_cfg->if_handle,
3881 &vf_cfg->pmac_id, vf + 1);
3882 else
3883 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3884 vf + 1);
3885
3886 if (status)
3887 dev_err(&adapter->pdev->dev,
3888 "Mac address assignment failed for VF %d\n",
3889 vf);
3890 else
3891 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3892
3893 mac[5] += 1;
3894 }
3895 return status;
3896 }
3897
3898 static int be_vfs_mac_query(struct be_adapter *adapter)
3899 {
3900 int status, vf;
3901 u8 mac[ETH_ALEN];
3902 struct be_vf_cfg *vf_cfg;
3903
3904 for_all_vfs(adapter, vf_cfg, vf) {
3905 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3906 mac, vf_cfg->if_handle,
3907 false, vf+1);
3908 if (status)
3909 return status;
3910 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3911 }
3912 return 0;
3913 }
3914
3915 static void be_vf_clear(struct be_adapter *adapter)
3916 {
3917 struct be_vf_cfg *vf_cfg;
3918 u32 vf;
3919
3920 if (pci_vfs_assigned(adapter->pdev)) {
3921 dev_warn(&adapter->pdev->dev,
3922 "VFs are assigned to VMs: not disabling VFs\n");
3923 goto done;
3924 }
3925
3926 pci_disable_sriov(adapter->pdev);
3927
3928 for_all_vfs(adapter, vf_cfg, vf) {
3929 if (BEx_chip(adapter))
3930 be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3931 vf_cfg->pmac_id, vf + 1);
3932 else
3933 be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3934 vf + 1);
3935
3936 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3937 }
3938
3939 if (BE3_chip(adapter))
3940 be_cmd_set_hsw_config(adapter, 0, 0,
3941 adapter->if_handle,
3942 PORT_FWD_TYPE_PASSTHRU, 0);
3943 done:
3944 kfree(adapter->vf_cfg);
3945 adapter->num_vfs = 0;
3946 adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3947 }
3948
3949 static void be_clear_queues(struct be_adapter *adapter)
3950 {
3951 be_mcc_queues_destroy(adapter);
3952 be_rx_cqs_destroy(adapter);
3953 be_tx_queues_destroy(adapter);
3954 be_evt_queues_destroy(adapter);
3955 }
3956
3957 static void be_cancel_worker(struct be_adapter *adapter)
3958 {
3959 if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3960 cancel_delayed_work_sync(&adapter->work);
3961 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3962 }
3963 }
3964
3965 static void be_cancel_err_detection(struct be_adapter *adapter)
3966 {
3967 struct be_error_recovery *err_rec = &adapter->error_recovery;
3968
3969 if (!be_err_recovery_workq)
3970 return;
3971
3972 if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3973 cancel_delayed_work_sync(&err_rec->err_detection_work);
3974 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3975 }
3976 }
3977
3978 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3979 {
3980 struct net_device *netdev = adapter->netdev;
3981 struct device *dev = &adapter->pdev->dev;
3982 struct be_vxlan_port *vxlan_port;
3983 __be16 port;
3984 int status;
3985
3986 vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3987 struct be_vxlan_port, list);
3988 port = vxlan_port->port;
3989
3990 status = be_cmd_manage_iface(adapter, adapter->if_handle,
3991 OP_CONVERT_NORMAL_TO_TUNNEL);
3992 if (status) {
3993 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3994 return status;
3995 }
3996 adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3997
3998 status = be_cmd_set_vxlan_port(adapter, port);
3999 if (status) {
4000 dev_warn(dev, "Failed to add VxLAN port\n");
4001 return status;
4002 }
4003 adapter->vxlan_port = port;
4004
4005 netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4006 NETIF_F_TSO | NETIF_F_TSO6 |
4007 NETIF_F_GSO_UDP_TUNNEL;
4008
4009 dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4010 be16_to_cpu(port));
4011 return 0;
4012 }
4013
4014 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4015 {
4016 struct net_device *netdev = adapter->netdev;
4017
4018 if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4019 be_cmd_manage_iface(adapter, adapter->if_handle,
4020 OP_CONVERT_TUNNEL_TO_NORMAL);
4021
4022 if (adapter->vxlan_port)
4023 be_cmd_set_vxlan_port(adapter, 0);
4024
4025 adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4026 adapter->vxlan_port = 0;
4027
4028 netdev->hw_enc_features = 0;
4029 }
4030
4031 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4032 struct be_resources *vft_res)
4033 {
4034 struct be_resources res = adapter->pool_res;
4035 u32 vf_if_cap_flags = res.vf_if_cap_flags;
4036 struct be_resources res_mod = {0};
4037 u16 num_vf_qs = 1;
4038
4039 /* Distribute the queue resources among the PF and its VFs */
4040 if (num_vfs) {
4041 /* Divide the rx queues evenly among the VFs and the PF, capped
4042 * at VF-EQ-count. Any remainder queues belong to the PF.
4043 */
4044 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4045 res.max_rss_qs / (num_vfs + 1));
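/* Hypothetical example: with res.max_rss_qs == 32 and num_vfs == 7, each
 * of the 8 functions (PF + 7 VFs) is offered 32 / 8 = 4 RX queues,
 * subject to the SH_VF_MAX_NIC_EQS cap.
 */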
4046
4047 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4048 * RSS Tables per port. Provide RSS on VFs only if the number of
4049 * VFs requested is less than its PF Pool's RSS Tables limit.
4050 */
4051 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4052 num_vf_qs = 1;
4053 }
4054
4055 /* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd,
4056 * which are modifiable using SET_PROFILE_CONFIG cmd.
4057 */
4058 be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4059 RESOURCE_MODIFIABLE, 0);
4060
4061 /* If RSS IFACE capability flags are modifiable for a VF, set the
4062 * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4063 * more than 1 RSSQ is available for a VF.
4064 * Otherwise, provision only 1 queue pair for VF.
4065 */
4066 if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4067 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4068 if (num_vf_qs > 1) {
4069 vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4070 if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4071 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4072 } else {
4073 vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4074 BE_IF_FLAGS_DEFQ_RSS);
4075 }
4076 } else {
4077 num_vf_qs = 1;
4078 }
4079
4080 if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4081 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4082 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4083 }
4084
4085 vft_res->vf_if_cap_flags = vf_if_cap_flags;
4086 vft_res->max_rx_qs = num_vf_qs;
4087 vft_res->max_rss_qs = num_vf_qs;
4088 vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4089 vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4090
4091 /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4092 * among the PF and its VFs, if the fields are changeable
4093 */
4094 if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4095 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4096
4097 if (res_mod.max_vlans == FIELD_MODIFIABLE)
4098 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4099
4100 if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4101 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4102
4103 if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4104 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4105 }
4106
4107 static void be_if_destroy(struct be_adapter *adapter)
4108 {
4109 be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4110
4111 kfree(adapter->pmac_id);
4112 adapter->pmac_id = NULL;
4113
4114 kfree(adapter->mc_list);
4115 adapter->mc_list = NULL;
4116
4117 kfree(adapter->uc_list);
4118 adapter->uc_list = NULL;
4119 }
4120
4121 static int be_clear(struct be_adapter *adapter)
4122 {
4123 struct pci_dev *pdev = adapter->pdev;
4124 struct be_resources vft_res = {0};
4125
4126 be_cancel_worker(adapter);
4127
4128 flush_workqueue(be_wq);
4129
4130 if (sriov_enabled(adapter))
4131 be_vf_clear(adapter);
4132
4133 /* Re-configure FW to distribute resources evenly across max-supported
4134 * number of VFs, only when VFs are not already enabled.
4135 */
4136 if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4137 !pci_vfs_assigned(pdev)) {
4138 be_calculate_vf_res(adapter,
4139 pci_sriov_get_totalvfs(pdev),
4140 &vft_res);
4141 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4142 pci_sriov_get_totalvfs(pdev),
4143 &vft_res);
4144 }
4145
4146 be_disable_vxlan_offloads(adapter);
4147
4148 be_if_destroy(adapter);
4149
4150 be_clear_queues(adapter);
4151
4152 be_msix_disable(adapter);
4153 adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4154 return 0;
4155 }
4156
4157 static int be_vfs_if_create(struct be_adapter *adapter)
4158 {
4159 struct be_resources res = {0};
4160 u32 cap_flags, en_flags, vf;
4161 struct be_vf_cfg *vf_cfg;
4162 int status;
4163
4164 /* If a FW profile exists, then cap_flags are updated */
4165 cap_flags = BE_VF_IF_EN_FLAGS;
4166
4167 for_all_vfs(adapter, vf_cfg, vf) {
4168 if (!BE3_chip(adapter)) {
4169 status = be_cmd_get_profile_config(adapter, &res, NULL,
4170 ACTIVE_PROFILE_TYPE,
4171 RESOURCE_LIMITS,
4172 vf + 1);
4173 if (!status) {
4174 cap_flags = res.if_cap_flags;
4175 /* Prevent VFs from enabling VLAN promiscuous
4176 * mode
4177 */
4178 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4179 }
4180 }
4181
4182 /* PF should enable IF flags during proxy if_create call */
4183 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4184 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4185 &vf_cfg->if_handle, vf + 1);
4186 if (status)
4187 return status;
4188 }
4189
4190 return 0;
4191 }
4192
4193 static int be_vf_setup_init(struct be_adapter *adapter)
4194 {
4195 struct be_vf_cfg *vf_cfg;
4196 int vf;
4197
4198 adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4199 GFP_KERNEL);
4200 if (!adapter->vf_cfg)
4201 return -ENOMEM;
4202
4203 for_all_vfs(adapter, vf_cfg, vf) {
4204 vf_cfg->if_handle = -1;
4205 vf_cfg->pmac_id = -1;
4206 }
4207 return 0;
4208 }
4209
4210 static int be_vf_setup(struct be_adapter *adapter)
4211 {
4212 struct device *dev = &adapter->pdev->dev;
4213 struct be_vf_cfg *vf_cfg;
4214 int status, old_vfs, vf;
4215 bool spoofchk;
4216
4217 old_vfs = pci_num_vf(adapter->pdev);
4218
4219 status = be_vf_setup_init(adapter);
4220 if (status)
4221 goto err;
4222
4223 if (old_vfs) {
4224 for_all_vfs(adapter, vf_cfg, vf) {
4225 status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4226 if (status)
4227 goto err;
4228 }
4229
4230 status = be_vfs_mac_query(adapter);
4231 if (status)
4232 goto err;
4233 } else {
4234 status = be_vfs_if_create(adapter);
4235 if (status)
4236 goto err;
4237
4238 status = be_vf_eth_addr_config(adapter);
4239 if (status)
4240 goto err;
4241 }
4242
4243 for_all_vfs(adapter, vf_cfg, vf) {
4244 /* Allow VFs to program MAC/VLAN filters */
4245 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4246 vf + 1);
4247 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4248 status = be_cmd_set_fn_privileges(adapter,
4249 vf_cfg->privileges |
4250 BE_PRIV_FILTMGMT,
4251 vf + 1);
4252 if (!status) {
4253 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4254 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4255 vf);
4256 }
4257 }
4258
4259 /* Allow full available bandwidth */
4260 if (!old_vfs)
4261 be_cmd_config_qos(adapter, 0, 0, vf + 1);
4262
4263 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4264 vf_cfg->if_handle, NULL,
4265 &spoofchk);
4266 if (!status)
4267 vf_cfg->spoofchk = spoofchk;
4268
4269 if (!old_vfs) {
4270 be_cmd_enable_vf(adapter, vf + 1);
4271 be_cmd_set_logical_link_config(adapter,
4272 IFLA_VF_LINK_STATE_AUTO,
4273 vf+1);
4274 }
4275 }
4276
4277 if (!old_vfs) {
4278 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4279 if (status) {
4280 dev_err(dev, "SRIOV enable failed\n");
4281 adapter->num_vfs = 0;
4282 goto err;
4283 }
4284 }
4285
4286 if (BE3_chip(adapter)) {
4287 /* On BE3, enable VEB only when SRIOV is enabled */
4288 status = be_cmd_set_hsw_config(adapter, 0, 0,
4289 adapter->if_handle,
4290 PORT_FWD_TYPE_VEB, 0);
4291 if (status)
4292 goto err;
4293 }
4294
4295 adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4296 return 0;
4297 err:
4298 dev_err(dev, "VF setup failed\n");
4299 be_vf_clear(adapter);
4300 return status;
4301 }
4302
4303 /* Converting function_mode bits on BE3 to SH mc_type enums */
4304
4305 static u8 be_convert_mc_type(u32 function_mode)
4306 {
4307 if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4308 return vNIC1;
4309 else if (function_mode & QNQ_MODE)
4310 return FLEX10;
4311 else if (function_mode & VNIC_MODE)
4312 return vNIC2;
4313 else if (function_mode & UMC_ENABLED)
4314 return UMC;
4315 else
4316 return MC_NONE;
4317 }
4318
4319 /* On BE2/BE3 FW does not suggest the supported limits */
4320 static void BEx_get_resources(struct be_adapter *adapter,
4321 struct be_resources *res)
4322 {
4323 bool use_sriov = adapter->num_vfs ? 1 : 0;
4324
4325 if (be_physfn(adapter))
4326 res->max_uc_mac = BE_UC_PMAC_COUNT;
4327 else
4328 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4329
4330 adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4331
4332 if (be_is_mc(adapter)) {
4333 /* Assuming that there are 4 channels per port,
4334 * when multi-channel is enabled
4335 */
4336 if (be_is_qnq_mode(adapter))
4337 res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4338 else
4339 /* In a non-qnq multichannel mode, the pvid
4340 * takes up one vlan entry
4341 */
4342 res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4343 } else {
4344 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4345 }
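/* Sketch, assuming BE_NUM_VLANS_SUPPORTED is 64: QnQ multi-channel gets
 * 64 / 8 = 8 VLAN filters, non-QnQ multi-channel gets 64 / 4 - 1 = 15
 * (one entry goes to the pvid), and single-channel keeps all 64.
 */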
4346
4347 res->max_mcast_mac = BE_MAX_MC;
4348
4349 /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4350 * 2) Create multiple TX rings on a BE3-R multi-channel interface
4351 * *only* if it is RSS-capable.
4352 */
4353 if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4354 be_virtfn(adapter) ||
4355 (be_is_mc(adapter) &&
4356 !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4357 res->max_tx_qs = 1;
4358 } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4359 struct be_resources super_nic_res = {0};
4360
4361 /* On a SuperNIC profile, the driver needs to use the
4362 * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4363 */
4364 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4365 ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4366 0);
4367 /* Some old versions of BE3 FW don't report max_tx_qs value */
4368 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4369 } else {
4370 res->max_tx_qs = BE3_MAX_TX_QS;
4371 }
4372
4373 if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4374 !use_sriov && be_physfn(adapter))
4375 res->max_rss_qs = (adapter->be3_native) ?
4376 BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4377 res->max_rx_qs = res->max_rss_qs + 1;
4378
4379 if (be_physfn(adapter))
4380 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4381 BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4382 else
4383 res->max_evt_qs = 1;
4384
4385 res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4386 res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4387 if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4388 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4389 }
4390
4391 static void be_setup_init(struct be_adapter *adapter)
4392 {
4393 adapter->vlan_prio_bmap = 0xff;
4394 adapter->phy.link_speed = -1;
4395 adapter->if_handle = -1;
4396 adapter->be3_native = false;
4397 adapter->if_flags = 0;
4398 adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4399 if (be_physfn(adapter))
4400 adapter->cmd_privileges = MAX_PRIVILEGES;
4401 else
4402 adapter->cmd_privileges = MIN_PRIVILEGES;
4403 }
4404
4405 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4406 * However, this HW limitation is not exposed to the host via any SLI cmd.
4407 * As a result, in the case of SRIOV and in particular multi-partition configs
4408 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4409 * for distribution between the VFs. This self-imposed limit will determine the
4410 * number of VFs for which RSS can be enabled.
4411 */
4412 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4413 {
4414 struct be_port_resources port_res = {0};
4415 u8 rss_tables_on_port;
4416 u16 max_vfs = be_max_vfs(adapter);
4417
4418 be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4419 RESOURCE_LIMITS, 0);
4420
4421 rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4422
4423 /* Each PF Pool's RSS Tables limit =
4424 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4425 */
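/* Hypothetical numbers: with 15 RSS tables left on the port, 32 total
 * VFs on the port and 16 max VFs for this PF, the pool is limited to
 * 16 * 15 / 32 = 7 tables.
 */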
4426 adapter->pool_res.max_rss_tables =
4427 max_vfs * rss_tables_on_port / port_res.max_vfs;
4428 }
4429
4430 static int be_get_sriov_config(struct be_adapter *adapter)
4431 {
4432 struct be_resources res = {0};
4433 int max_vfs, old_vfs;
4434
4435 be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4436 RESOURCE_LIMITS, 0);
4437
4438 /* Some old versions of BE3 FW don't report max_vfs value */
4439 if (BE3_chip(adapter) && !res.max_vfs) {
4440 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4441 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4442 }
4443
4444 adapter->pool_res = res;
4445
4446 /* If during previous unload of the driver, the VFs were not disabled,
4447 * then we cannot rely on the PF POOL limits for the TotalVFs value.
4448 * Instead use the TotalVFs value stored in the pci-dev struct.
4449 */
4450 old_vfs = pci_num_vf(adapter->pdev);
4451 if (old_vfs) {
4452 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4453 old_vfs);
4454
4455 adapter->pool_res.max_vfs =
4456 pci_sriov_get_totalvfs(adapter->pdev);
4457 adapter->num_vfs = old_vfs;
4458 }
4459
4460 if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4461 be_calculate_pf_pool_rss_tables(adapter);
4462 dev_info(&adapter->pdev->dev,
4463 "RSS can be enabled for all VFs if num_vfs <= %d\n",
4464 be_max_pf_pool_rss_tables(adapter));
4465 }
4466 return 0;
4467 }
4468
4469 static void be_alloc_sriov_res(struct be_adapter *adapter)
4470 {
4471 int old_vfs = pci_num_vf(adapter->pdev);
4472 struct be_resources vft_res = {0};
4473 int status;
4474
4475 be_get_sriov_config(adapter);
4476
4477 if (!old_vfs)
4478 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4479
4480 /* When the HW is in an SRIOV-capable configuration, the PF-pool
4481 * resources are given to the PF during driver load, if there are no
4482 * old VFs. This facility is not available in BE3 FW.
4483 * Also, on the Lancer chip this is done by the FW.
4484 */
4485 if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4486 be_calculate_vf_res(adapter, 0, &vft_res);
4487 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4488 &vft_res);
4489 if (status)
4490 dev_err(&adapter->pdev->dev,
4491 "Failed to optimize SRIOV resources\n");
4492 }
4493 }
4494
4495 static int be_get_resources(struct be_adapter *adapter)
4496 {
4497 struct device *dev = &adapter->pdev->dev;
4498 struct be_resources res = {0};
4499 int status;
4500
4501 /* For Lancer, SH etc read per-function resource limits from FW.
4502 * GET_FUNC_CONFIG returns per function guaranteed limits.
4503 * GET_PROFILE_CONFIG returns PCI-E related PF-pool limits.
4504 */
4505 if (BEx_chip(adapter)) {
4506 BEx_get_resources(adapter, &res);
4507 } else {
4508 status = be_cmd_get_func_config(adapter, &res);
4509 if (status)
4510 return status;
4511
4512 /* If a default RXQ must be created, we'll use up one RSSQ */
4513 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4514 !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4515 res.max_rss_qs -= 1;
4516 }
4517
4518 /* If RoCE is supported stash away half the EQs for RoCE */
4519 res.max_nic_evt_qs = be_roce_supported(adapter) ?
4520 res.max_evt_qs / 2 : res.max_evt_qs;
4521 adapter->res = res;
4522
4523 /* If FW supports RSS default queue, then skip creating non-RSS
4524 * queue for non-IP traffic.
4525 */
4526 adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4527 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4528
4529 dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4530 be_max_txqs(adapter), be_max_rxqs(adapter),
4531 be_max_rss(adapter), be_max_nic_eqs(adapter),
4532 be_max_vfs(adapter));
4533 dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4534 be_max_uc(adapter), be_max_mc(adapter),
4535 be_max_vlans(adapter));
4536
4537 /* Ensure RX and TX queues are created in pairs at init time */
4538 adapter->cfg_num_rx_irqs =
4539 min_t(u16, netif_get_num_default_rss_queues(),
4540 be_max_qp_irqs(adapter));
4541 adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4542 return 0;
4543 }
4544
4545 static int be_get_config(struct be_adapter *adapter)
4546 {
4547 int status, level;
4548 u16 profile_id;
4549
4550 status = be_cmd_get_cntl_attributes(adapter);
4551 if (status)
4552 return status;
4553
4554 status = be_cmd_query_fw_cfg(adapter);
4555 if (status)
4556 return status;
4557
4558 if (!lancer_chip(adapter) && be_physfn(adapter))
4559 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4560
4561 if (BEx_chip(adapter)) {
4562 level = be_cmd_get_fw_log_level(adapter);
4563 adapter->msg_enable =
4564 level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4565 }
4566
4567 be_cmd_get_acpi_wol_cap(adapter);
4568 pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4569 pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4570
4571 be_cmd_query_port_name(adapter);
4572
4573 if (be_physfn(adapter)) {
4574 status = be_cmd_get_active_profile(adapter, &profile_id);
4575 if (!status)
4576 dev_info(&adapter->pdev->dev,
4577 "Using profile 0x%x\n", profile_id);
4578 }
4579
4580 return 0;
4581 }
4582
4583 static int be_mac_setup(struct be_adapter *adapter)
4584 {
4585 u8 mac[ETH_ALEN];
4586 int status;
4587
4588 if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4589 status = be_cmd_get_perm_mac(adapter, mac);
4590 if (status)
4591 return status;
4592
4593 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4594 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4595
4596 /* Initial MAC for BE3 VFs is already programmed by PF */
4597 if (BEx_chip(adapter) && be_virtfn(adapter))
4598 memcpy(adapter->dev_mac, mac, ETH_ALEN);
4599 }
4600
4601 return 0;
4602 }
4603
4604 static void be_schedule_worker(struct be_adapter *adapter)
4605 {
4606 queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4607 adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4608 }
4609
4610 static void be_destroy_err_recovery_workq(void)
4611 {
4612 if (!be_err_recovery_workq)
4613 return;
4614
4615 flush_workqueue(be_err_recovery_workq);
4616 destroy_workqueue(be_err_recovery_workq);
4617 be_err_recovery_workq = NULL;
4618 }
4619
4620 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4621 {
4622 struct be_error_recovery *err_rec = &adapter->error_recovery;
4623
4624 if (!be_err_recovery_workq)
4625 return;
4626
4627 queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4628 msecs_to_jiffies(delay));
4629 adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4630 }
4631
4632 static int be_setup_queues(struct be_adapter *adapter)
4633 {
4634 struct net_device *netdev = adapter->netdev;
4635 int status;
4636
4637 status = be_evt_queues_create(adapter);
4638 if (status)
4639 goto err;
4640
4641 status = be_tx_qs_create(adapter);
4642 if (status)
4643 goto err;
4644
4645 status = be_rx_cqs_create(adapter);
4646 if (status)
4647 goto err;
4648
4649 status = be_mcc_queues_create(adapter);
4650 if (status)
4651 goto err;
4652
4653 status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4654 if (status)
4655 goto err;
4656
4657 status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4658 if (status)
4659 goto err;
4660
4661 return 0;
4662 err:
4663 dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4664 return status;
4665 }
4666
4667 static int be_if_create(struct be_adapter *adapter)
4668 {
4669 u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4670 u32 cap_flags = be_if_cap_flags(adapter);
4671 int status;
4672
4673 /* alloc required memory for other filtering fields */
4674 adapter->pmac_id = kcalloc(be_max_uc(adapter),
4675 sizeof(*adapter->pmac_id), GFP_KERNEL);
4676 if (!adapter->pmac_id)
4677 return -ENOMEM;
4678
4679 adapter->mc_list = kcalloc(be_max_mc(adapter),
4680 sizeof(*adapter->mc_list), GFP_KERNEL);
4681 if (!adapter->mc_list)
4682 return -ENOMEM;
4683
4684 adapter->uc_list = kcalloc(be_max_uc(adapter),
4685 sizeof(*adapter->uc_list), GFP_KERNEL);
4686 if (!adapter->uc_list)
4687 return -ENOMEM;
4688
4689 if (adapter->cfg_num_rx_irqs == 1)
4690 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4691
4692 en_flags &= cap_flags;
4693 /* will enable all the needed filter flags in be_open() */
4694 status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4695 &adapter->if_handle, 0);
4696
4697 if (status)
4698 return status;
4699
4700 return 0;
4701 }
4702
4703 int be_update_queues(struct be_adapter *adapter)
4704 {
4705 struct net_device *netdev = adapter->netdev;
4706 int status;
4707
4708 if (netif_running(netdev))
4709 be_close(netdev);
4710
4711 be_cancel_worker(adapter);
4712
4713 /* If any vectors have been shared with RoCE we cannot re-program
4714 * the MSIx table.
4715 */
4716 if (!adapter->num_msix_roce_vec)
4717 be_msix_disable(adapter);
4718
4719 be_clear_queues(adapter);
4720 status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4721 if (status)
4722 return status;
4723
4724 if (!msix_enabled(adapter)) {
4725 status = be_msix_enable(adapter);
4726 if (status)
4727 return status;
4728 }
4729
4730 status = be_if_create(adapter);
4731 if (status)
4732 return status;
4733
4734 status = be_setup_queues(adapter);
4735 if (status)
4736 return status;
4737
4738 be_schedule_worker(adapter);
4739
4740 /* The IF was destroyed and re-created. We need to clear
4741 * all promiscuous flags valid for the destroyed IF.
4742 * Without this promisc mode is not restored during
4743 * be_open() because the driver thinks that it is
4744 * already enabled in HW.
4745 */
4746 adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4747
4748 if (netif_running(netdev))
4749 status = be_open(netdev);
4750
4751 return status;
4752 }
4753
4754 static inline int fw_major_num(const char *fw_ver)
4755 {
4756 int fw_major = 0, i;
4757
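/* e.g. a hypothetical fw_ver string of "4.2.220.0" yields fw_major = 4 */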
4758 i = sscanf(fw_ver, "%d.", &fw_major);
4759 if (i != 1)
4760 return 0;
4761
4762 return fw_major;
4763 }
4764
4765 /* If it is error recovery, FLR the PF.
4766 * Else, if any VFs are already enabled, don't FLR the PF.
4767 */
4768 static bool be_reset_required(struct be_adapter *adapter)
4769 {
4770 if (be_error_recovering(adapter))
4771 return true;
4772 else
4773 return pci_num_vf(adapter->pdev) == 0;
4774 }
4775
4776 /* Wait for the FW to be ready and perform the required initialization */
4777 static int be_func_init(struct be_adapter *adapter)
4778 {
4779 int status;
4780
4781 status = be_fw_wait_ready(adapter);
4782 if (status)
4783 return status;
4784
4785 /* FW is now ready; clear errors to allow cmds/doorbell */
4786 be_clear_error(adapter, BE_CLEAR_ALL);
4787
4788 if (be_reset_required(adapter)) {
4789 status = be_cmd_reset_function(adapter);
4790 if (status)
4791 return status;
4792
4793 /* Wait for interrupts to quiesce after an FLR */
4794 msleep(100);
4795 }
4796
4797 /* Tell FW we're ready to fire cmds */
4798 status = be_cmd_fw_init(adapter);
4799 if (status)
4800 return status;
4801
4802 /* Allow interrupts for other ULPs running on NIC function */
4803 be_intr_set(adapter, true);
4804
4805 return 0;
4806 }
4807
4808 static int be_setup(struct be_adapter *adapter)
4809 {
4810 struct device *dev = &adapter->pdev->dev;
4811 int status;
4812
4813 status = be_func_init(adapter);
4814 if (status)
4815 return status;
4816
4817 be_setup_init(adapter);
4818
4819 if (!lancer_chip(adapter))
4820 be_cmd_req_native_mode(adapter);
4821
4822 /* invoke this cmd first to get pf_num and vf_num which are needed
4823 * for issuing profile related cmds
4824 */
4825 if (!BEx_chip(adapter)) {
4826 status = be_cmd_get_func_config(adapter, NULL);
4827 if (status)
4828 return status;
4829 }
4830
4831 status = be_get_config(adapter);
4832 if (status)
4833 goto err;
4834
4835 if (!BE2_chip(adapter) && be_physfn(adapter))
4836 be_alloc_sriov_res(adapter);
4837
4838 status = be_get_resources(adapter);
4839 if (status)
4840 goto err;
4841
4842 status = be_msix_enable(adapter);
4843 if (status)
4844 goto err;
4845
4846 /* will enable all the needed filter flags in be_open() */
4847 status = be_if_create(adapter);
4848 if (status)
4849 goto err;
4850
4851 /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4852 rtnl_lock();
4853 status = be_setup_queues(adapter);
4854 rtnl_unlock();
4855 if (status)
4856 goto err;
4857
4858 be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4859
4860 status = be_mac_setup(adapter);
4861 if (status)
4862 goto err;
4863
4864 be_cmd_get_fw_ver(adapter);
4865 dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4866
4867 if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4868 dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
4869 adapter->fw_ver);
4870 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4871 }
4872
4873 status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4874 adapter->rx_fc);
4875 if (status)
4876 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4877 &adapter->rx_fc);
4878
4879 dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4880 adapter->tx_fc, adapter->rx_fc);
4881
4882 if (be_physfn(adapter))
4883 be_cmd_set_logical_link_config(adapter,
4884 IFLA_VF_LINK_STATE_AUTO, 0);
4885
4886 /* BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4887 * confusing a Linux bridge or OVS that it might be connected to.
4888 * Set the EVB to PASSTHRU mode which effectively disables the EVB
4889 * when SRIOV is not enabled.
4890 */
4891 if (BE3_chip(adapter))
4892 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4893 PORT_FWD_TYPE_PASSTHRU, 0);
4894
4895 if (adapter->num_vfs)
4896 be_vf_setup(adapter);
4897
4898 status = be_cmd_get_phy_info(adapter);
4899 if (!status && be_pause_supported(adapter))
4900 adapter->phy.fc_autoneg = 1;
4901
4902 if (be_physfn(adapter) && !lancer_chip(adapter))
4903 be_cmd_set_features(adapter);
4904
4905 be_schedule_worker(adapter);
4906 adapter->flags |= BE_FLAGS_SETUP_DONE;
4907 return 0;
4908 err:
4909 be_clear(adapter);
4910 return status;
4911 }
4912
4913 #ifdef CONFIG_NET_POLL_CONTROLLER
4914 static void be_netpoll(struct net_device *netdev)
4915 {
4916 struct be_adapter *adapter = netdev_priv(netdev);
4917 struct be_eq_obj *eqo;
4918 int i;
4919
4920 for_all_evt_queues(adapter, eqo, i) {
4921 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4922 napi_schedule(&eqo->napi);
4923 }
4924 }
4925 #endif
4926
4927 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4928 {
4929 const struct firmware *fw;
4930 int status;
4931
4932 if (!netif_running(adapter->netdev)) {
4933 dev_err(&adapter->pdev->dev,
4934 "Firmware load not allowed (interface is down)\n");
4935 return -ENETDOWN;
4936 }
4937
4938 status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4939 if (status)
4940 goto fw_exit;
4941
4942 dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4943
4944 if (lancer_chip(adapter))
4945 status = lancer_fw_download(adapter, fw);
4946 else
4947 status = be_fw_download(adapter, fw);
4948
4949 if (!status)
4950 be_cmd_get_fw_ver(adapter);
4951
4952 fw_exit:
4953 release_firmware(fw);
4954 return status;
4955 }
4956
4957 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4958 u16 flags, struct netlink_ext_ack *extack)
4959 {
4960 struct be_adapter *adapter = netdev_priv(dev);
4961 struct nlattr *attr, *br_spec;
4962 int rem;
4963 int status = 0;
4964 u16 mode = 0;
4965
4966 if (!sriov_enabled(adapter))
4967 return -EOPNOTSUPP;
4968
4969 br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4970 if (!br_spec)
4971 return -EINVAL;
4972
4973 nla_for_each_nested(attr, br_spec, rem) {
4974 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4975 continue;
4976
4977 if (nla_len(attr) < sizeof(mode))
4978 return -EINVAL;
4979
4980 mode = nla_get_u16(attr);
4981 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4982 return -EOPNOTSUPP;
4983
4984 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4985 return -EINVAL;
4986
4987 status = be_cmd_set_hsw_config(adapter, 0, 0,
4988 adapter->if_handle,
4989 mode == BRIDGE_MODE_VEPA ?
4990 PORT_FWD_TYPE_VEPA :
4991 PORT_FWD_TYPE_VEB, 0);
4992 if (status)
4993 goto err;
4994
4995 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4996 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4997
4998 return status;
4999 }
5000 err:
5001 dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5002 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5003
5004 return status;
5005 }
5006
5007 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5008 struct net_device *dev, u32 filter_mask,
5009 int nlflags)
5010 {
5011 struct be_adapter *adapter = netdev_priv(dev);
5012 int status = 0;
5013 u8 hsw_mode;
5014
5015 /* BE and Lancer chips support VEB mode only */
5016 if (BEx_chip(adapter) || lancer_chip(adapter)) {
5017 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5018 if (!pci_sriov_get_totalvfs(adapter->pdev))
5019 return 0;
5020 hsw_mode = PORT_FWD_TYPE_VEB;
5021 } else {
5022 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5023 adapter->if_handle, &hsw_mode,
5024 NULL);
5025 if (status)
5026 return 0;
5027
5028 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5029 return 0;
5030 }
5031
5032 return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5033 hsw_mode == PORT_FWD_TYPE_VEPA ?
5034 BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5035 0, 0, nlflags, filter_mask, NULL);
5036 }
5037
5038 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5039 void (*func)(struct work_struct *))
5040 {
5041 struct be_cmd_work *work;
5042
5043 work = kzalloc(sizeof(*work), GFP_ATOMIC);
5044 if (!work) {
5045 dev_err(&adapter->pdev->dev,
5046 "be_work memory allocation failed\n");
5047 return NULL;
5048 }
5049
5050 INIT_WORK(&work->work, func);
5051 work->adapter = adapter;
5052 return work;
5053 }
5054
5055 /* VxLAN offload Notes:
5056 *
5057 * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5058 * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5059 * is expected to work across all types of IP tunnels once exported. Skyhawk
5060 * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5061 * offloads in hw_enc_features only when a VxLAN port is added. If other (non
5062 * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5063 * those other tunnels are unexported on the fly through ndo_features_check().
5064 *
5065 * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5066 * adds more than one port, disable offloads and re-enable them again when
5067 * there's only one port left. We maintain a list of ports for this purpose.
5068 */
5069 static void be_work_add_vxlan_port(struct work_struct *work)
5070 {
5071 struct be_cmd_work *cmd_work =
5072 container_of(work, struct be_cmd_work, work);
5073 struct be_adapter *adapter = cmd_work->adapter;
5074 struct device *dev = &adapter->pdev->dev;
5075 __be16 port = cmd_work->info.vxlan_port;
5076 struct be_vxlan_port *vxlan_port;
5077 int status;
5078
5079 /* Bump up the alias count if it is an existing port */
5080 list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5081 if (vxlan_port->port == port) {
5082 vxlan_port->port_aliases++;
5083 goto done;
5084 }
5085 }
5086
5087 /* Add a new port to our list. We don't need a lock here since port
5088 * add/delete are done only in the context of a single-threaded work
5089 * queue (be_wq).
5090 */
5091 vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5092 if (!vxlan_port)
5093 goto done;
5094
5095 vxlan_port->port = port;
5096 INIT_LIST_HEAD(&vxlan_port->list);
5097 list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5098 adapter->vxlan_port_count++;
5099
5100 if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5101 dev_info(dev,
5102 "Only one UDP port supported for VxLAN offloads\n");
5103 dev_info(dev, "Disabling VxLAN offloads\n");
5104 goto err;
5105 }
5106
5107 if (adapter->vxlan_port_count > 1)
5108 goto done;
5109
5110 status = be_enable_vxlan_offloads(adapter);
5111 if (!status)
5112 goto done;
5113
5114 err:
5115 be_disable_vxlan_offloads(adapter);
5116 done:
5117 kfree(cmd_work);
5118 return;
5119 }
5120
5121 static void be_work_del_vxlan_port(struct work_struct *work)
5122 {
5123 struct be_cmd_work *cmd_work =
5124 container_of(work, struct be_cmd_work, work);
5125 struct be_adapter *adapter = cmd_work->adapter;
5126 __be16 port = cmd_work->info.vxlan_port;
5127 struct be_vxlan_port *vxlan_port;
5128
5129 /* Nothing to be done if a port alias is being deleted */
5130 list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5131 if (vxlan_port->port == port) {
5132 if (vxlan_port->port_aliases) {
5133 vxlan_port->port_aliases--;
5134 goto done;
5135 }
5136 break;
5137 }
5138 }
5139
5140 /* No port aliases left; delete the port from the list */
5141 list_del(&vxlan_port->list);
5142 adapter->vxlan_port_count--;
5143
5144 /* Disable VxLAN offload if this is the offloaded port */
5145 if (adapter->vxlan_port == vxlan_port->port) {
5146 WARN_ON(adapter->vxlan_port_count);
5147 be_disable_vxlan_offloads(adapter);
5148 dev_info(&adapter->pdev->dev,
5149 "Disabled VxLAN offloads for UDP port %d\n",
5150 be16_to_cpu(port));
5151 goto out;
5152 }
5153
5154 /* If only 1 port is left, re-enable VxLAN offload */
5155 if (adapter->vxlan_port_count == 1)
5156 be_enable_vxlan_offloads(adapter);
5157
5158 out:
5159 kfree(vxlan_port);
5160 done:
5161 kfree(cmd_work);
5162 }
5163
5164 static void be_cfg_vxlan_port(struct net_device *netdev,
5165 struct udp_tunnel_info *ti,
5166 void (*func)(struct work_struct *))
5167 {
5168 struct be_adapter *adapter = netdev_priv(netdev);
5169 struct be_cmd_work *cmd_work;
5170
5171 if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5172 return;
5173
5174 if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5175 return;
5176
5177 cmd_work = be_alloc_work(adapter, func);
5178 if (cmd_work) {
5179 cmd_work->info.vxlan_port = ti->port;
5180 queue_work(be_wq, &cmd_work->work);
5181 }
5182 }
5183
5184 static void be_del_vxlan_port(struct net_device *netdev,
5185 struct udp_tunnel_info *ti)
5186 {
5187 be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5188 }
5189
5190 static void be_add_vxlan_port(struct net_device *netdev,
5191 struct udp_tunnel_info *ti)
5192 {
5193 be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5194 }
5195
5196 static netdev_features_t be_features_check(struct sk_buff *skb,
5197 struct net_device *dev,
5198 netdev_features_t features)
5199 {
5200 struct be_adapter *adapter = netdev_priv(dev);
5201 u8 l4_hdr = 0;
5202
5203 if (skb_is_gso(skb)) {
5204 /* IPv6 TSO requests with extension hdrs are a problem
5205 * for Lancer and BE3 HW. Disable the TSO6 feature.
5206 */
5207 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5208 features &= ~NETIF_F_TSO6;
5209
5210 /* Lancer cannot handle a packet with an MSS less than 256.
5211 * It also can't handle a TSO packet with a single segment.
5212 * Disable GSO support in such cases.
5213 */
5214 if (lancer_chip(adapter) &&
5215 (skb_shinfo(skb)->gso_size < 256 ||
5216 skb_shinfo(skb)->gso_segs == 1))
5217 features &= ~NETIF_F_GSO_MASK;
5218 }
5219
5220 /* The code below restricts offload features for some tunneled and
5221 * Q-in-Q packets.
5222 * Offload features for normal (non tunnel) packets are unchanged.
5223 */
5224 features = vlan_features_check(skb, features);
5225 if (!skb->encapsulation ||
5226 !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5227 return features;
5228
5229 /* It's an encapsulated packet and VxLAN offloads are enabled. We
5230 * should disable tunnel offload features if it's not a VxLAN packet,
5231 * as tunnel offloads have been enabled only for VxLAN. This is done to
5232 * allow other tunneled traffic like GRE to work while VxLAN
5233 * offloads are configured in Skyhawk-R.
5234 */
5235 switch (vlan_get_protocol(skb)) {
5236 case htons(ETH_P_IP):
5237 l4_hdr = ip_hdr(skb)->protocol;
5238 break;
5239 case htons(ETH_P_IPV6):
5240 l4_hdr = ipv6_hdr(skb)->nexthdr;
5241 break;
5242 default:
5243 return features;
5244 }
5245
5246 if (l4_hdr != IPPROTO_UDP ||
5247 skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5248 skb->inner_protocol != htons(ETH_P_TEB) ||
5249 skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5250 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5251 !adapter->vxlan_port ||
5252 udp_hdr(skb)->dest != adapter->vxlan_port)
5253 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5254
5255 return features;
5256 }
5257
5258 static int be_get_phys_port_id(struct net_device *dev,
5259 struct netdev_phys_item_id *ppid)
5260 {
5261 int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5262 struct be_adapter *adapter = netdev_priv(dev);
5263 u8 *id;
5264
5265 if (MAX_PHYS_ITEM_ID_LEN < id_len)
5266 return -ENOSPC;
5267
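/* Layout (sketch): byte 0 carries the 1-based HBA port number; the
 * remaining bytes hold the controller serial-number words, copied
 * starting from the highest-index word.
 */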
5268 ppid->id[0] = adapter->hba_port_num + 1;
5269 id = &ppid->id[1];
5270 for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5271 i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5272 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5273
5274 ppid->id_len = id_len;
5275
5276 return 0;
5277 }
5278
5279 static void be_set_rx_mode(struct net_device *dev)
5280 {
5281 struct be_adapter *adapter = netdev_priv(dev);
5282 struct be_cmd_work *work;
5283
5284 work = be_alloc_work(adapter, be_work_set_rx_mode);
5285 if (work)
5286 queue_work(be_wq, &work->work);
5287 }
5288
5289 static const struct net_device_ops be_netdev_ops = {
5290 .ndo_open = be_open,
5291 .ndo_stop = be_close,
5292 .ndo_start_xmit = be_xmit,
5293 .ndo_set_rx_mode = be_set_rx_mode,
5294 .ndo_set_mac_address = be_mac_addr_set,
5295 .ndo_get_stats64 = be_get_stats64,
5296 .ndo_validate_addr = eth_validate_addr,
5297 .ndo_vlan_rx_add_vid = be_vlan_add_vid,
5298 .ndo_vlan_rx_kill_vid = be_vlan_rem_vid,
5299 .ndo_set_vf_mac = be_set_vf_mac,
5300 .ndo_set_vf_vlan = be_set_vf_vlan,
5301 .ndo_set_vf_rate = be_set_vf_tx_rate,
5302 .ndo_get_vf_config = be_get_vf_config,
5303 .ndo_set_vf_link_state = be_set_vf_link_state,
5304 .ndo_set_vf_spoofchk = be_set_vf_spoofchk,
5305 .ndo_tx_timeout = be_tx_timeout,
5306 #ifdef CONFIG_NET_POLL_CONTROLLER
5307 .ndo_poll_controller = be_netpoll,
5308 #endif
5309 .ndo_bridge_setlink = be_ndo_bridge_setlink,
5310 .ndo_bridge_getlink = be_ndo_bridge_getlink,
5311 .ndo_udp_tunnel_add = be_add_vxlan_port,
5312 .ndo_udp_tunnel_del = be_del_vxlan_port,
5313 .ndo_features_check = be_features_check,
5314 .ndo_get_phys_port_id = be_get_phys_port_id,
5315 };
5316
5317 static void be_netdev_init(struct net_device *netdev)
5318 {
5319 struct be_adapter *adapter = netdev_priv(netdev);
5320
5321 netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5322 NETIF_F_GSO_UDP_TUNNEL |
5323 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5324 NETIF_F_HW_VLAN_CTAG_TX;
5325 if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5326 netdev->hw_features |= NETIF_F_RXHASH;
5327
5328 netdev->features |= netdev->hw_features |
5329 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5330
5331 netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5332 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5333
5334 netdev->priv_flags |= IFF_UNICAST_FLT;
5335
5336 netdev->flags |= IFF_MULTICAST;
5337
5338 netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5339
5340 netdev->netdev_ops = &be_netdev_ops;
5341
5342 netdev->ethtool_ops = &be_ethtool_ops;
5343
5344 /* MTU range: 256 - 9000 */
5345 netdev->min_mtu = BE_MIN_MTU;
5346 netdev->max_mtu = BE_MAX_MTU;
5347 }
5348
5349 static void be_cleanup(struct be_adapter *adapter)
5350 {
5351 struct net_device *netdev = adapter->netdev;
5352
5353 rtnl_lock();
5354 netif_device_detach(netdev);
5355 if (netif_running(netdev))
5356 be_close(netdev);
5357 rtnl_unlock();
5358
5359 be_clear(adapter);
5360 }
5361
5362 static int be_resume(struct be_adapter *adapter)
5363 {
5364 struct net_device *netdev = adapter->netdev;
5365 int status;
5366
5367 status = be_setup(adapter);
5368 if (status)
5369 return status;
5370
5371 rtnl_lock();
5372 if (netif_running(netdev))
5373 status = be_open(netdev);
5374 rtnl_unlock();
5375
5376 if (status)
5377 return status;
5378
5379 netif_device_attach(netdev);
5380
5381 return 0;
5382 }
5383
5384 static void be_soft_reset(struct be_adapter *adapter)
5385 {
5386 u32 val;
5387
5388 dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5389 val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5390 val |= SLIPORT_SOFTRESET_SR_MASK;
5391 iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5392 }
5393
5394 static bool be_err_is_recoverable(struct be_adapter *adapter)
5395 {
5396 struct be_error_recovery *err_rec = &adapter->error_recovery;
5397 unsigned long initial_idle_time =
5398 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5399 unsigned long recovery_interval =
5400 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5401 u16 ue_err_code;
5402 u32 val;
5403
5404 val = be_POST_stage_get(adapter);
5405 if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5406 return false;
5407 ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5408 if (ue_err_code == 0)
5409 return false;
5410
5411 dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5412 ue_err_code);
5413
5414 if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5415 dev_err(&adapter->pdev->dev,
5416 "Cannot recover within %lu sec from driver load\n",
5417 jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5418 return false;
5419 }
5420
5421 if (err_rec->last_recovery_time && time_before_eq(
5422 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5423 dev_err(&adapter->pdev->dev,
5424 "Cannot recover within %lu sec from last recovery\n",
5425 jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5426 return false;
5427 }
5428
5429 if (ue_err_code == err_rec->last_err_code) {
5430 dev_err(&adapter->pdev->dev,
5431 "Cannot recover from a consecutive TPE error\n");
5432 return false;
5433 }
5434
5435 err_rec->last_recovery_time = jiffies;
5436 err_rec->last_err_code = ue_err_code;
5437 return true;
5438 }
5439
5440 static int be_tpe_recover(struct be_adapter *adapter)
5441 {
5442 struct be_error_recovery *err_rec = &adapter->error_recovery;
5443 int status = -EAGAIN;
5444 u32 val;
5445
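/* State progression (sketch): NONE -> DETECT -> RESET (PF0 only) ->
 * PRE_POLL -> REINIT; a non-zero resched_delay asks the caller to re-run
 * this step after that many milliseconds.
 */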
5446 switch (err_rec->recovery_state) {
5447 case ERR_RECOVERY_ST_NONE:
5448 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5449 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5450 break;
5451
5452 case ERR_RECOVERY_ST_DETECT:
5453 val = be_POST_stage_get(adapter);
5454 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5455 POST_STAGE_RECOVERABLE_ERR) {
5456 dev_err(&adapter->pdev->dev,
5457 "Unrecoverable HW error detected: 0x%x\n", val);
5458 status = -EINVAL;
5459 err_rec->resched_delay = 0;
5460 break;
5461 }
5462
5463 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5464
5465 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5466 * milliseconds before it checks for final error status in
5467 * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5468 * If it does, then PF0 initiates a Soft Reset.
5469 */
5470 if (adapter->pf_num == 0) {
5471 err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5472 err_rec->resched_delay = err_rec->ue_to_reset_time -
5473 ERR_RECOVERY_UE_DETECT_DURATION;
5474 break;
5475 }
5476
5477 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5478 err_rec->resched_delay = err_rec->ue_to_poll_time -
5479 ERR_RECOVERY_UE_DETECT_DURATION;
5480 break;
5481
5482 case ERR_RECOVERY_ST_RESET:
5483 if (!be_err_is_recoverable(adapter)) {
5484 dev_err(&adapter->pdev->dev,
5485 "Failed to meet recovery criteria\n");
5486 status = -EIO;
5487 err_rec->resched_delay = 0;
5488 break;
5489 }
5490 be_soft_reset(adapter);
5491 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5492 err_rec->resched_delay = err_rec->ue_to_poll_time -
5493 err_rec->ue_to_reset_time;
5494 break;
5495
5496 case ERR_RECOVERY_ST_PRE_POLL:
5497 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5498 err_rec->resched_delay = 0;
5499 status = 0; /* done */
5500 break;
5501
5502 default:
5503 status = -EINVAL;
5504 err_rec->resched_delay = 0;
5505 break;
5506 }
5507
5508 return status;
5509 }
5510
5511 static int be_err_recover(struct be_adapter *adapter)
5512 {
5513 int status;
5514
5515 if (!lancer_chip(adapter)) {
5516 if (!adapter->error_recovery.recovery_supported ||
5517 adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5518 return -EIO;
5519 status = be_tpe_recover(adapter);
5520 if (status)
5521 goto err;
5522 }
5523
5524 /* Wait for adapter to reach quiescent state before
5525 * destroying queues
5526 */
5527 status = be_fw_wait_ready(adapter);
5528 if (status)
5529 goto err;
5530
5531 adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5532
5533 be_cleanup(adapter);
5534
5535 status = be_resume(adapter);
5536 if (status)
5537 goto err;
5538
5539 adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5540
5541 err:
5542 return status;
5543 }
5544
5545 static void be_err_detection_task(struct work_struct *work)
5546 {
5547 struct be_error_recovery *err_rec =
5548 container_of(work, struct be_error_recovery,
5549 err_detection_work.work);
5550 struct be_adapter *adapter =
5551 container_of(err_rec, struct be_adapter,
5552 error_recovery);
5553 u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5554 struct device *dev = &adapter->pdev->dev;
5555 int recovery_status;
5556
5557 be_detect_error(adapter);
5558 if (!be_check_error(adapter, BE_ERROR_HW))
5559 goto reschedule_task;
5560
5561 recovery_status = be_err_recover(adapter);
5562 if (!recovery_status) {
5563 err_rec->recovery_retries = 0;
5564 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5565 dev_info(dev, "Adapter recovery successful\n");
5566 goto reschedule_task;
5567 } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5568 /* BEx/SH recovery state machine */
5569 if (adapter->pf_num == 0 &&
5570 err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5571 dev_err(&adapter->pdev->dev,
5572 "Adapter recovery in progress\n");
5573 resched_delay = err_rec->resched_delay;
5574 goto reschedule_task;
5575 } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5576 /* For VFs, check every second whether the PF has allocated
5577 * resources.
5578 */
5579 dev_err(dev, "Re-trying adapter recovery\n");
5580 goto reschedule_task;
5581 } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5582 ERR_RECOVERY_MAX_RETRY_COUNT) {
5583 /* In case of another error during recovery, it takes 30 sec
5584 * for adapter to come out of error. Retry error recovery after
5585 * this time interval.
5586 */
5587 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5588 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5589 goto reschedule_task;
5590 } else {
5591 dev_err(dev, "Adapter recovery failed\n");
5592 dev_err(dev, "Please reboot server to recover\n");
5593 }
5594
5595 return;
5596
5597 reschedule_task:
5598 be_schedule_err_detection(adapter, resched_delay);
5599 }
5600
5601 static void be_log_sfp_info(struct be_adapter *adapter)
5602 {
5603 int status;
5604
5605 status = be_cmd_query_sfp_info(adapter);
5606 if (!status) {
5607 dev_err(&adapter->pdev->dev,
5608 "Port %c: %s Vendor: %s part no: %s",
5609 adapter->port_name,
5610 be_misconfig_evt_port_state[adapter->phy_state],
5611 adapter->phy.vendor_name,
5612 adapter->phy.vendor_pn);
5613 }
5614 adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5615 }
5616
5617 static void be_worker(struct work_struct *work)
5618 {
5619 struct be_adapter *adapter =
5620 container_of(work, struct be_adapter, work.work);
5621 struct be_rx_obj *rxo;
5622 int i;
5623
5624 if (be_physfn(adapter) &&
5625 MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5626 be_cmd_get_die_temperature(adapter);
5627
5628 /* when interrupts are not yet enabled, just reap any pending
5629 * mcc completions
5630 */
5631 if (!netif_running(adapter->netdev)) {
5632 local_bh_disable();
5633 be_process_mcc(adapter);
5634 local_bh_enable();
5635 goto reschedule;
5636 }
5637
5638 if (!adapter->stats_cmd_sent) {
5639 if (lancer_chip(adapter))
5640 lancer_cmd_get_pport_stats(adapter,
5641 &adapter->stats_cmd);
5642 else
5643 be_cmd_get_stats(adapter, &adapter->stats_cmd);
5644 }
5645
5646 for_all_rx_queues(adapter, rxo, i) {
5647 /* Replenish RX-queues starved due to memory
5648 * allocation failures.
5649 */
5650 if (rxo->rx_post_starved)
5651 be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5652 }
5653
5654 /* EQ-delay update for Skyhawk is done while notifying EQ */
5655 if (!skyhawk_chip(adapter))
5656 be_eqd_update(adapter, false);
5657
5658 if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5659 be_log_sfp_info(adapter);
5660
5661 reschedule:
5662 adapter->work_counter++;
5663 queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5664 }
5665
5666 static void be_unmap_pci_bars(struct be_adapter *adapter)
5667 {
5668 if (adapter->csr)
5669 pci_iounmap(adapter->pdev, adapter->csr);
5670 if (adapter->db)
5671 pci_iounmap(adapter->pdev, adapter->db);
5672 if (adapter->pcicfg && adapter->pcicfg_mapped)
5673 pci_iounmap(adapter->pdev, adapter->pcicfg);
5674 }
5675
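/* Doorbell registers live in BAR 0 on Lancer chips and on VFs, and in
 * BAR 4 otherwise.
 */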
5676 static int db_bar(struct be_adapter *adapter)
5677 {
5678 if (lancer_chip(adapter) || be_virtfn(adapter))
5679 return 0;
5680 else
5681 return 4;
5682 }
5683
5684 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5685 {
5686 if (skyhawk_chip(adapter)) {
5687 adapter->roce_db.size = 4096;
5688 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5689 db_bar(adapter));
5690 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5691 db_bar(adapter));
5692 }
5693 return 0;
5694 }
5695
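/* Map the CSR, doorbell and PCICFG BARs as required by the chip family
 * (BEx/Skyhawk/Lancer) and by PF vs. VF operation.
 */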
5696 static int be_map_pci_bars(struct be_adapter *adapter)
5697 {
5698 struct pci_dev *pdev = adapter->pdev;
5699 u8 __iomem *addr;
5700 u32 sli_intf;
5701
5702 pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5703 adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5704 SLI_INTF_FAMILY_SHIFT;
5705 adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5706
5707 if (BEx_chip(adapter) && be_physfn(adapter)) {
5708 adapter->csr = pci_iomap(pdev, 2, 0);
5709 if (!adapter->csr)
5710 return -ENOMEM;
5711 }
5712
5713 addr = pci_iomap(pdev, db_bar(adapter), 0);
5714 if (!addr)
5715 goto pci_map_err;
5716 adapter->db = addr;
5717
5718 if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5719 if (be_physfn(adapter)) {
5720 /* PCICFG is the 2nd BAR in BE2 */
5721 addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5722 if (!addr)
5723 goto pci_map_err;
5724 adapter->pcicfg = addr;
5725 adapter->pcicfg_mapped = true;
5726 } else {
5727 adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5728 adapter->pcicfg_mapped = false;
5729 }
5730 }
5731
5732 be_roce_map_pci_bars(adapter);
5733 return 0;
5734
5735 pci_map_err:
5736 dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5737 be_unmap_pci_bars(adapter);
5738 return -ENOMEM;
5739 }
5740
5741 static void be_drv_cleanup(struct be_adapter *adapter)
5742 {
5743 struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5744 struct device *dev = &adapter->pdev->dev;
5745
5746 if (mem->va)
5747 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5748
5749 mem = &adapter->rx_filter;
5750 if (mem->va)
5751 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5752
5753 mem = &adapter->stats_cmd;
5754 if (mem->va)
5755 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5756 }
5757
5758 /* Allocate DMA memory and initialize various fields in the be_adapter struct */
5759 static int be_drv_init(struct be_adapter *adapter)
5760 {
5761 struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5762 struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5763 struct be_dma_mem *rx_filter = &adapter->rx_filter;
5764 struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5765 struct device *dev = &adapter->pdev->dev;
5766 int status = 0;
5767
5768 mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5769 mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
5770 &mbox_mem_alloc->dma,
5771 GFP_KERNEL);
5772 if (!mbox_mem_alloc->va)
5773 return -ENOMEM;
5774
5775 mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5776 mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5777 mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5778
5779 rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5780 rx_filter->va = dma_alloc_coherent(dev, rx_filter->size,
5781 &rx_filter->dma, GFP_KERNEL);
5782 if (!rx_filter->va) {
5783 status = -ENOMEM;
5784 goto free_mbox;
5785 }
5786
5787 if (lancer_chip(adapter))
5788 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5789 else if (BE2_chip(adapter))
5790 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5791 else if (BE3_chip(adapter))
5792 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5793 else
5794 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5795 stats_cmd->va = dma_alloc_coherent(dev, stats_cmd->size,
5796 &stats_cmd->dma, GFP_KERNEL);
5797 if (!stats_cmd->va) {
5798 status = -ENOMEM;
5799 goto free_rx_filter;
5800 }
5801
5802 mutex_init(&adapter->mbox_lock);
5803 mutex_init(&adapter->mcc_lock);
5804 mutex_init(&adapter->rx_filter_lock);
5805 spin_lock_init(&adapter->mcc_cq_lock);
5806 init_completion(&adapter->et_cmd_compl);
5807
5808 pci_save_state(adapter->pdev);
5809
5810 INIT_DELAYED_WORK(&adapter->work, be_worker);
5811
5812 adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5813 adapter->error_recovery.resched_delay = 0;
5814 INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5815 be_err_detection_task);
5816
5817 adapter->rx_fc = true;
5818 adapter->tx_fc = true;
5819
5820 /* Must be a power of 2 or else MODULO will BUG_ON */
5821 adapter->be_get_temp_freq = 64;
5822
5823 INIT_LIST_HEAD(&adapter->vxlan_port_list);
5824 return 0;
5825
5826 free_rx_filter:
5827 dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5828 free_mbox:
5829 dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5830 mbox_mem_alloc->dma);
5831 return status;
5832 }
5833
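/* PCI remove callback: tear down RoCE, disable interrupts, stop error
 * detection, unregister the netdev and release all driver and PCI resources.
 */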
5834 static void be_remove(struct pci_dev *pdev)
5835 {
5836 struct be_adapter *adapter = pci_get_drvdata(pdev);
5837
5838 if (!adapter)
5839 return;
5840
5841 be_roce_dev_remove(adapter);
5842 be_intr_set(adapter, false);
5843
5844 be_cancel_err_detection(adapter);
5845
5846 unregister_netdev(adapter->netdev);
5847
5848 be_clear(adapter);
5849
5850 if (!pci_vfs_assigned(adapter->pdev))
5851 be_cmd_reset_function(adapter);
5852
5853 /* Tell the FW that we are done issuing commands */
5854 be_cmd_fw_clean(adapter);
5855
5856 be_unmap_pci_bars(adapter);
5857 be_drv_cleanup(adapter);
5858
5859 pci_disable_pcie_error_reporting(pdev);
5860
5861 pci_release_regions(pdev);
5862 pci_disable_device(pdev);
5863
5864 free_netdev(adapter->netdev);
5865 }
5866
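/* hwmon temp1_input attribute: reports the die temperature last read by
 * be_worker(), in millidegrees Celsius.
 */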
5867 static ssize_t be_hwmon_show_temp(struct device *dev,
5868 struct device_attribute *dev_attr,
5869 char *buf)
5870 {
5871 struct be_adapter *adapter = dev_get_drvdata(dev);
5872
5873 /* Unit: millidegree Celsius */
5874 if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5875 return -EIO;
5876 else
5877 return sprintf(buf, "%u\n",
5878 adapter->hwmon_info.be_on_die_temp * 1000);
5879 }
5880
5881 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5882 be_hwmon_show_temp, NULL, 1);
5883
5884 static struct attribute *be_hwmon_attrs[] = {
5885 &sensor_dev_attr_temp1_input.dev_attr.attr,
5886 NULL
5887 };
5888
5889 ATTRIBUTE_GROUPS(be_hwmon);
5890
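/* Return a printable name for the multi-channel mode of this function */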
5891 static char *mc_name(struct be_adapter *adapter)
5892 {
5893 char *str = ""; /* default */
5894
5895 switch (adapter->mc_type) {
5896 case UMC:
5897 str = "UMC";
5898 break;
5899 case FLEX10:
5900 str = "FLEX10";
5901 break;
5902 case vNIC1:
5903 str = "vNIC-1";
5904 break;
5905 case nPAR:
5906 str = "nPAR";
5907 break;
5908 case UFP:
5909 str = "UFP";
5910 break;
5911 case vNIC2:
5912 str = "vNIC-2";
5913 break;
5914 default:
5915 str = "";
5916 }
5917
5918 return str;
5919 }
5920
5921 static inline char *func_name(struct be_adapter *adapter)
5922 {
5923 return be_physfn(adapter) ? "PF" : "VF";
5924 }
5925
5926 static inline char *nic_name(struct pci_dev *pdev)
5927 {
5928 switch (pdev->device) {
5929 case OC_DEVICE_ID1:
5930 return OC_NAME;
5931 case OC_DEVICE_ID2:
5932 return OC_NAME_BE;
5933 case OC_DEVICE_ID3:
5934 case OC_DEVICE_ID4:
5935 return OC_NAME_LANCER;
5936 case BE_DEVICE_ID2:
5937 return BE3_NAME;
5938 case OC_DEVICE_ID5:
5939 case OC_DEVICE_ID6:
5940 return OC_NAME_SH;
5941 default:
5942 return BE_NAME;
5943 }
5944 }
5945
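/* PCI probe: enable the device, map the BARs, allocate driver resources,
 * bring up the adapter via be_setup(), register the netdev, start error
 * detection and, on PFs, register the hwmon temperature sensor.
 */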
5946 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5947 {
5948 struct be_adapter *adapter;
5949 struct net_device *netdev;
5950 int status = 0;
5951
5952 dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5953
5954 status = pci_enable_device(pdev);
5955 if (status)
5956 goto do_none;
5957
5958 status = pci_request_regions(pdev, DRV_NAME);
5959 if (status)
5960 goto disable_dev;
5961 pci_set_master(pdev);
5962
5963 netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5964 if (!netdev) {
5965 status = -ENOMEM;
5966 goto rel_reg;
5967 }
5968 adapter = netdev_priv(netdev);
5969 adapter->pdev = pdev;
5970 pci_set_drvdata(pdev, adapter);
5971 adapter->netdev = netdev;
5972 SET_NETDEV_DEV(netdev, &pdev->dev);
5973
5974 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5975 if (!status) {
5976 netdev->features |= NETIF_F_HIGHDMA;
5977 } else {
5978 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5979 if (status) {
5980 dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5981 goto free_netdev;
5982 }
5983 }
5984
5985 status = pci_enable_pcie_error_reporting(pdev);
5986 if (!status)
5987 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5988
5989 status = be_map_pci_bars(adapter);
5990 if (status)
5991 goto free_netdev;
5992
5993 status = be_drv_init(adapter);
5994 if (status)
5995 goto unmap_bars;
5996
5997 status = be_setup(adapter);
5998 if (status)
5999 goto drv_cleanup;
6000
6001 be_netdev_init(netdev);
6002 status = register_netdev(netdev);
6003 if (status != 0)
6004 goto unsetup;
6005
6006 be_roce_dev_add(adapter);
6007
6008 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6009 adapter->error_recovery.probe_time = jiffies;
6010
6011 /* On-die temperature is not supported on VFs */
6012 if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6013 adapter->hwmon_info.hwmon_dev =
6014 devm_hwmon_device_register_with_groups(&pdev->dev,
6015 DRV_NAME,
6016 adapter,
6017 be_hwmon_groups);
6018 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6019 }
6020
6021 dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6022 func_name(adapter), mc_name(adapter), adapter->port_name);
6023
6024 return 0;
6025
6026 unsetup:
6027 be_clear(adapter);
6028 drv_cleanup:
6029 be_drv_cleanup(adapter);
6030 unmap_bars:
6031 be_unmap_pci_bars(adapter);
6032 free_netdev:
6033 free_netdev(netdev);
6034 rel_reg:
6035 pci_release_regions(pdev);
6036 disable_dev:
6037 pci_disable_device(pdev);
6038 do_none:
6039 dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6040 return status;
6041 }
6042
6043 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6044 {
6045 struct be_adapter *adapter = pci_get_drvdata(pdev);
6046
6047 be_intr_set(adapter, false);
6048 be_cancel_err_detection(adapter);
6049
6050 be_cleanup(adapter);
6051
6052 pci_save_state(pdev);
6053 pci_disable_device(pdev);
6054 pci_set_power_state(pdev, pci_choose_state(pdev, state));
6055 return 0;
6056 }
6057
6058 static int be_pci_resume(struct pci_dev *pdev)
6059 {
6060 struct be_adapter *adapter = pci_get_drvdata(pdev);
6061 int status = 0;
6062
6063 status = pci_enable_device(pdev);
6064 if (status)
6065 return status;
6066
6067 pci_restore_state(pdev);
6068
6069 status = be_resume(adapter);
6070 if (status)
6071 return status;
6072
6073 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6074
6075 return 0;
6076 }
6077
6078 /*
6079 * An FLR will stop BE from DMAing any data.
6080 */
6081 static void be_shutdown(struct pci_dev *pdev)
6082 {
6083 struct be_adapter *adapter = pci_get_drvdata(pdev);
6084
6085 if (!adapter)
6086 return;
6087
6088 be_roce_dev_shutdown(adapter);
6089 cancel_delayed_work_sync(&adapter->work);
6090 be_cancel_err_detection(adapter);
6091
6092 netif_device_detach(adapter->netdev);
6093
6094 be_cmd_reset_function(adapter);
6095
6096 pci_disable_device(pdev);
6097 }
6098
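/* EEH error handler: quiesce the adapter and tell the PCI core whether a
 * slot reset should be attempted or the device should be disconnected.
 */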
6099 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6100 pci_channel_state_t state)
6101 {
6102 struct be_adapter *adapter = pci_get_drvdata(pdev);
6103
6104 dev_err(&adapter->pdev->dev, "EEH error detected\n");
6105
6106 be_roce_dev_remove(adapter);
6107
6108 if (!be_check_error(adapter, BE_ERROR_EEH)) {
6109 be_set_error(adapter, BE_ERROR_EEH);
6110
6111 be_cancel_err_detection(adapter);
6112
6113 be_cleanup(adapter);
6114 }
6115
6116 if (state == pci_channel_io_perm_failure)
6117 return PCI_ERS_RESULT_DISCONNECT;
6118
6119 pci_disable_device(pdev);
6120
6121 /* The error could cause the FW to trigger a flash debug dump.
6122 * Resetting the card while a flash dump is in progress can
6123 * prevent it from recovering; wait for the dump to finish.
6124 * Wait only on the first function, as the wait is needed only
6125 * once per adapter.
6126 */
6127 if (pdev->devfn == 0)
6128 ssleep(30);
6129
6130 return PCI_ERS_RESULT_NEED_RESET;
6131 }
6132
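/* EEH slot reset: re-enable the device, restore config space and wait for
 * the FW to become ready again before declaring recovery possible.
 */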
6133 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6134 {
6135 struct be_adapter *adapter = pci_get_drvdata(pdev);
6136 int status;
6137
6138 dev_info(&adapter->pdev->dev, "EEH reset\n");
6139
6140 status = pci_enable_device(pdev);
6141 if (status)
6142 return PCI_ERS_RESULT_DISCONNECT;
6143
6144 pci_set_master(pdev);
6145 pci_restore_state(pdev);
6146
6147 /* Check whether the card is OK and the FW is ready */
6148 dev_info(&adapter->pdev->dev,
6149 "Waiting for FW to be ready after EEH reset\n");
6150 status = be_fw_wait_ready(adapter);
6151 if (status)
6152 return PCI_ERS_RESULT_DISCONNECT;
6153
6154 be_clear_error(adapter, BE_CLEAR_ALL);
6155 return PCI_ERS_RESULT_RECOVERED;
6156 }
6157
6158 static void be_eeh_resume(struct pci_dev *pdev)
6159 {
6160 int status = 0;
6161 struct be_adapter *adapter = pci_get_drvdata(pdev);
6162
6163 dev_info(&adapter->pdev->dev, "EEH resume\n");
6164
6165 pci_save_state(pdev);
6166
6167 status = be_resume(adapter);
6168 if (status)
6169 goto err;
6170
6171 be_roce_dev_add(adapter);
6172
6173 be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6174 return;
6175 err:
6176 dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6177 }
6178
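/* Called when the administrator writes to the sriov_numvfs sysfs attribute,
 * e.g. "echo 4 > /sys/bus/pci/devices/<BDF>/sriov_numvfs" (path shown only
 * as an illustration). A count of 0 disables all VFs. On Skyhawk the PF-pool
 * resources are re-distributed across the requested number of VFs before the
 * queues are updated under rtnl_lock.
 */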
6179 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6180 {
6181 struct be_adapter *adapter = pci_get_drvdata(pdev);
6182 struct be_resources vft_res = {0};
6183 int status;
6184
6185 if (!num_vfs)
6186 be_vf_clear(adapter);
6187
6188 adapter->num_vfs = num_vfs;
6189
6190 if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6191 dev_warn(&pdev->dev,
6192 "Cannot disable VFs while they are assigned\n");
6193 return -EBUSY;
6194 }
6195
6196 /* When the HW is in an SR-IOV capable configuration, the PF-pool
6197 * resources are distributed equally across the maximum number of VFs.
6198 * The user may request that only a subset of the max VFs be enabled.
6199 * Based on num_vfs, redistribute the resources across the requested
6200 * VFs so that each VF gets a larger share of resources.
6201 * This facility is not available in BE3 FW.
6202 * On Lancer chips the FW performs this redistribution itself.
6203 */
6204 if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6205 be_calculate_vf_res(adapter, adapter->num_vfs,
6206 &vft_res);
6207 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6208 adapter->num_vfs, &vft_res);
6209 if (status)
6210 dev_err(&pdev->dev,
6211 "Failed to optimize SR-IOV resources\n");
6212 }
6213
6214 status = be_get_resources(adapter);
6215 if (status)
6216 return be_cmd_status(status);
6217
6218 /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6219 rtnl_lock();
6220 status = be_update_queues(adapter);
6221 rtnl_unlock();
6222 if (status)
6223 return be_cmd_status(status);
6224
6225 if (adapter->num_vfs)
6226 status = be_vf_setup(adapter);
6227
6228 if (!status)
6229 return adapter->num_vfs;
6230
6231 return 0;
6232 }
6233
6234 static const struct pci_error_handlers be_eeh_handlers = {
6235 .error_detected = be_eeh_err_detected,
6236 .slot_reset = be_eeh_reset,
6237 .resume = be_eeh_resume,
6238 };
6239
6240 static struct pci_driver be_driver = {
6241 .name = DRV_NAME,
6242 .id_table = be_dev_ids,
6243 .probe = be_probe,
6244 .remove = be_remove,
6245 .suspend = be_suspend,
6246 .resume = be_pci_resume,
6247 .shutdown = be_shutdown,
6248 .sriov_configure = be_pci_sriov_configure,
6249 .err_handler = &be_eeh_handlers
6250 };
6251
6252 static int __init be_init_module(void)
6253 {
6254 int status;
6255
6256 if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6257 rx_frag_size != 2048) {
6258 printk(KERN_WARNING DRV_NAME
6259 " : Module param rx_frag_size must be 2048/4096/8192."
6260 " Using 2048\n");
6261 rx_frag_size = 2048;
6262 }
6263
6264 if (num_vfs > 0) {
6265 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6266 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6267 }
6268
6269 be_wq = create_singlethread_workqueue("be_wq");
6270 if (!be_wq) {
6271 pr_warn(DRV_NAME " : workqueue creation failed\n");
6272 return -ENOMEM;
6273 }
6274
6275 be_err_recovery_workq =
6276 create_singlethread_workqueue("be_err_recover");
6277 if (!be_err_recovery_workq)
6278 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6279
6280 status = pci_register_driver(&be_driver);
6281 if (status) {
6282 destroy_workqueue(be_wq);
6283 be_destroy_err_recovery_workq();
6284 }
6285 return status;
6286 }
6287 module_init(be_init_module);
6288
6289 static void __exit be_exit_module(void)
6290 {
6291 pci_unregister_driver(&be_driver);
6292
6293 be_destroy_err_recovery_workq();
6294
6295 if (be_wq)
6296 destroy_workqueue(be_wq);
6297 }
6298 module_exit(be_exit_module);