1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2018, Intel Corporation. */
4 /* Intel(R) Ethernet Connection E800 Series Linux Driver */
6 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8 #include <generated/utsrelease.h>
13 #include "ice_dcb_lib.h"
14 #include "ice_dcb_nl.h"
15 #include "ice_devlink.h"
16 /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
17 * ice tracepoint functions. This must be done exactly once across the
20 #define CREATE_TRACE_POINTS
21 #include "ice_trace.h"
23 #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver"
24 static const char ice_driver_string
[] = DRV_SUMMARY
;
25 static const char ice_copyright
[] = "Copyright (c) 2018, Intel Corporation.";
27 /* DDP Package file located in firmware search paths (e.g. /lib/firmware/) */
28 #define ICE_DDP_PKG_PATH "intel/ice/ddp/"
29 #define ICE_DDP_PKG_FILE ICE_DDP_PKG_PATH "ice.pkg"
31 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
32 MODULE_DESCRIPTION(DRV_SUMMARY
);
33 MODULE_LICENSE("GPL v2");
34 MODULE_FIRMWARE(ICE_DDP_PKG_FILE
);
36 static int debug
= -1;
37 module_param(debug
, int, 0644);
38 #ifndef CONFIG_DYNAMIC_DEBUG
39 MODULE_PARM_DESC(debug
, "netif level (0=none,...,16=all), hw debug_mask (0x8XXXXXXX)");
41 MODULE_PARM_DESC(debug
, "netif level (0=none,...,16=all)");
42 #endif /* !CONFIG_DYNAMIC_DEBUG */
44 static DEFINE_IDA(ice_aux_ida
);
46 static struct workqueue_struct
*ice_wq
;
47 static const struct net_device_ops ice_netdev_safe_mode_ops
;
48 static const struct net_device_ops ice_netdev_ops
;
49 static int ice_vsi_open(struct ice_vsi
*vsi
);
51 static void ice_rebuild(struct ice_pf
*pf
, enum ice_reset_req reset_type
);
53 static void ice_vsi_release_all(struct ice_pf
*pf
);
55 bool netif_is_ice(struct net_device
*dev
)
57 return dev
&& (dev
->netdev_ops
== &ice_netdev_ops
);
61 * ice_get_tx_pending - returns number of Tx descriptors not processed
62 * @ring: the ring of descriptors
64 static u16
ice_get_tx_pending(struct ice_ring
*ring
)
68 head
= ring
->next_to_clean
;
69 tail
= ring
->next_to_use
;
72 return (head
< tail
) ?
73 tail
- head
: (tail
+ ring
->count
- head
);
78 * ice_check_for_hang_subtask - check for and recover hung queues
79 * @pf: pointer to PF struct
81 static void ice_check_for_hang_subtask(struct ice_pf
*pf
)
83 struct ice_vsi
*vsi
= NULL
;
89 ice_for_each_vsi(pf
, v
)
90 if (pf
->vsi
[v
] && pf
->vsi
[v
]->type
== ICE_VSI_PF
) {
95 if (!vsi
|| test_bit(ICE_VSI_DOWN
, vsi
->state
))
98 if (!(vsi
->netdev
&& netif_carrier_ok(vsi
->netdev
)))
103 for (i
= 0; i
< vsi
->num_txq
; i
++) {
104 struct ice_ring
*tx_ring
= vsi
->tx_rings
[i
];
106 if (tx_ring
&& tx_ring
->desc
) {
107 /* If packet counter has not changed the queue is
108 * likely stalled, so force an interrupt for this
111 * prev_pkt would be negative if there was no
114 packets
= tx_ring
->stats
.pkts
& INT_MAX
;
115 if (tx_ring
->tx_stats
.prev_pkt
== packets
) {
116 /* Trigger sw interrupt to revive the queue */
117 ice_trigger_sw_intr(hw
, tx_ring
->q_vector
);
121 /* Memory barrier between read of packet count and call
122 * to ice_get_tx_pending()
125 tx_ring
->tx_stats
.prev_pkt
=
126 ice_get_tx_pending(tx_ring
) ? packets
: -1;
132 * ice_init_mac_fltr - Set initial MAC filters
133 * @pf: board private structure
135 * Set initial set of MAC filters for PF VSI; configure filters for permanent
136 * address and broadcast address. If an error is encountered, netdevice will be
139 static int ice_init_mac_fltr(struct ice_pf
*pf
)
141 enum ice_status status
;
145 vsi
= ice_get_main_vsi(pf
);
149 perm_addr
= vsi
->port_info
->mac
.perm_addr
;
150 status
= ice_fltr_add_mac_and_broadcast(vsi
, perm_addr
, ICE_FWD_TO_VSI
);
158 * ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
159 * @netdev: the net device on which the sync is happening
160 * @addr: MAC address to sync
162 * This is a callback function which is called by the in kernel device sync
163 * functions (like __dev_uc_sync, __dev_mc_sync, etc). This function only
164 * populates the tmp_sync_list, which is later used by ice_add_mac to add the
165 * MAC filters to the hardware.
167 static int ice_add_mac_to_sync_list(struct net_device
*netdev
, const u8
*addr
)
169 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
170 struct ice_vsi
*vsi
= np
->vsi
;
172 if (ice_fltr_add_mac_to_list(vsi
, &vsi
->tmp_sync_list
, addr
,
180 * ice_add_mac_to_unsync_list - creates list of MAC addresses to be unsynced
181 * @netdev: the net device on which the unsync is happening
182 * @addr: MAC address to unsync
184 * This is a callback function which is called by the in kernel device unsync
185 * functions (like __dev_uc_unsync, __dev_mc_unsync, etc). This function only
186 * populates the tmp_unsync_list, which is later used by ice_remove_mac to
187 * delete the MAC filters from the hardware.
189 static int ice_add_mac_to_unsync_list(struct net_device
*netdev
, const u8
*addr
)
191 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
192 struct ice_vsi
*vsi
= np
->vsi
;
194 /* Under some circumstances, we might receive a request to delete our
195 * own device address from our uc list. Because we store the device
196 * address in the VSI's MAC filter list, we need to ignore such
197 * requests and not delete our device address from this list.
199 if (ether_addr_equal(addr
, netdev
->dev_addr
))
202 if (ice_fltr_add_mac_to_list(vsi
, &vsi
->tmp_unsync_list
, addr
,
210 * ice_vsi_fltr_changed - check if filter state changed
211 * @vsi: VSI to be checked
213 * returns true if filter state has changed, false otherwise.
215 static bool ice_vsi_fltr_changed(struct ice_vsi
*vsi
)
217 return test_bit(ICE_VSI_UMAC_FLTR_CHANGED
, vsi
->state
) ||
218 test_bit(ICE_VSI_MMAC_FLTR_CHANGED
, vsi
->state
) ||
219 test_bit(ICE_VSI_VLAN_FLTR_CHANGED
, vsi
->state
);
223 * ice_cfg_promisc - Enable or disable promiscuous mode for a given PF VSI
224 * @vsi: the VSI being configured
225 * @promisc_m: mask of promiscuous config bits
226 * @set_promisc: enable or disable promisc flag request
229 static int ice_cfg_promisc(struct ice_vsi
*vsi
, u8 promisc_m
, bool set_promisc
)
231 struct ice_hw
*hw
= &vsi
->back
->hw
;
232 enum ice_status status
= 0;
234 if (vsi
->type
!= ICE_VSI_PF
)
237 if (vsi
->num_vlan
> 1) {
238 status
= ice_set_vlan_vsi_promisc(hw
, vsi
->idx
, promisc_m
,
242 status
= ice_set_vsi_promisc(hw
, vsi
->idx
, promisc_m
,
245 status
= ice_clear_vsi_promisc(hw
, vsi
->idx
, promisc_m
,
256 * ice_vsi_sync_fltr - Update the VSI filter list to the HW
257 * @vsi: ptr to the VSI
259 * Push any outstanding VSI filter changes through the AdminQ.
261 static int ice_vsi_sync_fltr(struct ice_vsi
*vsi
)
263 struct device
*dev
= ice_pf_to_dev(vsi
->back
);
264 struct net_device
*netdev
= vsi
->netdev
;
265 bool promisc_forced_on
= false;
266 struct ice_pf
*pf
= vsi
->back
;
267 struct ice_hw
*hw
= &pf
->hw
;
268 enum ice_status status
= 0;
269 u32 changed_flags
= 0;
276 while (test_and_set_bit(ICE_CFG_BUSY
, vsi
->state
))
277 usleep_range(1000, 2000);
279 changed_flags
= vsi
->current_netdev_flags
^ vsi
->netdev
->flags
;
280 vsi
->current_netdev_flags
= vsi
->netdev
->flags
;
282 INIT_LIST_HEAD(&vsi
->tmp_sync_list
);
283 INIT_LIST_HEAD(&vsi
->tmp_unsync_list
);
285 if (ice_vsi_fltr_changed(vsi
)) {
286 clear_bit(ICE_VSI_UMAC_FLTR_CHANGED
, vsi
->state
);
287 clear_bit(ICE_VSI_MMAC_FLTR_CHANGED
, vsi
->state
);
288 clear_bit(ICE_VSI_VLAN_FLTR_CHANGED
, vsi
->state
);
290 /* grab the netdev's addr_list_lock */
291 netif_addr_lock_bh(netdev
);
292 __dev_uc_sync(netdev
, ice_add_mac_to_sync_list
,
293 ice_add_mac_to_unsync_list
);
294 __dev_mc_sync(netdev
, ice_add_mac_to_sync_list
,
295 ice_add_mac_to_unsync_list
);
296 /* our temp lists are populated. release lock */
297 netif_addr_unlock_bh(netdev
);
300 /* Remove MAC addresses in the unsync list */
301 status
= ice_fltr_remove_mac_list(vsi
, &vsi
->tmp_unsync_list
);
302 ice_fltr_free_list(dev
, &vsi
->tmp_unsync_list
);
304 netdev_err(netdev
, "Failed to delete MAC filters\n");
305 /* if we failed because of alloc failures, just bail */
306 if (status
== ICE_ERR_NO_MEMORY
) {
312 /* Add MAC addresses in the sync list */
313 status
= ice_fltr_add_mac_list(vsi
, &vsi
->tmp_sync_list
);
314 ice_fltr_free_list(dev
, &vsi
->tmp_sync_list
);
315 /* If filter is added successfully or already exists, do not go into
316 * 'if' condition and report it as error. Instead continue processing
317 * rest of the function.
319 if (status
&& status
!= ICE_ERR_ALREADY_EXISTS
) {
320 netdev_err(netdev
, "Failed to add MAC filters\n");
321 /* If there is no more space for new umac filters, VSI
322 * should go into promiscuous mode. There should be some
323 * space reserved for promiscuous filters.
325 if (hw
->adminq
.sq_last_status
== ICE_AQ_RC_ENOSPC
&&
326 !test_and_set_bit(ICE_FLTR_OVERFLOW_PROMISC
,
328 promisc_forced_on
= true;
329 netdev_warn(netdev
, "Reached MAC filter limit, forcing promisc mode on VSI %d\n",
336 /* check for changes in promiscuous modes */
337 if (changed_flags
& IFF_ALLMULTI
) {
338 if (vsi
->current_netdev_flags
& IFF_ALLMULTI
) {
339 if (vsi
->num_vlan
> 1)
340 promisc_m
= ICE_MCAST_VLAN_PROMISC_BITS
;
342 promisc_m
= ICE_MCAST_PROMISC_BITS
;
344 err
= ice_cfg_promisc(vsi
, promisc_m
, true);
346 netdev_err(netdev
, "Error setting Multicast promiscuous mode on VSI %i\n",
348 vsi
->current_netdev_flags
&= ~IFF_ALLMULTI
;
352 /* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
353 if (vsi
->num_vlan
> 1)
354 promisc_m
= ICE_MCAST_VLAN_PROMISC_BITS
;
356 promisc_m
= ICE_MCAST_PROMISC_BITS
;
358 err
= ice_cfg_promisc(vsi
, promisc_m
, false);
360 netdev_err(netdev
, "Error clearing Multicast promiscuous mode on VSI %i\n",
362 vsi
->current_netdev_flags
|= IFF_ALLMULTI
;
368 if (((changed_flags
& IFF_PROMISC
) || promisc_forced_on
) ||
369 test_bit(ICE_VSI_PROMISC_CHANGED
, vsi
->state
)) {
370 clear_bit(ICE_VSI_PROMISC_CHANGED
, vsi
->state
);
371 if (vsi
->current_netdev_flags
& IFF_PROMISC
) {
372 /* Apply Rx filter rule to get traffic from wire */
373 if (!ice_is_dflt_vsi_in_use(pf
->first_sw
)) {
374 err
= ice_set_dflt_vsi(pf
->first_sw
, vsi
);
375 if (err
&& err
!= -EEXIST
) {
376 netdev_err(netdev
, "Error %d setting default VSI %i Rx rule\n",
378 vsi
->current_netdev_flags
&=
382 ice_cfg_vlan_pruning(vsi
, false, false);
385 /* Clear Rx filter to remove traffic from wire */
386 if (ice_is_vsi_dflt_vsi(pf
->first_sw
, vsi
)) {
387 err
= ice_clear_dflt_vsi(pf
->first_sw
);
389 netdev_err(netdev
, "Error %d clearing default VSI %i Rx rule\n",
391 vsi
->current_netdev_flags
|=
395 if (vsi
->num_vlan
> 1)
396 ice_cfg_vlan_pruning(vsi
, true, false);
403 set_bit(ICE_VSI_PROMISC_CHANGED
, vsi
->state
);
406 /* if something went wrong then set the changed flag so we try again */
407 set_bit(ICE_VSI_UMAC_FLTR_CHANGED
, vsi
->state
);
408 set_bit(ICE_VSI_MMAC_FLTR_CHANGED
, vsi
->state
);
410 clear_bit(ICE_CFG_BUSY
, vsi
->state
);
415 * ice_sync_fltr_subtask - Sync the VSI filter list with HW
416 * @pf: board private structure
418 static void ice_sync_fltr_subtask(struct ice_pf
*pf
)
422 if (!pf
|| !(test_bit(ICE_FLAG_FLTR_SYNC
, pf
->flags
)))
425 clear_bit(ICE_FLAG_FLTR_SYNC
, pf
->flags
);
427 ice_for_each_vsi(pf
, v
)
428 if (pf
->vsi
[v
] && ice_vsi_fltr_changed(pf
->vsi
[v
]) &&
429 ice_vsi_sync_fltr(pf
->vsi
[v
])) {
430 /* come back and try again later */
431 set_bit(ICE_FLAG_FLTR_SYNC
, pf
->flags
);
437 * ice_pf_dis_all_vsi - Pause all VSIs on a PF
439 * @locked: is the rtnl_lock already held
441 static void ice_pf_dis_all_vsi(struct ice_pf
*pf
, bool locked
)
446 ice_for_each_vsi(pf
, v
)
448 ice_dis_vsi(pf
->vsi
[v
], locked
);
450 for (node
= 0; node
< ICE_MAX_PF_AGG_NODES
; node
++)
451 pf
->pf_agg_node
[node
].num_vsis
= 0;
453 for (node
= 0; node
< ICE_MAX_VF_AGG_NODES
; node
++)
454 pf
->vf_agg_node
[node
].num_vsis
= 0;
458 * ice_prepare_for_reset - prep for the core to reset
459 * @pf: board private structure
461 * Inform or close all dependent features in prep for reset.
464 ice_prepare_for_reset(struct ice_pf
*pf
)
466 struct ice_hw
*hw
= &pf
->hw
;
469 /* already prepared for reset */
470 if (test_bit(ICE_PREPARED_FOR_RESET
, pf
->state
))
473 ice_unplug_aux_dev(pf
);
475 /* Notify VFs of impending reset */
476 if (ice_check_sq_alive(hw
, &hw
->mailboxq
))
477 ice_vc_notify_reset(pf
);
479 /* Disable VFs until reset is completed */
480 ice_for_each_vf(pf
, i
)
481 ice_set_vf_state_qs_dis(&pf
->vf
[i
]);
483 /* clear SW filtering DB */
484 ice_clear_hw_tbls(hw
);
485 /* disable the VSIs and their queues that are not already DOWN */
486 ice_pf_dis_all_vsi(pf
, false);
488 if (test_bit(ICE_FLAG_PTP_SUPPORTED
, pf
->flags
))
492 ice_sched_clear_port(hw
->port_info
);
494 ice_shutdown_all_ctrlq(hw
);
496 set_bit(ICE_PREPARED_FOR_RESET
, pf
->state
);
500 * ice_do_reset - Initiate one of many types of resets
501 * @pf: board private structure
502 * @reset_type: reset type requested
503 * before this function was called.
505 static void ice_do_reset(struct ice_pf
*pf
, enum ice_reset_req reset_type
)
507 struct device
*dev
= ice_pf_to_dev(pf
);
508 struct ice_hw
*hw
= &pf
->hw
;
510 dev_dbg(dev
, "reset_type 0x%x requested\n", reset_type
);
512 ice_prepare_for_reset(pf
);
514 /* trigger the reset */
515 if (ice_reset(hw
, reset_type
)) {
516 dev_err(dev
, "reset %d failed\n", reset_type
);
517 set_bit(ICE_RESET_FAILED
, pf
->state
);
518 clear_bit(ICE_RESET_OICR_RECV
, pf
->state
);
519 clear_bit(ICE_PREPARED_FOR_RESET
, pf
->state
);
520 clear_bit(ICE_PFR_REQ
, pf
->state
);
521 clear_bit(ICE_CORER_REQ
, pf
->state
);
522 clear_bit(ICE_GLOBR_REQ
, pf
->state
);
523 wake_up(&pf
->reset_wait_queue
);
527 /* PFR is a bit of a special case because it doesn't result in an OICR
528 * interrupt. So for PFR, rebuild after the reset and clear the reset-
529 * associated state bits.
531 if (reset_type
== ICE_RESET_PFR
) {
533 ice_rebuild(pf
, reset_type
);
534 clear_bit(ICE_PREPARED_FOR_RESET
, pf
->state
);
535 clear_bit(ICE_PFR_REQ
, pf
->state
);
536 wake_up(&pf
->reset_wait_queue
);
537 ice_reset_all_vfs(pf
, true);
542 * ice_reset_subtask - Set up for resetting the device and driver
543 * @pf: board private structure
545 static void ice_reset_subtask(struct ice_pf
*pf
)
547 enum ice_reset_req reset_type
= ICE_RESET_INVAL
;
549 /* When a CORER/GLOBR/EMPR is about to happen, the hardware triggers an
550 * OICR interrupt. The OICR handler (ice_misc_intr) determines what type
551 * of reset is pending and sets bits in pf->state indicating the reset
552 * type and ICE_RESET_OICR_RECV. So, if the latter bit is set
553 * prepare for pending reset if not already (for PF software-initiated
554 * global resets the software should already be prepared for it as
555 * indicated by ICE_PREPARED_FOR_RESET; for global resets initiated
556 * by firmware or software on other PFs, that bit is not set so prepare
557 * for the reset now), poll for reset done, rebuild and return.
559 if (test_bit(ICE_RESET_OICR_RECV
, pf
->state
)) {
560 /* Perform the largest reset requested */
561 if (test_and_clear_bit(ICE_CORER_RECV
, pf
->state
))
562 reset_type
= ICE_RESET_CORER
;
563 if (test_and_clear_bit(ICE_GLOBR_RECV
, pf
->state
))
564 reset_type
= ICE_RESET_GLOBR
;
565 if (test_and_clear_bit(ICE_EMPR_RECV
, pf
->state
))
566 reset_type
= ICE_RESET_EMPR
;
567 /* return if no valid reset type requested */
568 if (reset_type
== ICE_RESET_INVAL
)
570 ice_prepare_for_reset(pf
);
572 /* make sure we are ready to rebuild */
573 if (ice_check_reset(&pf
->hw
)) {
574 set_bit(ICE_RESET_FAILED
, pf
->state
);
576 /* done with reset. start rebuild */
577 pf
->hw
.reset_ongoing
= false;
578 ice_rebuild(pf
, reset_type
);
579 /* clear bit to resume normal operations, but
580 * ICE_NEEDS_RESTART bit is set in case rebuild failed
582 clear_bit(ICE_RESET_OICR_RECV
, pf
->state
);
583 clear_bit(ICE_PREPARED_FOR_RESET
, pf
->state
);
584 clear_bit(ICE_PFR_REQ
, pf
->state
);
585 clear_bit(ICE_CORER_REQ
, pf
->state
);
586 clear_bit(ICE_GLOBR_REQ
, pf
->state
);
587 wake_up(&pf
->reset_wait_queue
);
588 ice_reset_all_vfs(pf
, true);
594 /* No pending resets to finish processing. Check for new resets */
595 if (test_bit(ICE_PFR_REQ
, pf
->state
))
596 reset_type
= ICE_RESET_PFR
;
597 if (test_bit(ICE_CORER_REQ
, pf
->state
))
598 reset_type
= ICE_RESET_CORER
;
599 if (test_bit(ICE_GLOBR_REQ
, pf
->state
))
600 reset_type
= ICE_RESET_GLOBR
;
601 /* If no valid reset type requested just return */
602 if (reset_type
== ICE_RESET_INVAL
)
605 /* reset if not already down or busy */
606 if (!test_bit(ICE_DOWN
, pf
->state
) &&
607 !test_bit(ICE_CFG_BUSY
, pf
->state
)) {
608 ice_do_reset(pf
, reset_type
);
613 * ice_print_topo_conflict - print topology conflict message
614 * @vsi: the VSI whose topology status is being checked
616 static void ice_print_topo_conflict(struct ice_vsi
*vsi
)
618 switch (vsi
->port_info
->phy
.link_info
.topo_media_conflict
) {
619 case ICE_AQ_LINK_TOPO_CONFLICT
:
620 case ICE_AQ_LINK_MEDIA_CONFLICT
:
621 case ICE_AQ_LINK_TOPO_UNREACH_PRT
:
622 case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT
:
623 case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA
:
624 netdev_info(vsi
->netdev
, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
626 case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA
:
627 netdev_info(vsi
->netdev
, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
635 * ice_print_link_msg - print link up or down message
636 * @vsi: the VSI whose link status is being queried
637 * @isup: boolean for if the link is now up or down
639 void ice_print_link_msg(struct ice_vsi
*vsi
, bool isup
)
641 struct ice_aqc_get_phy_caps_data
*caps
;
642 const char *an_advertised
;
643 enum ice_status status
;
653 if (vsi
->current_isup
== isup
)
656 vsi
->current_isup
= isup
;
659 netdev_info(vsi
->netdev
, "NIC Link is Down\n");
663 switch (vsi
->port_info
->phy
.link_info
.link_speed
) {
664 case ICE_AQ_LINK_SPEED_100GB
:
667 case ICE_AQ_LINK_SPEED_50GB
:
670 case ICE_AQ_LINK_SPEED_40GB
:
673 case ICE_AQ_LINK_SPEED_25GB
:
676 case ICE_AQ_LINK_SPEED_20GB
:
679 case ICE_AQ_LINK_SPEED_10GB
:
682 case ICE_AQ_LINK_SPEED_5GB
:
685 case ICE_AQ_LINK_SPEED_2500MB
:
688 case ICE_AQ_LINK_SPEED_1000MB
:
691 case ICE_AQ_LINK_SPEED_100MB
:
699 switch (vsi
->port_info
->fc
.current_mode
) {
703 case ICE_FC_TX_PAUSE
:
706 case ICE_FC_RX_PAUSE
:
717 /* Get FEC mode based on negotiated link info */
718 switch (vsi
->port_info
->phy
.link_info
.fec_info
) {
719 case ICE_AQ_LINK_25G_RS_528_FEC_EN
:
720 case ICE_AQ_LINK_25G_RS_544_FEC_EN
:
723 case ICE_AQ_LINK_25G_KR_FEC_EN
:
724 fec
= "FC-FEC/BASE-R";
731 /* check if autoneg completed, might be false due to not supported */
732 if (vsi
->port_info
->phy
.link_info
.an_info
& ICE_AQ_AN_COMPLETED
)
737 /* Get FEC mode requested based on PHY caps last SW configuration */
738 caps
= kzalloc(sizeof(*caps
), GFP_KERNEL
);
741 an_advertised
= "Unknown";
745 status
= ice_aq_get_phy_caps(vsi
->port_info
, false,
746 ICE_AQC_REPORT_ACTIVE_CFG
, caps
, NULL
);
748 netdev_info(vsi
->netdev
, "Get phy capability failed.\n");
750 an_advertised
= ice_is_phy_caps_an_enabled(caps
) ? "On" : "Off";
752 if (caps
->link_fec_options
& ICE_AQC_PHY_FEC_25G_RS_528_REQ
||
753 caps
->link_fec_options
& ICE_AQC_PHY_FEC_25G_RS_544_REQ
)
755 else if (caps
->link_fec_options
& ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ
||
756 caps
->link_fec_options
& ICE_AQC_PHY_FEC_25G_KR_REQ
)
757 fec_req
= "FC-FEC/BASE-R";
764 netdev_info(vsi
->netdev
, "NIC Link is up %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg Advertised: %s, Autoneg Negotiated: %s, Flow Control: %s\n",
765 speed
, fec_req
, fec
, an_advertised
, an
, fc
);
766 ice_print_topo_conflict(vsi
);
770 * ice_vsi_link_event - update the VSI's netdev
771 * @vsi: the VSI on which the link event occurred
772 * @link_up: whether or not the VSI needs to be set up or down
774 static void ice_vsi_link_event(struct ice_vsi
*vsi
, bool link_up
)
779 if (test_bit(ICE_VSI_DOWN
, vsi
->state
) || !vsi
->netdev
)
782 if (vsi
->type
== ICE_VSI_PF
) {
783 if (link_up
== netif_carrier_ok(vsi
->netdev
))
787 netif_carrier_on(vsi
->netdev
);
788 netif_tx_wake_all_queues(vsi
->netdev
);
790 netif_carrier_off(vsi
->netdev
);
791 netif_tx_stop_all_queues(vsi
->netdev
);
797 * ice_set_dflt_mib - send a default config MIB to the FW
798 * @pf: private PF struct
800 * This function sends a default configuration MIB to the FW.
802 * If this function errors out at any point, the driver is still able to
803 * function. The main impact is that LFC may not operate as expected.
804 * Therefore an error state in this function should be treated with a DBG
805 * message and continue on with driver rebuild/reenable.
807 static void ice_set_dflt_mib(struct ice_pf
*pf
)
809 struct device
*dev
= ice_pf_to_dev(pf
);
810 u8 mib_type
, *buf
, *lldpmib
= NULL
;
811 u16 len
, typelen
, offset
= 0;
812 struct ice_lldp_org_tlv
*tlv
;
813 struct ice_hw
*hw
= &pf
->hw
;
816 mib_type
= SET_LOCAL_MIB_TYPE_LOCAL_MIB
;
817 lldpmib
= kzalloc(ICE_LLDPDU_SIZE
, GFP_KERNEL
);
819 dev_dbg(dev
, "%s Failed to allocate MIB memory\n",
824 /* Add ETS CFG TLV */
825 tlv
= (struct ice_lldp_org_tlv
*)lldpmib
;
826 typelen
= ((ICE_TLV_TYPE_ORG
<< ICE_LLDP_TLV_TYPE_S
) |
827 ICE_IEEE_ETS_TLV_LEN
);
828 tlv
->typelen
= htons(typelen
);
829 ouisubtype
= ((ICE_IEEE_8021QAZ_OUI
<< ICE_LLDP_TLV_OUI_S
) |
830 ICE_IEEE_SUBTYPE_ETS_CFG
);
831 tlv
->ouisubtype
= htonl(ouisubtype
);
836 /* ETS CFG all UPs map to TC 0. Next 4 (1 - 4) Octets = 0.
837 * Octets 5 - 12 are BW values, set octet 5 to 100% BW.
838 * Octets 13 - 20 are TSA values - leave as zeros
841 len
= (typelen
& ICE_LLDP_TLV_LEN_M
) >> ICE_LLDP_TLV_LEN_S
;
843 tlv
= (struct ice_lldp_org_tlv
*)
844 ((char *)tlv
+ sizeof(tlv
->typelen
) + len
);
846 /* Add ETS REC TLV */
848 tlv
->typelen
= htons(typelen
);
850 ouisubtype
= ((ICE_IEEE_8021QAZ_OUI
<< ICE_LLDP_TLV_OUI_S
) |
851 ICE_IEEE_SUBTYPE_ETS_REC
);
852 tlv
->ouisubtype
= htonl(ouisubtype
);
854 /* First octet of buf is reserved
855 * Octets 1 - 4 map UP to TC - all UPs map to zero
856 * Octets 5 - 12 are BW values - set TC 0 to 100%.
857 * Octets 13 - 20 are TSA value - leave as zeros
861 tlv
= (struct ice_lldp_org_tlv
*)
862 ((char *)tlv
+ sizeof(tlv
->typelen
) + len
);
864 /* Add PFC CFG TLV */
865 typelen
= ((ICE_TLV_TYPE_ORG
<< ICE_LLDP_TLV_TYPE_S
) |
866 ICE_IEEE_PFC_TLV_LEN
);
867 tlv
->typelen
= htons(typelen
);
869 ouisubtype
= ((ICE_IEEE_8021QAZ_OUI
<< ICE_LLDP_TLV_OUI_S
) |
870 ICE_IEEE_SUBTYPE_PFC_CFG
);
871 tlv
->ouisubtype
= htonl(ouisubtype
);
873 /* Octet 1 left as all zeros - PFC disabled */
875 len
= (typelen
& ICE_LLDP_TLV_LEN_M
) >> ICE_LLDP_TLV_LEN_S
;
878 if (ice_aq_set_lldp_mib(hw
, mib_type
, (void *)lldpmib
, offset
, NULL
))
879 dev_dbg(dev
, "%s Failed to set default LLDP MIB\n", __func__
);
885 * ice_check_module_power - check and report unsupported module power level
886 * @pf: pointer to PF struct
887 * @link_cfg_err: bitmap from the link info structure
889 * check module power level returned by a previous call to aq_get_link_info
890 * and print error messages if module power level is not supported
892 static void ice_check_module_power(struct ice_pf
*pf
, u8 link_cfg_err
)
894 /* if module power level is supported, clear the flag */
895 if (!(link_cfg_err
& (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT
|
896 ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED
))) {
897 clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED
, pf
->flags
);
901 /* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the
902 * above block didn't clear this bit, there's nothing to do
904 if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED
, pf
->flags
))
907 if (link_cfg_err
& ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT
) {
908 dev_err(ice_pf_to_dev(pf
), "The installed module is incompatible with the device's NVM image. Cannot start link\n");
909 set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED
, pf
->flags
);
910 } else if (link_cfg_err
& ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED
) {
911 dev_err(ice_pf_to_dev(pf
), "The module's power requirements exceed the device's power supply. Cannot start link\n");
912 set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED
, pf
->flags
);
917 * ice_link_event - process the link event
918 * @pf: PF that the link event is associated with
919 * @pi: port_info for the port that the link event is associated with
920 * @link_up: true if the physical link is up and false if it is down
921 * @link_speed: current link speed received from the link event
923 * Returns 0 on success and negative on failure
926 ice_link_event(struct ice_pf
*pf
, struct ice_port_info
*pi
, bool link_up
,
929 struct device
*dev
= ice_pf_to_dev(pf
);
930 struct ice_phy_info
*phy_info
;
931 enum ice_status status
;
937 phy_info
->link_info_old
= phy_info
->link_info
;
939 old_link
= !!(phy_info
->link_info_old
.link_info
& ICE_AQ_LINK_UP
);
940 old_link_speed
= phy_info
->link_info_old
.link_speed
;
942 /* update the link info structures and re-enable link events,
943 * don't bail on failure due to other book keeping needed
945 status
= ice_update_link_info(pi
);
947 dev_dbg(dev
, "Failed to update link status on port %d, err %s aq_err %s\n",
948 pi
->lport
, ice_stat_str(status
),
949 ice_aq_str(pi
->hw
->adminq
.sq_last_status
));
951 ice_check_module_power(pf
, pi
->phy
.link_info
.link_cfg_err
);
953 /* Check if the link state is up after updating link info, and treat
954 * this event as an UP event since the link is actually UP now.
956 if (phy_info
->link_info
.link_info
& ICE_AQ_LINK_UP
)
959 vsi
= ice_get_main_vsi(pf
);
960 if (!vsi
|| !vsi
->port_info
)
963 /* turn off PHY if media was removed */
964 if (!test_bit(ICE_FLAG_NO_MEDIA
, pf
->flags
) &&
965 !(pi
->phy
.link_info
.link_info
& ICE_AQ_MEDIA_AVAILABLE
)) {
966 set_bit(ICE_FLAG_NO_MEDIA
, pf
->flags
);
967 ice_set_link(vsi
, false);
970 /* if the old link up/down and speed is the same as the new */
971 if (link_up
== old_link
&& link_speed
== old_link_speed
)
974 if (ice_is_dcb_active(pf
)) {
975 if (test_bit(ICE_FLAG_DCB_ENA
, pf
->flags
))
979 ice_set_dflt_mib(pf
);
981 ice_vsi_link_event(vsi
, link_up
);
982 ice_print_link_msg(vsi
, link_up
);
984 ice_vc_notify_link_state(pf
);
990 * ice_watchdog_subtask - periodic tasks not using event driven scheduling
991 * @pf: board private structure
993 static void ice_watchdog_subtask(struct ice_pf
*pf
)
997 /* if interface is down do nothing */
998 if (test_bit(ICE_DOWN
, pf
->state
) ||
999 test_bit(ICE_CFG_BUSY
, pf
->state
))
1002 /* make sure we don't do these things too often */
1003 if (time_before(jiffies
,
1004 pf
->serv_tmr_prev
+ pf
->serv_tmr_period
))
1007 pf
->serv_tmr_prev
= jiffies
;
1009 /* Update the stats for active netdevs so the network stack
1010 * can look at updated numbers whenever it cares to
1012 ice_update_pf_stats(pf
);
1013 ice_for_each_vsi(pf
, i
)
1014 if (pf
->vsi
[i
] && pf
->vsi
[i
]->netdev
)
1015 ice_update_vsi_stats(pf
->vsi
[i
]);
1019 * ice_init_link_events - enable/initialize link events
1020 * @pi: pointer to the port_info instance
1022 * Returns -EIO on failure, 0 on success
1024 static int ice_init_link_events(struct ice_port_info
*pi
)
1028 mask
= ~((u16
)(ICE_AQ_LINK_EVENT_UPDOWN
| ICE_AQ_LINK_EVENT_MEDIA_NA
|
1029 ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL
));
1031 if (ice_aq_set_event_mask(pi
->hw
, pi
->lport
, mask
, NULL
)) {
1032 dev_dbg(ice_hw_to_dev(pi
->hw
), "Failed to set link event mask for port %d\n",
1037 if (ice_aq_get_link_info(pi
, true, NULL
, NULL
)) {
1038 dev_dbg(ice_hw_to_dev(pi
->hw
), "Failed to enable link events for port %d\n",
1047 * ice_handle_link_event - handle link event via ARQ
1048 * @pf: PF that the link event is associated with
1049 * @event: event structure containing link status info
1052 ice_handle_link_event(struct ice_pf
*pf
, struct ice_rq_event_info
*event
)
1054 struct ice_aqc_get_link_status_data
*link_data
;
1055 struct ice_port_info
*port_info
;
1058 link_data
= (struct ice_aqc_get_link_status_data
*)event
->msg_buf
;
1059 port_info
= pf
->hw
.port_info
;
1063 status
= ice_link_event(pf
, port_info
,
1064 !!(link_data
->link_info
& ICE_AQ_LINK_UP
),
1065 le16_to_cpu(link_data
->link_speed
));
1067 dev_dbg(ice_pf_to_dev(pf
), "Could not process link event, error %d\n",
1073 enum ice_aq_task_state
{
1074 ICE_AQ_TASK_WAITING
= 0,
1075 ICE_AQ_TASK_COMPLETE
,
1076 ICE_AQ_TASK_CANCELED
,
1079 struct ice_aq_task
{
1080 struct hlist_node entry
;
1083 struct ice_rq_event_info
*event
;
1084 enum ice_aq_task_state state
;
1088 * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
1089 * @pf: pointer to the PF private structure
1090 * @opcode: the opcode to wait for
1091 * @timeout: how long to wait, in jiffies
1092 * @event: storage for the event info
1094 * Waits for a specific AdminQ completion event on the ARQ for a given PF. The
1095 * current thread will be put to sleep until the specified event occurs or
1096 * until the given timeout is reached.
1098 * To obtain only the descriptor contents, pass an event without an allocated
1099 * msg_buf. If the complete data buffer is desired, allocate the
1100 * event->msg_buf with enough space ahead of time.
1102 * Returns: zero on success, or a negative error code on failure.
1104 int ice_aq_wait_for_event(struct ice_pf
*pf
, u16 opcode
, unsigned long timeout
,
1105 struct ice_rq_event_info
*event
)
1107 struct device
*dev
= ice_pf_to_dev(pf
);
1108 struct ice_aq_task
*task
;
1109 unsigned long start
;
1113 task
= kzalloc(sizeof(*task
), GFP_KERNEL
);
1117 INIT_HLIST_NODE(&task
->entry
);
1118 task
->opcode
= opcode
;
1119 task
->event
= event
;
1120 task
->state
= ICE_AQ_TASK_WAITING
;
1122 spin_lock_bh(&pf
->aq_wait_lock
);
1123 hlist_add_head(&task
->entry
, &pf
->aq_wait_list
);
1124 spin_unlock_bh(&pf
->aq_wait_lock
);
1128 ret
= wait_event_interruptible_timeout(pf
->aq_wait_queue
, task
->state
,
1130 switch (task
->state
) {
1131 case ICE_AQ_TASK_WAITING
:
1132 err
= ret
< 0 ? ret
: -ETIMEDOUT
;
1134 case ICE_AQ_TASK_CANCELED
:
1135 err
= ret
< 0 ? ret
: -ECANCELED
;
1137 case ICE_AQ_TASK_COMPLETE
:
1138 err
= ret
< 0 ? ret
: 0;
1141 WARN(1, "Unexpected AdminQ wait task state %u", task
->state
);
1146 dev_dbg(dev
, "Waited %u msecs (max %u msecs) for firmware response to op 0x%04x\n",
1147 jiffies_to_msecs(jiffies
- start
),
1148 jiffies_to_msecs(timeout
),
1151 spin_lock_bh(&pf
->aq_wait_lock
);
1152 hlist_del(&task
->entry
);
1153 spin_unlock_bh(&pf
->aq_wait_lock
);
1160 * ice_aq_check_events - Check if any thread is waiting for an AdminQ event
1161 * @pf: pointer to the PF private structure
1162 * @opcode: the opcode of the event
1163 * @event: the event to check
1165 * Loops over the current list of pending threads waiting for an AdminQ event.
1166 * For each matching task, copy the contents of the event into the task
1167 * structure and wake up the thread.
1169 * If multiple threads wait for the same opcode, they will all be woken up.
1171 * Note that event->msg_buf will only be duplicated if the event has a buffer
1172 * with enough space already allocated. Otherwise, only the descriptor and
1173 * message length will be copied.
1175 * This function is declared void and does not return a value.
1177 static void ice_aq_check_events(struct ice_pf
*pf
, u16 opcode
,
1178 struct ice_rq_event_info
*event
)
1180 struct ice_aq_task
*task
;
1183 spin_lock_bh(&pf
->aq_wait_lock
);
1184 hlist_for_each_entry(task
, &pf
->aq_wait_list
, entry
) {
1185 if (task
->state
|| task
->opcode
!= opcode
)
1188 memcpy(&task
->event
->desc
, &event
->desc
, sizeof(event
->desc
));
1189 task
->event
->msg_len
= event
->msg_len
;
1191 /* Only copy the data buffer if a destination was set */
1192 if (task
->event
->msg_buf
&&
1193 task
->event
->buf_len
> event
->buf_len
) {
1194 memcpy(task
->event
->msg_buf
, event
->msg_buf
,
1196 task
->event
->buf_len
= event
->buf_len
;
1199 task
->state
= ICE_AQ_TASK_COMPLETE
;
1202 spin_unlock_bh(&pf
->aq_wait_lock
);
1205 wake_up(&pf
->aq_wait_queue
);
1209 * ice_aq_cancel_waiting_tasks - Immediately cancel all waiting tasks
1210 * @pf: the PF private structure
1212 * Set all waiting tasks to ICE_AQ_TASK_CANCELED, and wake up their threads.
1213 * This will then cause ice_aq_wait_for_event to exit with -ECANCELED.
1215 static void ice_aq_cancel_waiting_tasks(struct ice_pf
*pf
)
1217 struct ice_aq_task
*task
;
1219 spin_lock_bh(&pf
->aq_wait_lock
);
1220 hlist_for_each_entry(task
, &pf
->aq_wait_list
, entry
)
1221 task
->state
= ICE_AQ_TASK_CANCELED
;
1222 spin_unlock_bh(&pf
->aq_wait_lock
);
1224 wake_up(&pf
->aq_wait_queue
);
1228 * __ice_clean_ctrlq - helper function to clean controlq rings
1229 * @pf: ptr to struct ice_pf
1230 * @q_type: specific Control queue type
1232 static int __ice_clean_ctrlq(struct ice_pf
*pf
, enum ice_ctl_q q_type
)
1234 struct device
*dev
= ice_pf_to_dev(pf
);
1235 struct ice_rq_event_info event
;
1236 struct ice_hw
*hw
= &pf
->hw
;
1237 struct ice_ctl_q_info
*cq
;
1242 /* Do not clean control queue if/when PF reset fails */
1243 if (test_bit(ICE_RESET_FAILED
, pf
->state
))
1247 case ICE_CTL_Q_ADMIN
:
1255 case ICE_CTL_Q_MAILBOX
:
1258 /* we are going to try to detect a malicious VF, so set the
1259 * state to begin detection
1261 hw
->mbx_snapshot
.mbx_buf
.state
= ICE_MAL_VF_DETECT_STATE_NEW_SNAPSHOT
;
1264 dev_warn(dev
, "Unknown control queue type 0x%x\n", q_type
);
1268 /* check for error indications - PF_xx_AxQLEN register layout for
1269 * FW/MBX/SB are identical so just use defines for PF_FW_AxQLEN.
1271 val
= rd32(hw
, cq
->rq
.len
);
1272 if (val
& (PF_FW_ARQLEN_ARQVFE_M
| PF_FW_ARQLEN_ARQOVFL_M
|
1273 PF_FW_ARQLEN_ARQCRIT_M
)) {
1275 if (val
& PF_FW_ARQLEN_ARQVFE_M
)
1276 dev_dbg(dev
, "%s Receive Queue VF Error detected\n",
1278 if (val
& PF_FW_ARQLEN_ARQOVFL_M
) {
1279 dev_dbg(dev
, "%s Receive Queue Overflow Error detected\n",
1282 if (val
& PF_FW_ARQLEN_ARQCRIT_M
)
1283 dev_dbg(dev
, "%s Receive Queue Critical Error detected\n",
1285 val
&= ~(PF_FW_ARQLEN_ARQVFE_M
| PF_FW_ARQLEN_ARQOVFL_M
|
1286 PF_FW_ARQLEN_ARQCRIT_M
);
1288 wr32(hw
, cq
->rq
.len
, val
);
1291 val
= rd32(hw
, cq
->sq
.len
);
1292 if (val
& (PF_FW_ATQLEN_ATQVFE_M
| PF_FW_ATQLEN_ATQOVFL_M
|
1293 PF_FW_ATQLEN_ATQCRIT_M
)) {
1295 if (val
& PF_FW_ATQLEN_ATQVFE_M
)
1296 dev_dbg(dev
, "%s Send Queue VF Error detected\n",
1298 if (val
& PF_FW_ATQLEN_ATQOVFL_M
) {
1299 dev_dbg(dev
, "%s Send Queue Overflow Error detected\n",
1302 if (val
& PF_FW_ATQLEN_ATQCRIT_M
)
1303 dev_dbg(dev
, "%s Send Queue Critical Error detected\n",
1305 val
&= ~(PF_FW_ATQLEN_ATQVFE_M
| PF_FW_ATQLEN_ATQOVFL_M
|
1306 PF_FW_ATQLEN_ATQCRIT_M
);
1308 wr32(hw
, cq
->sq
.len
, val
);
1311 event
.buf_len
= cq
->rq_buf_size
;
1312 event
.msg_buf
= kzalloc(event
.buf_len
, GFP_KERNEL
);
1317 enum ice_status ret
;
1320 ret
= ice_clean_rq_elem(hw
, cq
, &event
, &pending
);
1321 if (ret
== ICE_ERR_AQ_NO_WORK
)
1324 dev_err(dev
, "%s Receive Queue event error %s\n", qtype
,
1329 opcode
= le16_to_cpu(event
.desc
.opcode
);
1331 /* Notify any thread that might be waiting for this event */
1332 ice_aq_check_events(pf
, opcode
, &event
);
1335 case ice_aqc_opc_get_link_status
:
1336 if (ice_handle_link_event(pf
, &event
))
1337 dev_err(dev
, "Could not handle link event\n");
1339 case ice_aqc_opc_event_lan_overflow
:
1340 ice_vf_lan_overflow_event(pf
, &event
);
1342 case ice_mbx_opc_send_msg_to_pf
:
1343 if (!ice_is_malicious_vf(pf
, &event
, i
, pending
))
1344 ice_vc_process_vf_msg(pf
, &event
);
1346 case ice_aqc_opc_fw_logging
:
1347 ice_output_fw_log(hw
, &event
.desc
, event
.msg_buf
);
1349 case ice_aqc_opc_lldp_set_mib_change
:
1350 ice_dcb_process_lldp_set_mib_change(pf
, &event
);
1353 dev_dbg(dev
, "%s Receive Queue unknown event 0x%04x ignored\n",
1357 } while (pending
&& (i
++ < ICE_DFLT_IRQ_WORK
));
1359 kfree(event
.msg_buf
);
1361 return pending
&& (i
== ICE_DFLT_IRQ_WORK
);
1365 * ice_ctrlq_pending - check if there is a difference between ntc and ntu
1366 * @hw: pointer to hardware info
1367 * @cq: control queue information
1369 * returns true if there are pending messages in a queue, false if there aren't
1371 static bool ice_ctrlq_pending(struct ice_hw
*hw
, struct ice_ctl_q_info
*cq
)
1375 ntu
= (u16
)(rd32(hw
, cq
->rq
.head
) & cq
->rq
.head_mask
);
1376 return cq
->rq
.next_to_clean
!= ntu
;
1380 * ice_clean_adminq_subtask - clean the AdminQ rings
1381 * @pf: board private structure
1383 static void ice_clean_adminq_subtask(struct ice_pf
*pf
)
1385 struct ice_hw
*hw
= &pf
->hw
;
1387 if (!test_bit(ICE_ADMINQ_EVENT_PENDING
, pf
->state
))
1390 if (__ice_clean_ctrlq(pf
, ICE_CTL_Q_ADMIN
))
1393 clear_bit(ICE_ADMINQ_EVENT_PENDING
, pf
->state
);
1395 /* There might be a situation where new messages arrive to a control
1396 * queue between processing the last message and clearing the
1397 * EVENT_PENDING bit. So before exiting, check queue head again (using
1398 * ice_ctrlq_pending) and process new messages if any.
1400 if (ice_ctrlq_pending(hw
, &hw
->adminq
))
1401 __ice_clean_ctrlq(pf
, ICE_CTL_Q_ADMIN
);
1407 * ice_clean_mailboxq_subtask - clean the MailboxQ rings
1408 * @pf: board private structure
1410 static void ice_clean_mailboxq_subtask(struct ice_pf
*pf
)
1412 struct ice_hw
*hw
= &pf
->hw
;
1414 if (!test_bit(ICE_MAILBOXQ_EVENT_PENDING
, pf
->state
))
1417 if (__ice_clean_ctrlq(pf
, ICE_CTL_Q_MAILBOX
))
1420 clear_bit(ICE_MAILBOXQ_EVENT_PENDING
, pf
->state
);
1422 if (ice_ctrlq_pending(hw
, &hw
->mailboxq
))
1423 __ice_clean_ctrlq(pf
, ICE_CTL_Q_MAILBOX
);
1429 * ice_clean_sbq_subtask - clean the Sideband Queue rings
1430 * @pf: board private structure
1432 static void ice_clean_sbq_subtask(struct ice_pf
*pf
)
1434 struct ice_hw
*hw
= &pf
->hw
;
1436 /* Nothing to do here if sideband queue is not supported */
1437 if (!ice_is_sbq_supported(hw
)) {
1438 clear_bit(ICE_SIDEBANDQ_EVENT_PENDING
, pf
->state
);
1442 if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING
, pf
->state
))
1445 if (__ice_clean_ctrlq(pf
, ICE_CTL_Q_SB
))
1448 clear_bit(ICE_SIDEBANDQ_EVENT_PENDING
, pf
->state
);
1450 if (ice_ctrlq_pending(hw
, &hw
->sbq
))
1451 __ice_clean_ctrlq(pf
, ICE_CTL_Q_SB
);
1457 * ice_service_task_schedule - schedule the service task to wake up
1458 * @pf: board private structure
1460 * If not already scheduled, this puts the task into the work queue.
1462 void ice_service_task_schedule(struct ice_pf
*pf
)
1464 if (!test_bit(ICE_SERVICE_DIS
, pf
->state
) &&
1465 !test_and_set_bit(ICE_SERVICE_SCHED
, pf
->state
) &&
1466 !test_bit(ICE_NEEDS_RESTART
, pf
->state
))
1467 queue_work(ice_wq
, &pf
->serv_task
);
1471 * ice_service_task_complete - finish up the service task
1472 * @pf: board private structure
1474 static void ice_service_task_complete(struct ice_pf
*pf
)
1476 WARN_ON(!test_bit(ICE_SERVICE_SCHED
, pf
->state
));
1478 /* force memory (pf->state) to sync before next service task */
1479 smp_mb__before_atomic();
1480 clear_bit(ICE_SERVICE_SCHED
, pf
->state
);
1484 * ice_service_task_stop - stop service task and cancel works
1485 * @pf: board private structure
1487 * Return 0 if the ICE_SERVICE_DIS bit was not already set,
1490 static int ice_service_task_stop(struct ice_pf
*pf
)
1494 ret
= test_and_set_bit(ICE_SERVICE_DIS
, pf
->state
);
1496 if (pf
->serv_tmr
.function
)
1497 del_timer_sync(&pf
->serv_tmr
);
1498 if (pf
->serv_task
.func
)
1499 cancel_work_sync(&pf
->serv_task
);
1501 clear_bit(ICE_SERVICE_SCHED
, pf
->state
);
1506 * ice_service_task_restart - restart service task and schedule works
1507 * @pf: board private structure
1509 * This function is needed for suspend and resume works (e.g WoL scenario)
1511 static void ice_service_task_restart(struct ice_pf
*pf
)
1513 clear_bit(ICE_SERVICE_DIS
, pf
->state
);
1514 ice_service_task_schedule(pf
);
1518 * ice_service_timer - timer callback to schedule service task
1519 * @t: pointer to timer_list
1521 static void ice_service_timer(struct timer_list
*t
)
1523 struct ice_pf
*pf
= from_timer(pf
, t
, serv_tmr
);
1525 mod_timer(&pf
->serv_tmr
, round_jiffies(pf
->serv_tmr_period
+ jiffies
));
1526 ice_service_task_schedule(pf
);
1530 * ice_handle_mdd_event - handle malicious driver detect event
1531 * @pf: pointer to the PF structure
1533 * Called from service task. OICR interrupt handler indicates MDD event.
1534 * VF MDD logging is guarded by net_ratelimit. Additional PF and VF log
1535 * messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events
1536 * disable the queue, the PF can be configured to reset the VF using ethtool
1537 * private flag mdd-auto-reset-vf.
1539 static void ice_handle_mdd_event(struct ice_pf
*pf
)
1541 struct device
*dev
= ice_pf_to_dev(pf
);
1542 struct ice_hw
*hw
= &pf
->hw
;
1546 if (!test_and_clear_bit(ICE_MDD_EVENT_PENDING
, pf
->state
)) {
1547 /* Since the VF MDD event logging is rate limited, check if
1548 * there are pending MDD events.
1550 ice_print_vfs_mdd_events(pf
);
1554 /* find what triggered an MDD event */
1555 reg
= rd32(hw
, GL_MDET_TX_PQM
);
1556 if (reg
& GL_MDET_TX_PQM_VALID_M
) {
1557 u8 pf_num
= (reg
& GL_MDET_TX_PQM_PF_NUM_M
) >>
1558 GL_MDET_TX_PQM_PF_NUM_S
;
1559 u16 vf_num
= (reg
& GL_MDET_TX_PQM_VF_NUM_M
) >>
1560 GL_MDET_TX_PQM_VF_NUM_S
;
1561 u8 event
= (reg
& GL_MDET_TX_PQM_MAL_TYPE_M
) >>
1562 GL_MDET_TX_PQM_MAL_TYPE_S
;
1563 u16 queue
= ((reg
& GL_MDET_TX_PQM_QNUM_M
) >>
1564 GL_MDET_TX_PQM_QNUM_S
);
1566 if (netif_msg_tx_err(pf
))
1567 dev_info(dev
, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
1568 event
, queue
, pf_num
, vf_num
);
1569 wr32(hw
, GL_MDET_TX_PQM
, 0xffffffff);
1572 reg
= rd32(hw
, GL_MDET_TX_TCLAN
);
1573 if (reg
& GL_MDET_TX_TCLAN_VALID_M
) {
1574 u8 pf_num
= (reg
& GL_MDET_TX_TCLAN_PF_NUM_M
) >>
1575 GL_MDET_TX_TCLAN_PF_NUM_S
;
1576 u16 vf_num
= (reg
& GL_MDET_TX_TCLAN_VF_NUM_M
) >>
1577 GL_MDET_TX_TCLAN_VF_NUM_S
;
1578 u8 event
= (reg
& GL_MDET_TX_TCLAN_MAL_TYPE_M
) >>
1579 GL_MDET_TX_TCLAN_MAL_TYPE_S
;
1580 u16 queue
= ((reg
& GL_MDET_TX_TCLAN_QNUM_M
) >>
1581 GL_MDET_TX_TCLAN_QNUM_S
);
1583 if (netif_msg_tx_err(pf
))
1584 dev_info(dev
, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n",
1585 event
, queue
, pf_num
, vf_num
);
1586 wr32(hw
, GL_MDET_TX_TCLAN
, 0xffffffff);
1589 reg
= rd32(hw
, GL_MDET_RX
);
1590 if (reg
& GL_MDET_RX_VALID_M
) {
1591 u8 pf_num
= (reg
& GL_MDET_RX_PF_NUM_M
) >>
1592 GL_MDET_RX_PF_NUM_S
;
1593 u16 vf_num
= (reg
& GL_MDET_RX_VF_NUM_M
) >>
1594 GL_MDET_RX_VF_NUM_S
;
1595 u8 event
= (reg
& GL_MDET_RX_MAL_TYPE_M
) >>
1596 GL_MDET_RX_MAL_TYPE_S
;
1597 u16 queue
= ((reg
& GL_MDET_RX_QNUM_M
) >>
1600 if (netif_msg_rx_err(pf
))
1601 dev_info(dev
, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n",
1602 event
, queue
, pf_num
, vf_num
);
1603 wr32(hw
, GL_MDET_RX
, 0xffffffff);
1606 /* check to see if this PF caused an MDD event */
1607 reg
= rd32(hw
, PF_MDET_TX_PQM
);
1608 if (reg
& PF_MDET_TX_PQM_VALID_M
) {
1609 wr32(hw
, PF_MDET_TX_PQM
, 0xFFFF);
1610 if (netif_msg_tx_err(pf
))
1611 dev_info(dev
, "Malicious Driver Detection event TX_PQM detected on PF\n");
1614 reg
= rd32(hw
, PF_MDET_TX_TCLAN
);
1615 if (reg
& PF_MDET_TX_TCLAN_VALID_M
) {
1616 wr32(hw
, PF_MDET_TX_TCLAN
, 0xFFFF);
1617 if (netif_msg_tx_err(pf
))
1618 dev_info(dev
, "Malicious Driver Detection event TX_TCLAN detected on PF\n");
1621 reg
= rd32(hw
, PF_MDET_RX
);
1622 if (reg
& PF_MDET_RX_VALID_M
) {
1623 wr32(hw
, PF_MDET_RX
, 0xFFFF);
1624 if (netif_msg_rx_err(pf
))
1625 dev_info(dev
, "Malicious Driver Detection event RX detected on PF\n");
1628 /* Check to see if one of the VFs caused an MDD event, and then
1629 * increment counters and set print pending
1631 ice_for_each_vf(pf
, i
) {
1632 struct ice_vf
*vf
= &pf
->vf
[i
];
1634 reg
= rd32(hw
, VP_MDET_TX_PQM(i
));
1635 if (reg
& VP_MDET_TX_PQM_VALID_M
) {
1636 wr32(hw
, VP_MDET_TX_PQM(i
), 0xFFFF);
1637 vf
->mdd_tx_events
.count
++;
1638 set_bit(ICE_MDD_VF_PRINT_PENDING
, pf
->state
);
1639 if (netif_msg_tx_err(pf
))
1640 dev_info(dev
, "Malicious Driver Detection event TX_PQM detected on VF %d\n",
1644 reg
= rd32(hw
, VP_MDET_TX_TCLAN(i
));
1645 if (reg
& VP_MDET_TX_TCLAN_VALID_M
) {
1646 wr32(hw
, VP_MDET_TX_TCLAN(i
), 0xFFFF);
1647 vf
->mdd_tx_events
.count
++;
1648 set_bit(ICE_MDD_VF_PRINT_PENDING
, pf
->state
);
1649 if (netif_msg_tx_err(pf
))
1650 dev_info(dev
, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n",
1654 reg
= rd32(hw
, VP_MDET_TX_TDPU(i
));
1655 if (reg
& VP_MDET_TX_TDPU_VALID_M
) {
1656 wr32(hw
, VP_MDET_TX_TDPU(i
), 0xFFFF);
1657 vf
->mdd_tx_events
.count
++;
1658 set_bit(ICE_MDD_VF_PRINT_PENDING
, pf
->state
);
1659 if (netif_msg_tx_err(pf
))
1660 dev_info(dev
, "Malicious Driver Detection event TX_TDPU detected on VF %d\n",
1664 reg
= rd32(hw
, VP_MDET_RX(i
));
1665 if (reg
& VP_MDET_RX_VALID_M
) {
1666 wr32(hw
, VP_MDET_RX(i
), 0xFFFF);
1667 vf
->mdd_rx_events
.count
++;
1668 set_bit(ICE_MDD_VF_PRINT_PENDING
, pf
->state
);
1669 if (netif_msg_rx_err(pf
))
1670 dev_info(dev
, "Malicious Driver Detection event RX detected on VF %d\n",
1673 /* Since the queue is disabled on VF Rx MDD events, the
1674 * PF can be configured to reset the VF through ethtool
1675 * private flag mdd-auto-reset-vf.
1677 if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF
, pf
->flags
)) {
1678 /* VF MDD event counters will be cleared by
1679 * reset, so print the event prior to reset.
1681 ice_print_vf_rx_mdd_event(vf
);
1682 mutex_lock(&pf
->vf
[i
].cfg_lock
);
1683 ice_reset_vf(&pf
->vf
[i
], false);
1684 mutex_unlock(&pf
->vf
[i
].cfg_lock
);
1689 ice_print_vfs_mdd_events(pf
);
1693 * ice_force_phys_link_state - Force the physical link state
1694 * @vsi: VSI to force the physical link state to up/down
1695 * @link_up: true/false indicates to set the physical link to up/down
1697 * Force the physical link state by getting the current PHY capabilities from
1698 * hardware and setting the PHY config based on the determined capabilities. If
1699 * link changes a link event will be triggered because both the Enable Automatic
1700 * Link Update and LESM Enable bits are set when setting the PHY capabilities.
1702 * Returns 0 on success, negative on failure
1704 static int ice_force_phys_link_state(struct ice_vsi
*vsi
, bool link_up
)
1706 struct ice_aqc_get_phy_caps_data
*pcaps
;
1707 struct ice_aqc_set_phy_cfg_data
*cfg
;
1708 struct ice_port_info
*pi
;
1712 if (!vsi
|| !vsi
->port_info
|| !vsi
->back
)
1714 if (vsi
->type
!= ICE_VSI_PF
)
1717 dev
= ice_pf_to_dev(vsi
->back
);
1719 pi
= vsi
->port_info
;
1721 pcaps
= kzalloc(sizeof(*pcaps
), GFP_KERNEL
);
1725 retcode
= ice_aq_get_phy_caps(pi
, false, ICE_AQC_REPORT_ACTIVE_CFG
, pcaps
,
1728 dev_err(dev
, "Failed to get phy capabilities, VSI %d error %d\n",
1729 vsi
->vsi_num
, retcode
);
1734 /* No change in link */
1735 if (link_up
== !!(pcaps
->caps
& ICE_AQC_PHY_EN_LINK
) &&
1736 link_up
== !!(pi
->phy
.link_info
.link_info
& ICE_AQ_LINK_UP
))
1739 /* Use the current user PHY configuration. The current user PHY
1740 * configuration is initialized during probe from PHY capabilities
1741 * software mode, and updated on set PHY configuration.
1743 cfg
= kmemdup(&pi
->phy
.curr_user_phy_cfg
, sizeof(*cfg
), GFP_KERNEL
);
1749 cfg
->caps
|= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT
;
1751 cfg
->caps
|= ICE_AQ_PHY_ENA_LINK
;
1753 cfg
->caps
&= ~ICE_AQ_PHY_ENA_LINK
;
1755 retcode
= ice_aq_set_phy_cfg(&vsi
->back
->hw
, pi
, cfg
, NULL
);
1757 dev_err(dev
, "Failed to set phy config, VSI %d error %d\n",
1758 vsi
->vsi_num
, retcode
);
1769 * ice_init_nvm_phy_type - Initialize the NVM PHY type
1770 * @pi: port info structure
1772 * Initialize nvm_phy_type_[low|high] for link lenient mode support
1774 static int ice_init_nvm_phy_type(struct ice_port_info
*pi
)
1776 struct ice_aqc_get_phy_caps_data
*pcaps
;
1777 struct ice_pf
*pf
= pi
->hw
->back
;
1778 enum ice_status status
;
1781 pcaps
= kzalloc(sizeof(*pcaps
), GFP_KERNEL
);
1785 status
= ice_aq_get_phy_caps(pi
, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA
, pcaps
,
1789 dev_err(ice_pf_to_dev(pf
), "Get PHY capability failed.\n");
1794 pf
->nvm_phy_type_hi
= pcaps
->phy_type_high
;
1795 pf
->nvm_phy_type_lo
= pcaps
->phy_type_low
;
1803 * ice_init_link_dflt_override - Initialize link default override
1804 * @pi: port info structure
1806 * Initialize link default override and PHY total port shutdown during probe
1808 static void ice_init_link_dflt_override(struct ice_port_info
*pi
)
1810 struct ice_link_default_override_tlv
*ldo
;
1811 struct ice_pf
*pf
= pi
->hw
->back
;
1813 ldo
= &pf
->link_dflt_override
;
1814 if (ice_get_link_default_override(ldo
, pi
))
1817 if (!(ldo
->options
& ICE_LINK_OVERRIDE_PORT_DIS
))
1820 /* Enable Total Port Shutdown (override/replace link-down-on-close
1821 * ethtool private flag) for ports with Port Disable bit set.
1823 set_bit(ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA
, pf
->flags
);
1824 set_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA
, pf
->flags
);
1828 * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
1829 * @pi: port info structure
1831 * If default override is enabled, initialize the user PHY cfg speed and FEC
1832 * settings using the default override mask from the NVM.
1834 * The PHY should only be configured with the default override settings the
1835 * first time media is available. The ICE_LINK_DEFAULT_OVERRIDE_PENDING state
1836 * is used to indicate that the user PHY cfg default override is initialized
1837 * and the PHY has not been configured with the default override settings. The
1838 * state is set here, and cleared in ice_configure_phy the first time the PHY is
1841 * This function should be called only if the FW doesn't support default
1842 * configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
1844 static void ice_init_phy_cfg_dflt_override(struct ice_port_info
*pi
)
1846 struct ice_link_default_override_tlv
*ldo
;
1847 struct ice_aqc_set_phy_cfg_data
*cfg
;
1848 struct ice_phy_info
*phy
= &pi
->phy
;
1849 struct ice_pf
*pf
= pi
->hw
->back
;
1851 ldo
= &pf
->link_dflt_override
;
1853 /* If link default override is enabled, use to mask NVM PHY capabilities
1854 * for speed and FEC default configuration.
1856 cfg
= &phy
->curr_user_phy_cfg
;
1858 if (ldo
->phy_type_low
|| ldo
->phy_type_high
) {
1859 cfg
->phy_type_low
= pf
->nvm_phy_type_lo
&
1860 cpu_to_le64(ldo
->phy_type_low
);
1861 cfg
->phy_type_high
= pf
->nvm_phy_type_hi
&
1862 cpu_to_le64(ldo
->phy_type_high
);
1864 cfg
->link_fec_opt
= ldo
->fec_options
;
1865 phy
->curr_user_fec_req
= ICE_FEC_AUTO
;
1867 set_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING
, pf
->state
);
1871 * ice_init_phy_user_cfg - Initialize the PHY user configuration
1872 * @pi: port info structure
1874 * Initialize the current user PHY configuration, speed, FEC, and FC requested
1875 * mode to default. The PHY defaults are from get PHY capabilities topology
1876 * with media so call when media is first available. An error is returned if
1877 * called when media is not available. The PHY initialization completed state is
1880 * These configurations are used when setting PHY
1881 * configuration. The user PHY configuration is updated on set PHY
1882 * configuration. Returns 0 on success, negative on failure
1884 static int ice_init_phy_user_cfg(struct ice_port_info
*pi
)
1886 struct ice_aqc_get_phy_caps_data
*pcaps
;
1887 struct ice_phy_info
*phy
= &pi
->phy
;
1888 struct ice_pf
*pf
= pi
->hw
->back
;
1889 enum ice_status status
;
1892 if (!(phy
->link_info
.link_info
& ICE_AQ_MEDIA_AVAILABLE
))
1895 pcaps
= kzalloc(sizeof(*pcaps
), GFP_KERNEL
);
1899 if (ice_fw_supports_report_dflt_cfg(pi
->hw
))
1900 status
= ice_aq_get_phy_caps(pi
, false, ICE_AQC_REPORT_DFLT_CFG
,
1903 status
= ice_aq_get_phy_caps(pi
, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA
,
1906 dev_err(ice_pf_to_dev(pf
), "Get PHY capability failed.\n");
1911 ice_copy_phy_caps_to_cfg(pi
, pcaps
, &pi
->phy
.curr_user_phy_cfg
);
1913 /* check if lenient mode is supported and enabled */
1914 if (ice_fw_supports_link_override(pi
->hw
) &&
1915 !(pcaps
->module_compliance_enforcement
&
1916 ICE_AQC_MOD_ENFORCE_STRICT_MODE
)) {
1917 set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA
, pf
->flags
);
1919 /* if the FW supports default PHY configuration mode, then the driver
1920 * does not have to apply link override settings. If not,
1921 * initialize user PHY configuration with link override values
1923 if (!ice_fw_supports_report_dflt_cfg(pi
->hw
) &&
1924 (pf
->link_dflt_override
.options
& ICE_LINK_OVERRIDE_EN
)) {
1925 ice_init_phy_cfg_dflt_override(pi
);
1930 /* if link default override is not enabled, set user flow control and
1931 * FEC settings based on what get_phy_caps returned
1933 phy
->curr_user_fec_req
= ice_caps_to_fec_mode(pcaps
->caps
,
1934 pcaps
->link_fec_options
);
1935 phy
->curr_user_fc_req
= ice_caps_to_fc_mode(pcaps
->caps
);
1938 phy
->curr_user_speed_req
= ICE_AQ_LINK_SPEED_M
;
1939 set_bit(ICE_PHY_INIT_COMPLETE
, pf
->state
);
1946 * ice_configure_phy - configure PHY
1949 * Set the PHY configuration. If the current PHY configuration is the same as
1950 * the curr_user_phy_cfg, then do nothing to avoid link flap. Otherwise
1951 * configure the based get PHY capabilities for topology with media.
1953 static int ice_configure_phy(struct ice_vsi
*vsi
)
1955 struct device
*dev
= ice_pf_to_dev(vsi
->back
);
1956 struct ice_port_info
*pi
= vsi
->port_info
;
1957 struct ice_aqc_get_phy_caps_data
*pcaps
;
1958 struct ice_aqc_set_phy_cfg_data
*cfg
;
1959 struct ice_phy_info
*phy
= &pi
->phy
;
1960 struct ice_pf
*pf
= vsi
->back
;
1961 enum ice_status status
;
1964 /* Ensure we have media as we cannot configure a medialess port */
1965 if (!(phy
->link_info
.link_info
& ICE_AQ_MEDIA_AVAILABLE
))
1968 ice_print_topo_conflict(vsi
);
1970 if (phy
->link_info
.topo_media_conflict
== ICE_AQ_LINK_TOPO_UNSUPP_MEDIA
)
1973 if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA
, pf
->flags
))
1974 return ice_force_phys_link_state(vsi
, true);
1976 pcaps
= kzalloc(sizeof(*pcaps
), GFP_KERNEL
);
1980 /* Get current PHY config */
1981 status
= ice_aq_get_phy_caps(pi
, false, ICE_AQC_REPORT_ACTIVE_CFG
, pcaps
,
1984 dev_err(dev
, "Failed to get PHY configuration, VSI %d error %s\n",
1985 vsi
->vsi_num
, ice_stat_str(status
));
1990 /* If PHY enable link is configured and configuration has not changed,
1991 * there's nothing to do
1993 if (pcaps
->caps
& ICE_AQC_PHY_EN_LINK
&&
1994 ice_phy_caps_equals_cfg(pcaps
, &phy
->curr_user_phy_cfg
))
1997 /* Use PHY topology as baseline for configuration */
1998 memset(pcaps
, 0, sizeof(*pcaps
));
1999 if (ice_fw_supports_report_dflt_cfg(pi
->hw
))
2000 status
= ice_aq_get_phy_caps(pi
, false, ICE_AQC_REPORT_DFLT_CFG
,
2003 status
= ice_aq_get_phy_caps(pi
, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA
,
2006 dev_err(dev
, "Failed to get PHY caps, VSI %d error %s\n",
2007 vsi
->vsi_num
, ice_stat_str(status
));
2012 cfg
= kzalloc(sizeof(*cfg
), GFP_KERNEL
);
2018 ice_copy_phy_caps_to_cfg(pi
, pcaps
, cfg
);
2020 /* Speed - If default override pending, use curr_user_phy_cfg set in
2021 * ice_init_phy_user_cfg_ldo.
2023 if (test_and_clear_bit(ICE_LINK_DEFAULT_OVERRIDE_PENDING
,
2024 vsi
->back
->state
)) {
2025 cfg
->phy_type_low
= phy
->curr_user_phy_cfg
.phy_type_low
;
2026 cfg
->phy_type_high
= phy
->curr_user_phy_cfg
.phy_type_high
;
2028 u64 phy_low
= 0, phy_high
= 0;
2030 ice_update_phy_type(&phy_low
, &phy_high
,
2031 pi
->phy
.curr_user_speed_req
);
2032 cfg
->phy_type_low
= pcaps
->phy_type_low
& cpu_to_le64(phy_low
);
2033 cfg
->phy_type_high
= pcaps
->phy_type_high
&
2034 cpu_to_le64(phy_high
);
2037 /* Can't provide what was requested; use PHY capabilities */
2038 if (!cfg
->phy_type_low
&& !cfg
->phy_type_high
) {
2039 cfg
->phy_type_low
= pcaps
->phy_type_low
;
2040 cfg
->phy_type_high
= pcaps
->phy_type_high
;
2044 ice_cfg_phy_fec(pi
, cfg
, phy
->curr_user_fec_req
);
2046 /* Can't provide what was requested; use PHY capabilities */
2047 if (cfg
->link_fec_opt
!=
2048 (cfg
->link_fec_opt
& pcaps
->link_fec_options
)) {
2049 cfg
->caps
|= pcaps
->caps
& ICE_AQC_PHY_EN_AUTO_FEC
;
2050 cfg
->link_fec_opt
= pcaps
->link_fec_options
;
2053 /* Flow Control - always supported; no need to check against
2056 ice_cfg_phy_fc(pi
, cfg
, phy
->curr_user_fc_req
);
2058 /* Enable link and link update */
2059 cfg
->caps
|= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT
| ICE_AQ_PHY_ENA_LINK
;
2061 status
= ice_aq_set_phy_cfg(&pf
->hw
, pi
, cfg
, NULL
);
2063 dev_err(dev
, "Failed to set phy config, VSI %d error %s\n",
2064 vsi
->vsi_num
, ice_stat_str(status
));
2075 * ice_check_media_subtask - Check for media
2076 * @pf: pointer to PF struct
2078 * If media is available, then initialize PHY user configuration if it is not
2079 * been, and configure the PHY if the interface is up.
2081 static void ice_check_media_subtask(struct ice_pf
*pf
)
2083 struct ice_port_info
*pi
;
2084 struct ice_vsi
*vsi
;
2087 /* No need to check for media if it's already present */
2088 if (!test_bit(ICE_FLAG_NO_MEDIA
, pf
->flags
))
2091 vsi
= ice_get_main_vsi(pf
);
2095 /* Refresh link info and check if media is present */
2096 pi
= vsi
->port_info
;
2097 err
= ice_update_link_info(pi
);
2101 ice_check_module_power(pf
, pi
->phy
.link_info
.link_cfg_err
);
2103 if (pi
->phy
.link_info
.link_info
& ICE_AQ_MEDIA_AVAILABLE
) {
2104 if (!test_bit(ICE_PHY_INIT_COMPLETE
, pf
->state
))
2105 ice_init_phy_user_cfg(pi
);
2107 /* PHY settings are reset on media insertion, reconfigure
2108 * PHY to preserve settings.
2110 if (test_bit(ICE_VSI_DOWN
, vsi
->state
) &&
2111 test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA
, vsi
->back
->flags
))
2114 err
= ice_configure_phy(vsi
);
2116 clear_bit(ICE_FLAG_NO_MEDIA
, pf
->flags
);
2118 /* A Link Status Event will be generated; the event handler
2119 * will complete bringing the interface up
2125 * ice_service_task - manage and run subtasks
2126 * @work: pointer to work_struct contained by the PF struct
2128 static void ice_service_task(struct work_struct
*work
)
2130 struct ice_pf
*pf
= container_of(work
, struct ice_pf
, serv_task
);
2131 unsigned long start_time
= jiffies
;
2135 /* process reset requests first */
2136 ice_reset_subtask(pf
);
2138 /* bail if a reset/recovery cycle is pending or rebuild failed */
2139 if (ice_is_reset_in_progress(pf
->state
) ||
2140 test_bit(ICE_SUSPENDED
, pf
->state
) ||
2141 test_bit(ICE_NEEDS_RESTART
, pf
->state
)) {
2142 ice_service_task_complete(pf
);
2146 if (test_and_clear_bit(ICE_AUX_ERR_PENDING
, pf
->state
)) {
2147 struct iidc_event
*event
;
2149 event
= kzalloc(sizeof(*event
), GFP_KERNEL
);
2151 set_bit(IIDC_EVENT_CRIT_ERR
, event
->type
);
2152 /* report the entire OICR value to AUX driver */
2153 swap(event
->reg
, pf
->oicr_err_reg
);
2154 ice_send_event_to_aux(pf
, event
);
2159 if (test_bit(ICE_FLAG_PLUG_AUX_DEV
, pf
->flags
)) {
2160 /* Plug aux device per request */
2161 ice_plug_aux_dev(pf
);
2163 /* Mark plugging as done but check whether unplug was
2164 * requested during ice_plug_aux_dev() call
2165 * (e.g. from ice_clear_rdma_cap()) and if so then
2168 if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV
, pf
->flags
))
2169 ice_unplug_aux_dev(pf
);
2172 if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED
, pf
->flags
)) {
2173 struct iidc_event
*event
;
2175 event
= kzalloc(sizeof(*event
), GFP_KERNEL
);
2177 set_bit(IIDC_EVENT_AFTER_MTU_CHANGE
, event
->type
);
2178 ice_send_event_to_aux(pf
, event
);
2183 ice_clean_adminq_subtask(pf
);
2184 ice_check_media_subtask(pf
);
2185 ice_check_for_hang_subtask(pf
);
2186 ice_sync_fltr_subtask(pf
);
2187 ice_handle_mdd_event(pf
);
2188 ice_watchdog_subtask(pf
);
2190 if (ice_is_safe_mode(pf
)) {
2191 ice_service_task_complete(pf
);
2195 ice_process_vflr_event(pf
);
2196 ice_clean_mailboxq_subtask(pf
);
2197 ice_clean_sbq_subtask(pf
);
2198 ice_sync_arfs_fltrs(pf
);
2199 ice_flush_fdir_ctx(pf
);
2201 /* Clear ICE_SERVICE_SCHED flag to allow scheduling next event */
2202 ice_service_task_complete(pf
);
2204 /* If the tasks have taken longer than one service timer period
2205 * or there is more work to be done, reset the service timer to
2206 * schedule the service task now.
2208 if (time_after(jiffies
, (start_time
+ pf
->serv_tmr_period
)) ||
2209 test_bit(ICE_MDD_EVENT_PENDING
, pf
->state
) ||
2210 test_bit(ICE_VFLR_EVENT_PENDING
, pf
->state
) ||
2211 test_bit(ICE_MAILBOXQ_EVENT_PENDING
, pf
->state
) ||
2212 test_bit(ICE_FD_VF_FLUSH_CTX
, pf
->state
) ||
2213 test_bit(ICE_SIDEBANDQ_EVENT_PENDING
, pf
->state
) ||
2214 test_bit(ICE_ADMINQ_EVENT_PENDING
, pf
->state
))
2215 mod_timer(&pf
->serv_tmr
, jiffies
);
2219 * ice_set_ctrlq_len - helper function to set controlq length
2220 * @hw: pointer to the HW instance
2222 static void ice_set_ctrlq_len(struct ice_hw
*hw
)
2224 hw
->adminq
.num_rq_entries
= ICE_AQ_LEN
;
2225 hw
->adminq
.num_sq_entries
= ICE_AQ_LEN
;
2226 hw
->adminq
.rq_buf_size
= ICE_AQ_MAX_BUF_LEN
;
2227 hw
->adminq
.sq_buf_size
= ICE_AQ_MAX_BUF_LEN
;
2228 hw
->mailboxq
.num_rq_entries
= PF_MBX_ARQLEN_ARQLEN_M
;
2229 hw
->mailboxq
.num_sq_entries
= ICE_MBXSQ_LEN
;
2230 hw
->mailboxq
.rq_buf_size
= ICE_MBXQ_MAX_BUF_LEN
;
2231 hw
->mailboxq
.sq_buf_size
= ICE_MBXQ_MAX_BUF_LEN
;
2232 hw
->sbq
.num_rq_entries
= ICE_SBQ_LEN
;
2233 hw
->sbq
.num_sq_entries
= ICE_SBQ_LEN
;
2234 hw
->sbq
.rq_buf_size
= ICE_SBQ_MAX_BUF_LEN
;
2235 hw
->sbq
.sq_buf_size
= ICE_SBQ_MAX_BUF_LEN
;
2239 * ice_schedule_reset - schedule a reset
2240 * @pf: board private structure
2241 * @reset: reset being requested
2243 int ice_schedule_reset(struct ice_pf
*pf
, enum ice_reset_req reset
)
2245 struct device
*dev
= ice_pf_to_dev(pf
);
2247 /* bail out if earlier reset has failed */
2248 if (test_bit(ICE_RESET_FAILED
, pf
->state
)) {
2249 dev_dbg(dev
, "earlier reset has failed\n");
2252 /* bail if reset/recovery already in progress */
2253 if (ice_is_reset_in_progress(pf
->state
)) {
2254 dev_dbg(dev
, "Reset already in progress\n");
2258 ice_unplug_aux_dev(pf
);
2262 set_bit(ICE_PFR_REQ
, pf
->state
);
2264 case ICE_RESET_CORER
:
2265 set_bit(ICE_CORER_REQ
, pf
->state
);
2267 case ICE_RESET_GLOBR
:
2268 set_bit(ICE_GLOBR_REQ
, pf
->state
);
2274 ice_service_task_schedule(pf
);
2279 * ice_irq_affinity_notify - Callback for affinity changes
2280 * @notify: context as to what irq was changed
2281 * @mask: the new affinity mask
2283 * This is a callback function used by the irq_set_affinity_notifier function
2284 * so that we may register to receive changes to the irq affinity masks.
2287 ice_irq_affinity_notify(struct irq_affinity_notify
*notify
,
2288 const cpumask_t
*mask
)
2290 struct ice_q_vector
*q_vector
=
2291 container_of(notify
, struct ice_q_vector
, affinity_notify
);
2293 cpumask_copy(&q_vector
->affinity_mask
, mask
);
2297 * ice_irq_affinity_release - Callback for affinity notifier release
2298 * @ref: internal core kernel usage
2300 * This is a callback function used by the irq_set_affinity_notifier function
2301 * to inform the current notification subscriber that they will no longer
2302 * receive notifications.
2304 static void ice_irq_affinity_release(struct kref __always_unused
*ref
) {}
2307 * ice_vsi_ena_irq - Enable IRQ for the given VSI
2308 * @vsi: the VSI being configured
2310 static int ice_vsi_ena_irq(struct ice_vsi
*vsi
)
2312 struct ice_hw
*hw
= &vsi
->back
->hw
;
2315 ice_for_each_q_vector(vsi
, i
)
2316 ice_irq_dynamic_ena(hw
, vsi
, vsi
->q_vectors
[i
]);
2323 * ice_vsi_req_irq_msix - get MSI-X vectors from the OS for the VSI
2324 * @vsi: the VSI being configured
2325 * @basename: name for the vector
2327 static int ice_vsi_req_irq_msix(struct ice_vsi
*vsi
, char *basename
)
2329 int q_vectors
= vsi
->num_q_vectors
;
2330 struct ice_pf
*pf
= vsi
->back
;
2331 int base
= vsi
->base_vector
;
2338 dev
= ice_pf_to_dev(pf
);
2339 for (vector
= 0; vector
< q_vectors
; vector
++) {
2340 struct ice_q_vector
*q_vector
= vsi
->q_vectors
[vector
];
2342 irq_num
= pf
->msix_entries
[base
+ vector
].vector
;
2344 if (q_vector
->tx
.ring
&& q_vector
->rx
.ring
) {
2345 snprintf(q_vector
->name
, sizeof(q_vector
->name
) - 1,
2346 "%s-%s-%d", basename
, "TxRx", rx_int_idx
++);
2348 } else if (q_vector
->rx
.ring
) {
2349 snprintf(q_vector
->name
, sizeof(q_vector
->name
) - 1,
2350 "%s-%s-%d", basename
, "rx", rx_int_idx
++);
2351 } else if (q_vector
->tx
.ring
) {
2352 snprintf(q_vector
->name
, sizeof(q_vector
->name
) - 1,
2353 "%s-%s-%d", basename
, "tx", tx_int_idx
++);
2355 /* skip this unused q_vector */
2358 if (vsi
->type
== ICE_VSI_CTRL
&& vsi
->vf_id
!= ICE_INVAL_VFID
)
2359 err
= devm_request_irq(dev
, irq_num
, vsi
->irq_handler
,
2360 IRQF_SHARED
, q_vector
->name
,
2363 err
= devm_request_irq(dev
, irq_num
, vsi
->irq_handler
,
2364 0, q_vector
->name
, q_vector
);
2366 netdev_err(vsi
->netdev
, "MSIX request_irq failed, error: %d\n",
2371 /* register for affinity change notifications */
2372 if (!IS_ENABLED(CONFIG_RFS_ACCEL
)) {
2373 struct irq_affinity_notify
*affinity_notify
;
2375 affinity_notify
= &q_vector
->affinity_notify
;
2376 affinity_notify
->notify
= ice_irq_affinity_notify
;
2377 affinity_notify
->release
= ice_irq_affinity_release
;
2378 irq_set_affinity_notifier(irq_num
, affinity_notify
);
2381 /* assign the mask for this irq */
2382 irq_set_affinity_hint(irq_num
, &q_vector
->affinity_mask
);
2385 vsi
->irqs_ready
= true;
2391 irq_num
= pf
->msix_entries
[base
+ vector
].vector
;
2392 if (!IS_ENABLED(CONFIG_RFS_ACCEL
))
2393 irq_set_affinity_notifier(irq_num
, NULL
);
2394 irq_set_affinity_hint(irq_num
, NULL
);
2395 devm_free_irq(dev
, irq_num
, &vsi
->q_vectors
[vector
]);
2401 * ice_xdp_alloc_setup_rings - Allocate and setup Tx rings for XDP
2402 * @vsi: VSI to setup Tx rings used by XDP
2404 * Return 0 on success and negative value on error
2406 static int ice_xdp_alloc_setup_rings(struct ice_vsi
*vsi
)
2408 struct device
*dev
= ice_pf_to_dev(vsi
->back
);
2411 for (i
= 0; i
< vsi
->num_xdp_txq
; i
++) {
2412 u16 xdp_q_idx
= vsi
->alloc_txq
+ i
;
2413 struct ice_ring
*xdp_ring
;
2415 xdp_ring
= kzalloc(sizeof(*xdp_ring
), GFP_KERNEL
);
2418 goto free_xdp_rings
;
2420 xdp_ring
->q_index
= xdp_q_idx
;
2421 xdp_ring
->reg_idx
= vsi
->txq_map
[xdp_q_idx
];
2422 xdp_ring
->ring_active
= false;
2423 xdp_ring
->vsi
= vsi
;
2424 xdp_ring
->netdev
= NULL
;
2425 xdp_ring
->dev
= dev
;
2426 xdp_ring
->count
= vsi
->num_tx_desc
;
2427 WRITE_ONCE(vsi
->xdp_rings
[i
], xdp_ring
);
2428 if (ice_setup_tx_ring(xdp_ring
))
2429 goto free_xdp_rings
;
2430 ice_set_ring_xdp(xdp_ring
);
2431 xdp_ring
->xsk_pool
= ice_xsk_pool(xdp_ring
);
2438 if (vsi
->xdp_rings
[i
] && vsi
->xdp_rings
[i
]->desc
)
2439 ice_free_tx_ring(vsi
->xdp_rings
[i
]);
2444 * ice_vsi_assign_bpf_prog - set or clear bpf prog pointer on VSI
2445 * @vsi: VSI to set the bpf prog on
2446 * @prog: the bpf prog pointer
2448 static void ice_vsi_assign_bpf_prog(struct ice_vsi
*vsi
, struct bpf_prog
*prog
)
2450 struct bpf_prog
*old_prog
;
2453 old_prog
= xchg(&vsi
->xdp_prog
, prog
);
2455 bpf_prog_put(old_prog
);
2457 ice_for_each_rxq(vsi
, i
)
2458 WRITE_ONCE(vsi
->rx_rings
[i
]->xdp_prog
, vsi
->xdp_prog
);
2462 * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP
2463 * @vsi: VSI to bring up Tx rings used by XDP
2464 * @prog: bpf program that will be assigned to VSI
2466 * Return 0 on success and negative value on error
2468 int ice_prepare_xdp_rings(struct ice_vsi
*vsi
, struct bpf_prog
*prog
)
2470 u16 max_txqs
[ICE_MAX_TRAFFIC_CLASS
] = { 0 };
2471 int xdp_rings_rem
= vsi
->num_xdp_txq
;
2472 struct ice_pf
*pf
= vsi
->back
;
2473 struct ice_qs_cfg xdp_qs_cfg
= {
2474 .qs_mutex
= &pf
->avail_q_mutex
,
2475 .pf_map
= pf
->avail_txqs
,
2476 .pf_map_size
= pf
->max_pf_txqs
,
2477 .q_count
= vsi
->num_xdp_txq
,
2478 .scatter_count
= ICE_MAX_SCATTER_TXQS
,
2479 .vsi_map
= vsi
->txq_map
,
2480 .vsi_map_offset
= vsi
->alloc_txq
,
2481 .mapping_mode
= ICE_VSI_MAP_CONTIG
2483 enum ice_status status
;
2487 dev
= ice_pf_to_dev(pf
);
2488 vsi
->xdp_rings
= devm_kcalloc(dev
, vsi
->num_xdp_txq
,
2489 sizeof(*vsi
->xdp_rings
), GFP_KERNEL
);
2490 if (!vsi
->xdp_rings
)
2493 vsi
->xdp_mapping_mode
= xdp_qs_cfg
.mapping_mode
;
2494 if (__ice_vsi_get_qs(&xdp_qs_cfg
))
2497 if (ice_xdp_alloc_setup_rings(vsi
))
2498 goto clear_xdp_rings
;
2500 /* follow the logic from ice_vsi_map_rings_to_vectors */
2501 ice_for_each_q_vector(vsi
, v_idx
) {
2502 struct ice_q_vector
*q_vector
= vsi
->q_vectors
[v_idx
];
2503 int xdp_rings_per_v
, q_id
, q_base
;
2505 xdp_rings_per_v
= DIV_ROUND_UP(xdp_rings_rem
,
2506 vsi
->num_q_vectors
- v_idx
);
2507 q_base
= vsi
->num_xdp_txq
- xdp_rings_rem
;
2509 for (q_id
= q_base
; q_id
< (q_base
+ xdp_rings_per_v
); q_id
++) {
2510 struct ice_ring
*xdp_ring
= vsi
->xdp_rings
[q_id
];
2512 xdp_ring
->q_vector
= q_vector
;
2513 xdp_ring
->next
= q_vector
->tx
.ring
;
2514 q_vector
->tx
.ring
= xdp_ring
;
2516 xdp_rings_rem
-= xdp_rings_per_v
;
2519 /* omit the scheduler update if in reset path; XDP queues will be
2520 * taken into account at the end of ice_vsi_rebuild, where
2521 * ice_cfg_vsi_lan is being called
2523 if (ice_is_reset_in_progress(pf
->state
))
2526 /* tell the Tx scheduler that right now we have
2529 for (i
= 0; i
< vsi
->tc_cfg
.numtc
; i
++)
2530 max_txqs
[i
] = vsi
->num_txq
+ vsi
->num_xdp_txq
;
2532 status
= ice_cfg_vsi_lan(vsi
->port_info
, vsi
->idx
, vsi
->tc_cfg
.ena_tc
,
2535 dev_err(dev
, "Failed VSI LAN queue config for XDP, error: %s\n",
2536 ice_stat_str(status
));
2537 goto clear_xdp_rings
;
2540 /* assign the prog only when it's not already present on VSI;
2541 * this flow is a subject of both ethtool -L and ndo_bpf flows;
2542 * VSI rebuild that happens under ethtool -L can expose us to
2543 * the bpf_prog refcount issues as we would be swapping same
2544 * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put
2545 * on it as it would be treated as an 'old_prog'; for ndo_bpf
2546 * this is not harmful as dev_xdp_install bumps the refcount
2547 * before calling the op exposed by the driver;
2549 if (!ice_is_xdp_ena_vsi(vsi
))
2550 ice_vsi_assign_bpf_prog(vsi
, prog
);
2554 for (i
= 0; i
< vsi
->num_xdp_txq
; i
++)
2555 if (vsi
->xdp_rings
[i
]) {
2556 kfree_rcu(vsi
->xdp_rings
[i
], rcu
);
2557 vsi
->xdp_rings
[i
] = NULL
;
2561 mutex_lock(&pf
->avail_q_mutex
);
2562 for (i
= 0; i
< vsi
->num_xdp_txq
; i
++) {
2563 clear_bit(vsi
->txq_map
[i
+ vsi
->alloc_txq
], pf
->avail_txqs
);
2564 vsi
->txq_map
[i
+ vsi
->alloc_txq
] = ICE_INVAL_Q_INDEX
;
2566 mutex_unlock(&pf
->avail_q_mutex
);
2568 devm_kfree(dev
, vsi
->xdp_rings
);
2573 * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings
2574 * @vsi: VSI to remove XDP rings
2576 * Detach XDP rings from irq vectors, clean up the PF bitmap and free
2579 int ice_destroy_xdp_rings(struct ice_vsi
*vsi
)
2581 u16 max_txqs
[ICE_MAX_TRAFFIC_CLASS
] = { 0 };
2582 struct ice_pf
*pf
= vsi
->back
;
2585 /* q_vectors are freed in reset path so there's no point in detaching
2586 * rings; in case of rebuild being triggered not from reset bits
2587 * in pf->state won't be set, so additionally check first q_vector
2590 if (ice_is_reset_in_progress(pf
->state
) || !vsi
->q_vectors
[0])
2593 ice_for_each_q_vector(vsi
, v_idx
) {
2594 struct ice_q_vector
*q_vector
= vsi
->q_vectors
[v_idx
];
2595 struct ice_ring
*ring
;
2597 ice_for_each_ring(ring
, q_vector
->tx
)
2598 if (!ring
->tx_buf
|| !ice_ring_is_xdp(ring
))
2601 /* restore the value of last node prior to XDP setup */
2602 q_vector
->tx
.ring
= ring
;
2606 mutex_lock(&pf
->avail_q_mutex
);
2607 for (i
= 0; i
< vsi
->num_xdp_txq
; i
++) {
2608 clear_bit(vsi
->txq_map
[i
+ vsi
->alloc_txq
], pf
->avail_txqs
);
2609 vsi
->txq_map
[i
+ vsi
->alloc_txq
] = ICE_INVAL_Q_INDEX
;
2611 mutex_unlock(&pf
->avail_q_mutex
);
2613 for (i
= 0; i
< vsi
->num_xdp_txq
; i
++)
2614 if (vsi
->xdp_rings
[i
]) {
2615 if (vsi
->xdp_rings
[i
]->desc
) {
2617 ice_free_tx_ring(vsi
->xdp_rings
[i
]);
2619 kfree_rcu(vsi
->xdp_rings
[i
], rcu
);
2620 vsi
->xdp_rings
[i
] = NULL
;
2623 devm_kfree(ice_pf_to_dev(pf
), vsi
->xdp_rings
);
2624 vsi
->xdp_rings
= NULL
;
2626 if (ice_is_reset_in_progress(pf
->state
) || !vsi
->q_vectors
[0])
2629 ice_vsi_assign_bpf_prog(vsi
, NULL
);
2631 /* notify Tx scheduler that we destroyed XDP queues and bring
2632 * back the old number of child nodes
2634 for (i
= 0; i
< vsi
->tc_cfg
.numtc
; i
++)
2635 max_txqs
[i
] = vsi
->num_txq
;
2637 /* change number of XDP Tx queues to 0 */
2638 vsi
->num_xdp_txq
= 0;
2640 return ice_cfg_vsi_lan(vsi
->port_info
, vsi
->idx
, vsi
->tc_cfg
.ena_tc
,
2645 * ice_vsi_rx_napi_schedule - Schedule napi on RX queues from VSI
2646 * @vsi: VSI to schedule napi on
2648 static void ice_vsi_rx_napi_schedule(struct ice_vsi
*vsi
)
2652 ice_for_each_rxq(vsi
, i
) {
2653 struct ice_ring
*rx_ring
= vsi
->rx_rings
[i
];
2655 if (rx_ring
->xsk_pool
)
2656 napi_schedule(&rx_ring
->q_vector
->napi
);
2661 * ice_xdp_setup_prog - Add or remove XDP eBPF program
2662 * @vsi: VSI to setup XDP for
2663 * @prog: XDP program
2664 * @extack: netlink extended ack
2667 ice_xdp_setup_prog(struct ice_vsi
*vsi
, struct bpf_prog
*prog
,
2668 struct netlink_ext_ack
*extack
)
2670 int frame_size
= vsi
->netdev
->mtu
+ ICE_ETH_PKT_HDR_PAD
;
2671 bool if_running
= netif_running(vsi
->netdev
);
2672 int ret
= 0, xdp_ring_err
= 0;
2674 if (frame_size
> vsi
->rx_buf_len
) {
2675 NL_SET_ERR_MSG_MOD(extack
, "MTU too large for loading XDP");
2679 /* need to stop netdev while setting up the program for Rx rings */
2680 if (if_running
&& !test_and_set_bit(ICE_VSI_DOWN
, vsi
->state
)) {
2681 ret
= ice_down(vsi
);
2683 NL_SET_ERR_MSG_MOD(extack
, "Preparing device for XDP attach failed");
2688 if (!ice_is_xdp_ena_vsi(vsi
) && prog
) {
2689 vsi
->num_xdp_txq
= vsi
->alloc_rxq
;
2690 xdp_ring_err
= ice_prepare_xdp_rings(vsi
, prog
);
2692 NL_SET_ERR_MSG_MOD(extack
, "Setting up XDP Tx resources failed");
2693 } else if (ice_is_xdp_ena_vsi(vsi
) && !prog
) {
2694 xdp_ring_err
= ice_destroy_xdp_rings(vsi
);
2696 NL_SET_ERR_MSG_MOD(extack
, "Freeing XDP Tx resources failed");
2698 /* safe to call even when prog == vsi->xdp_prog as
2699 * dev_xdp_install in net/core/dev.c incremented prog's
2700 * refcount so corresponding bpf_prog_put won't cause
2703 ice_vsi_assign_bpf_prog(vsi
, prog
);
2710 ice_vsi_rx_napi_schedule(vsi
);
2712 return (ret
|| xdp_ring_err
) ? -ENOMEM
: 0;
2716 * ice_xdp_safe_mode - XDP handler for safe mode
2720 static int ice_xdp_safe_mode(struct net_device __always_unused
*dev
,
2721 struct netdev_bpf
*xdp
)
2723 NL_SET_ERR_MSG_MOD(xdp
->extack
,
2724 "Please provide working DDP firmware package in order to use XDP\n"
2725 "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst");
2730 * ice_xdp - implements XDP handler
2734 static int ice_xdp(struct net_device
*dev
, struct netdev_bpf
*xdp
)
2736 struct ice_netdev_priv
*np
= netdev_priv(dev
);
2737 struct ice_vsi
*vsi
= np
->vsi
;
2739 if (vsi
->type
!= ICE_VSI_PF
) {
2740 NL_SET_ERR_MSG_MOD(xdp
->extack
, "XDP can be loaded only on PF VSI");
2744 switch (xdp
->command
) {
2745 case XDP_SETUP_PROG
:
2746 return ice_xdp_setup_prog(vsi
, xdp
->prog
, xdp
->extack
);
2747 case XDP_SETUP_XSK_POOL
:
2748 return ice_xsk_pool_setup(vsi
, xdp
->xsk
.pool
,
2756 * ice_ena_misc_vector - enable the non-queue interrupts
2757 * @pf: board private structure
2759 static void ice_ena_misc_vector(struct ice_pf
*pf
)
2761 struct ice_hw
*hw
= &pf
->hw
;
2764 /* Disable anti-spoof detection interrupt to prevent spurious event
2765 * interrupts during a function reset. Anti-spoof functionally is
2768 val
= rd32(hw
, GL_MDCK_TX_TDPU
);
2769 val
|= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M
;
2770 wr32(hw
, GL_MDCK_TX_TDPU
, val
);
2772 /* clear things first */
2773 wr32(hw
, PFINT_OICR_ENA
, 0); /* disable all */
2774 rd32(hw
, PFINT_OICR
); /* read to clear */
2776 val
= (PFINT_OICR_ECC_ERR_M
|
2777 PFINT_OICR_MAL_DETECT_M
|
2779 PFINT_OICR_PCI_EXCEPTION_M
|
2781 PFINT_OICR_HMC_ERR_M
|
2782 PFINT_OICR_PE_PUSH_M
|
2783 PFINT_OICR_PE_CRITERR_M
);
2785 wr32(hw
, PFINT_OICR_ENA
, val
);
2787 /* SW_ITR_IDX = 0, but don't change INTENA */
2788 wr32(hw
, GLINT_DYN_CTL(pf
->oicr_idx
),
2789 GLINT_DYN_CTL_SW_ITR_INDX_M
| GLINT_DYN_CTL_INTENA_MSK_M
);
2793 * ice_misc_intr - misc interrupt handler
2794 * @irq: interrupt number
2795 * @data: pointer to a q_vector
2797 static irqreturn_t
ice_misc_intr(int __always_unused irq
, void *data
)
2799 struct ice_pf
*pf
= (struct ice_pf
*)data
;
2800 struct ice_hw
*hw
= &pf
->hw
;
2801 irqreturn_t ret
= IRQ_NONE
;
2805 dev
= ice_pf_to_dev(pf
);
2806 set_bit(ICE_ADMINQ_EVENT_PENDING
, pf
->state
);
2807 set_bit(ICE_MAILBOXQ_EVENT_PENDING
, pf
->state
);
2808 set_bit(ICE_SIDEBANDQ_EVENT_PENDING
, pf
->state
);
2810 oicr
= rd32(hw
, PFINT_OICR
);
2811 ena_mask
= rd32(hw
, PFINT_OICR_ENA
);
2813 if (oicr
& PFINT_OICR_SWINT_M
) {
2814 ena_mask
&= ~PFINT_OICR_SWINT_M
;
2818 if (oicr
& PFINT_OICR_MAL_DETECT_M
) {
2819 ena_mask
&= ~PFINT_OICR_MAL_DETECT_M
;
2820 set_bit(ICE_MDD_EVENT_PENDING
, pf
->state
);
2822 if (oicr
& PFINT_OICR_VFLR_M
) {
2823 /* disable any further VFLR event notifications */
2824 if (test_bit(ICE_VF_RESETS_DISABLED
, pf
->state
)) {
2825 u32 reg
= rd32(hw
, PFINT_OICR_ENA
);
2827 reg
&= ~PFINT_OICR_VFLR_M
;
2828 wr32(hw
, PFINT_OICR_ENA
, reg
);
2830 ena_mask
&= ~PFINT_OICR_VFLR_M
;
2831 set_bit(ICE_VFLR_EVENT_PENDING
, pf
->state
);
2835 if (oicr
& PFINT_OICR_GRST_M
) {
2838 /* we have a reset warning */
2839 ena_mask
&= ~PFINT_OICR_GRST_M
;
2840 reset
= (rd32(hw
, GLGEN_RSTAT
) & GLGEN_RSTAT_RESET_TYPE_M
) >>
2841 GLGEN_RSTAT_RESET_TYPE_S
;
2843 if (reset
== ICE_RESET_CORER
)
2845 else if (reset
== ICE_RESET_GLOBR
)
2847 else if (reset
== ICE_RESET_EMPR
)
2850 dev_dbg(dev
, "Invalid reset type %d\n", reset
);
2852 /* If a reset cycle isn't already in progress, we set a bit in
2853 * pf->state so that the service task can start a reset/rebuild.
2855 if (!test_and_set_bit(ICE_RESET_OICR_RECV
, pf
->state
)) {
2856 if (reset
== ICE_RESET_CORER
)
2857 set_bit(ICE_CORER_RECV
, pf
->state
);
2858 else if (reset
== ICE_RESET_GLOBR
)
2859 set_bit(ICE_GLOBR_RECV
, pf
->state
);
2861 set_bit(ICE_EMPR_RECV
, pf
->state
);
2863 /* There are couple of different bits at play here.
2864 * hw->reset_ongoing indicates whether the hardware is
2865 * in reset. This is set to true when a reset interrupt
2866 * is received and set back to false after the driver
2867 * has determined that the hardware is out of reset.
2869 * ICE_RESET_OICR_RECV in pf->state indicates
2870 * that a post reset rebuild is required before the
2871 * driver is operational again. This is set above.
2873 * As this is the start of the reset/rebuild cycle, set
2874 * both to indicate that.
2876 hw
->reset_ongoing
= true;
2880 if (oicr
& PFINT_OICR_TSYN_TX_M
) {
2881 ena_mask
&= ~PFINT_OICR_TSYN_TX_M
;
2882 ice_ptp_process_ts(pf
);
2885 if (oicr
& PFINT_OICR_TSYN_EVNT_M
) {
2886 u8 tmr_idx
= hw
->func_caps
.ts_func_info
.tmr_index_owned
;
2887 u32 gltsyn_stat
= rd32(hw
, GLTSYN_STAT(tmr_idx
));
2889 /* Save EVENTs from GTSYN register */
2890 pf
->ptp
.ext_ts_irq
|= gltsyn_stat
& (GLTSYN_STAT_EVENT0_M
|
2891 GLTSYN_STAT_EVENT1_M
|
2892 GLTSYN_STAT_EVENT2_M
);
2893 ena_mask
&= ~PFINT_OICR_TSYN_EVNT_M
;
2894 kthread_queue_work(pf
->ptp
.kworker
, &pf
->ptp
.extts_work
);
2897 #define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M)
2898 if (oicr
& ICE_AUX_CRIT_ERR
) {
2899 pf
->oicr_err_reg
|= oicr
;
2900 set_bit(ICE_AUX_ERR_PENDING
, pf
->state
);
2901 ena_mask
&= ~ICE_AUX_CRIT_ERR
;
2904 /* Report any remaining unexpected interrupts */
2907 dev_dbg(dev
, "unhandled interrupt oicr=0x%08x\n", oicr
);
2908 /* If a critical error is pending there is no choice but to
2911 if (oicr
& (PFINT_OICR_PCI_EXCEPTION_M
|
2912 PFINT_OICR_ECC_ERR_M
)) {
2913 set_bit(ICE_PFR_REQ
, pf
->state
);
2914 ice_service_task_schedule(pf
);
2919 ice_service_task_schedule(pf
);
2920 ice_irq_dynamic_ena(hw
, NULL
, NULL
);
2926 * ice_dis_ctrlq_interrupts - disable control queue interrupts
2927 * @hw: pointer to HW structure
2929 static void ice_dis_ctrlq_interrupts(struct ice_hw
*hw
)
2931 /* disable Admin queue Interrupt causes */
2932 wr32(hw
, PFINT_FW_CTL
,
2933 rd32(hw
, PFINT_FW_CTL
) & ~PFINT_FW_CTL_CAUSE_ENA_M
);
2935 /* disable Mailbox queue Interrupt causes */
2936 wr32(hw
, PFINT_MBX_CTL
,
2937 rd32(hw
, PFINT_MBX_CTL
) & ~PFINT_MBX_CTL_CAUSE_ENA_M
);
2939 wr32(hw
, PFINT_SB_CTL
,
2940 rd32(hw
, PFINT_SB_CTL
) & ~PFINT_SB_CTL_CAUSE_ENA_M
);
2942 /* disable Control queue Interrupt causes */
2943 wr32(hw
, PFINT_OICR_CTL
,
2944 rd32(hw
, PFINT_OICR_CTL
) & ~PFINT_OICR_CTL_CAUSE_ENA_M
);
2950 * ice_free_irq_msix_misc - Unroll misc vector setup
2951 * @pf: board private structure
2953 static void ice_free_irq_msix_misc(struct ice_pf
*pf
)
2955 struct ice_hw
*hw
= &pf
->hw
;
2957 ice_dis_ctrlq_interrupts(hw
);
2959 /* disable OICR interrupt */
2960 wr32(hw
, PFINT_OICR_ENA
, 0);
2963 if (pf
->msix_entries
) {
2964 synchronize_irq(pf
->msix_entries
[pf
->oicr_idx
].vector
);
2965 devm_free_irq(ice_pf_to_dev(pf
),
2966 pf
->msix_entries
[pf
->oicr_idx
].vector
, pf
);
2969 pf
->num_avail_sw_msix
+= 1;
2970 ice_free_res(pf
->irq_tracker
, pf
->oicr_idx
, ICE_RES_MISC_VEC_ID
);
2974 * ice_ena_ctrlq_interrupts - enable control queue interrupts
2975 * @hw: pointer to HW structure
2976 * @reg_idx: HW vector index to associate the control queue interrupts with
2978 static void ice_ena_ctrlq_interrupts(struct ice_hw
*hw
, u16 reg_idx
)
2982 val
= ((reg_idx
& PFINT_OICR_CTL_MSIX_INDX_M
) |
2983 PFINT_OICR_CTL_CAUSE_ENA_M
);
2984 wr32(hw
, PFINT_OICR_CTL
, val
);
2986 /* enable Admin queue Interrupt causes */
2987 val
= ((reg_idx
& PFINT_FW_CTL_MSIX_INDX_M
) |
2988 PFINT_FW_CTL_CAUSE_ENA_M
);
2989 wr32(hw
, PFINT_FW_CTL
, val
);
2991 /* enable Mailbox queue Interrupt causes */
2992 val
= ((reg_idx
& PFINT_MBX_CTL_MSIX_INDX_M
) |
2993 PFINT_MBX_CTL_CAUSE_ENA_M
);
2994 wr32(hw
, PFINT_MBX_CTL
, val
);
2996 /* This enables Sideband queue Interrupt causes */
2997 val
= ((reg_idx
& PFINT_SB_CTL_MSIX_INDX_M
) |
2998 PFINT_SB_CTL_CAUSE_ENA_M
);
2999 wr32(hw
, PFINT_SB_CTL
, val
);
3005 * ice_req_irq_msix_misc - Setup the misc vector to handle non queue events
3006 * @pf: board private structure
3008 * This sets up the handler for MSIX 0, which is used to manage the
3009 * non-queue interrupts, e.g. AdminQ and errors. This is not used
3010 * when in MSI or Legacy interrupt mode.
3012 static int ice_req_irq_msix_misc(struct ice_pf
*pf
)
3014 struct device
*dev
= ice_pf_to_dev(pf
);
3015 struct ice_hw
*hw
= &pf
->hw
;
3016 int oicr_idx
, err
= 0;
3018 if (!pf
->int_name
[0])
3019 snprintf(pf
->int_name
, sizeof(pf
->int_name
) - 1, "%s-%s:misc",
3020 dev_driver_string(dev
), dev_name(dev
));
3022 /* Do not request IRQ but do enable OICR interrupt since settings are
3023 * lost during reset. Note that this function is called only during
3024 * rebuild path and not while reset is in progress.
3026 if (ice_is_reset_in_progress(pf
->state
))
3029 /* reserve one vector in irq_tracker for misc interrupts */
3030 oicr_idx
= ice_get_res(pf
, pf
->irq_tracker
, 1, ICE_RES_MISC_VEC_ID
);
3034 pf
->num_avail_sw_msix
-= 1;
3035 pf
->oicr_idx
= (u16
)oicr_idx
;
3037 err
= devm_request_irq(dev
, pf
->msix_entries
[pf
->oicr_idx
].vector
,
3038 ice_misc_intr
, 0, pf
->int_name
, pf
);
3040 dev_err(dev
, "devm_request_irq for %s failed: %d\n",
3042 ice_free_res(pf
->irq_tracker
, 1, ICE_RES_MISC_VEC_ID
);
3043 pf
->num_avail_sw_msix
+= 1;
3048 ice_ena_misc_vector(pf
);
3050 ice_ena_ctrlq_interrupts(hw
, pf
->oicr_idx
);
3051 wr32(hw
, GLINT_ITR(ICE_RX_ITR
, pf
->oicr_idx
),
3052 ITR_REG_ALIGN(ICE_ITR_8K
) >> ICE_ITR_GRAN_S
);
3055 ice_irq_dynamic_ena(hw
, NULL
, NULL
);
3061 * ice_napi_add - register NAPI handler for the VSI
3062 * @vsi: VSI for which NAPI handler is to be registered
3064 * This function is only called in the driver's load path. Registering the NAPI
3065 * handler is done in ice_vsi_alloc_q_vector() for all other cases (i.e. resume,
3066 * reset/rebuild, etc.)
3068 static void ice_napi_add(struct ice_vsi
*vsi
)
3075 ice_for_each_q_vector(vsi
, v_idx
)
3076 netif_napi_add(vsi
->netdev
, &vsi
->q_vectors
[v_idx
]->napi
,
3077 ice_napi_poll
, NAPI_POLL_WEIGHT
);
3081 * ice_set_ops - set netdev and ethtools ops for the given netdev
3082 * @netdev: netdev instance
3084 static void ice_set_ops(struct net_device
*netdev
)
3086 struct ice_pf
*pf
= ice_netdev_to_pf(netdev
);
3088 if (ice_is_safe_mode(pf
)) {
3089 netdev
->netdev_ops
= &ice_netdev_safe_mode_ops
;
3090 ice_set_ethtool_safe_mode_ops(netdev
);
3094 netdev
->netdev_ops
= &ice_netdev_ops
;
3095 netdev
->udp_tunnel_nic_info
= &pf
->hw
.udp_tunnel_nic
;
3096 ice_set_ethtool_ops(netdev
);
3100 * ice_set_netdev_features - set features for the given netdev
3101 * @netdev: netdev instance
3103 static void ice_set_netdev_features(struct net_device
*netdev
)
3105 struct ice_pf
*pf
= ice_netdev_to_pf(netdev
);
3106 netdev_features_t csumo_features
;
3107 netdev_features_t vlano_features
;
3108 netdev_features_t dflt_features
;
3109 netdev_features_t tso_features
;
3111 if (ice_is_safe_mode(pf
)) {
3113 netdev
->features
= NETIF_F_SG
| NETIF_F_HIGHDMA
;
3114 netdev
->hw_features
= netdev
->features
;
3118 dflt_features
= NETIF_F_SG
|
3123 csumo_features
= NETIF_F_RXCSUM
|
3128 vlano_features
= NETIF_F_HW_VLAN_CTAG_FILTER
|
3129 NETIF_F_HW_VLAN_CTAG_TX
|
3130 NETIF_F_HW_VLAN_CTAG_RX
;
3132 tso_features
= NETIF_F_TSO
|
3136 NETIF_F_GSO_UDP_TUNNEL
|
3137 NETIF_F_GSO_GRE_CSUM
|
3138 NETIF_F_GSO_UDP_TUNNEL_CSUM
|
3139 NETIF_F_GSO_PARTIAL
|
3140 NETIF_F_GSO_IPXIP4
|
3141 NETIF_F_GSO_IPXIP6
|
3144 netdev
->gso_partial_features
|= NETIF_F_GSO_UDP_TUNNEL_CSUM
|
3145 NETIF_F_GSO_GRE_CSUM
;
3146 /* set features that user can change */
3147 netdev
->hw_features
= dflt_features
| csumo_features
|
3148 vlano_features
| tso_features
;
3150 /* add support for HW_CSUM on packets with MPLS header */
3151 netdev
->mpls_features
= NETIF_F_HW_CSUM
;
3153 /* enable features */
3154 netdev
->features
|= netdev
->hw_features
;
3155 /* encap and VLAN devices inherit default, csumo and tso features */
3156 netdev
->hw_enc_features
|= dflt_features
| csumo_features
|
3158 netdev
->vlan_features
|= dflt_features
| csumo_features
|
3163 * ice_cfg_netdev - Allocate, configure and register a netdev
3164 * @vsi: the VSI associated with the new netdev
3166 * Returns 0 on success, negative value on failure
3168 static int ice_cfg_netdev(struct ice_vsi
*vsi
)
3170 struct ice_netdev_priv
*np
;
3171 struct net_device
*netdev
;
3172 u8 mac_addr
[ETH_ALEN
];
3174 netdev
= alloc_etherdev_mqs(sizeof(*np
), vsi
->alloc_txq
,
3179 set_bit(ICE_VSI_NETDEV_ALLOCD
, vsi
->state
);
3180 vsi
->netdev
= netdev
;
3181 np
= netdev_priv(netdev
);
3184 ice_set_netdev_features(netdev
);
3186 ice_set_ops(netdev
);
3188 if (vsi
->type
== ICE_VSI_PF
) {
3189 SET_NETDEV_DEV(netdev
, ice_pf_to_dev(vsi
->back
));
3190 ether_addr_copy(mac_addr
, vsi
->port_info
->mac
.perm_addr
);
3191 ether_addr_copy(netdev
->dev_addr
, mac_addr
);
3192 ether_addr_copy(netdev
->perm_addr
, mac_addr
);
3195 netdev
->priv_flags
|= IFF_UNICAST_FLT
;
3197 /* Setup netdev TC information */
3198 ice_vsi_cfg_netdev_tc(vsi
, vsi
->tc_cfg
.ena_tc
);
3200 /* setup watchdog timeout value to be 5 second */
3201 netdev
->watchdog_timeo
= 5 * HZ
;
3203 netdev
->min_mtu
= ETH_MIN_MTU
;
3204 netdev
->max_mtu
= ICE_MAX_MTU
;
3210 * ice_fill_rss_lut - Fill the RSS lookup table with default values
3211 * @lut: Lookup table
3212 * @rss_table_size: Lookup table size
3213 * @rss_size: Range of queue number for hashing
3215 void ice_fill_rss_lut(u8
*lut
, u16 rss_table_size
, u16 rss_size
)
3219 for (i
= 0; i
< rss_table_size
; i
++)
3220 lut
[i
] = i
% rss_size
;
3224 * ice_pf_vsi_setup - Set up a PF VSI
3225 * @pf: board private structure
3226 * @pi: pointer to the port_info instance
3228 * Returns pointer to the successfully allocated VSI software struct
3229 * on success, otherwise returns NULL on failure.
3231 static struct ice_vsi
*
3232 ice_pf_vsi_setup(struct ice_pf
*pf
, struct ice_port_info
*pi
)
3234 return ice_vsi_setup(pf
, pi
, ICE_VSI_PF
, ICE_INVAL_VFID
);
3238 * ice_ctrl_vsi_setup - Set up a control VSI
3239 * @pf: board private structure
3240 * @pi: pointer to the port_info instance
3242 * Returns pointer to the successfully allocated VSI software struct
3243 * on success, otherwise returns NULL on failure.
3245 static struct ice_vsi
*
3246 ice_ctrl_vsi_setup(struct ice_pf
*pf
, struct ice_port_info
*pi
)
3248 return ice_vsi_setup(pf
, pi
, ICE_VSI_CTRL
, ICE_INVAL_VFID
);
3252 * ice_lb_vsi_setup - Set up a loopback VSI
3253 * @pf: board private structure
3254 * @pi: pointer to the port_info instance
3256 * Returns pointer to the successfully allocated VSI software struct
3257 * on success, otherwise returns NULL on failure.
3260 ice_lb_vsi_setup(struct ice_pf
*pf
, struct ice_port_info
*pi
)
3262 return ice_vsi_setup(pf
, pi
, ICE_VSI_LB
, ICE_INVAL_VFID
);
3266 * ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
3267 * @netdev: network interface to be adjusted
3268 * @proto: unused protocol
3269 * @vid: VLAN ID to be added
3271 * net_device_ops implementation for adding VLAN IDs
3274 ice_vlan_rx_add_vid(struct net_device
*netdev
, __always_unused __be16 proto
,
3277 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
3278 struct ice_vsi
*vsi
= np
->vsi
;
3281 /* VLAN 0 is added by default during load/reset */
3285 /* Enable VLAN pruning when a VLAN other than 0 is added */
3286 if (!ice_vsi_is_vlan_pruning_ena(vsi
)) {
3287 ret
= ice_cfg_vlan_pruning(vsi
, true, false);
3292 /* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
3293 * packets aren't pruned by the device's internal switch on Rx
3295 ret
= ice_vsi_add_vlan(vsi
, vid
, ICE_FWD_TO_VSI
);
3297 set_bit(ICE_VSI_VLAN_FLTR_CHANGED
, vsi
->state
);
3303 * ice_vlan_rx_kill_vid - Remove a VLAN ID filter from HW offload
3304 * @netdev: network interface to be adjusted
3305 * @proto: unused protocol
3306 * @vid: VLAN ID to be removed
3308 * net_device_ops implementation for removing VLAN IDs
3311 ice_vlan_rx_kill_vid(struct net_device
*netdev
, __always_unused __be16 proto
,
3314 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
3315 struct ice_vsi
*vsi
= np
->vsi
;
3318 /* don't allow removal of VLAN 0 */
3322 /* Make sure ice_vsi_kill_vlan is successful before updating VLAN
3325 ret
= ice_vsi_kill_vlan(vsi
, vid
);
3329 /* Disable pruning when VLAN 0 is the only VLAN rule */
3330 if (vsi
->num_vlan
== 1 && ice_vsi_is_vlan_pruning_ena(vsi
))
3331 ret
= ice_cfg_vlan_pruning(vsi
, false, false);
3333 set_bit(ICE_VSI_VLAN_FLTR_CHANGED
, vsi
->state
);
3338 * ice_setup_pf_sw - Setup the HW switch on startup or after reset
3339 * @pf: board private structure
3341 * Returns 0 on success, negative value on failure
3343 static int ice_setup_pf_sw(struct ice_pf
*pf
)
3345 struct ice_vsi
*vsi
;
3348 if (ice_is_reset_in_progress(pf
->state
))
3351 vsi
= ice_pf_vsi_setup(pf
, pf
->hw
.port_info
);
3355 status
= ice_cfg_netdev(vsi
);
3358 goto unroll_vsi_setup
;
3360 /* netdev has to be configured before setting frame size */
3361 ice_vsi_cfg_frame_size(vsi
);
3363 /* Setup DCB netlink interface */
3364 ice_dcbnl_setup(vsi
);
3366 /* registering the NAPI handler requires both the queues and
3367 * netdev to be created, which are done in ice_pf_vsi_setup()
3368 * and ice_cfg_netdev() respectively
3372 status
= ice_set_cpu_rx_rmap(vsi
);
3374 dev_err(ice_pf_to_dev(pf
), "Failed to set CPU Rx map VSI %d error %d\n",
3375 vsi
->vsi_num
, status
);
3377 goto unroll_napi_add
;
3379 status
= ice_init_mac_fltr(pf
);
3381 goto free_cpu_rx_map
;
3386 ice_free_cpu_rx_rmap(vsi
);
3392 clear_bit(ICE_VSI_NETDEV_ALLOCD
, vsi
->state
);
3393 free_netdev(vsi
->netdev
);
3399 ice_vsi_release(vsi
);
3404 * ice_get_avail_q_count - Get count of queues in use
3405 * @pf_qmap: bitmap to get queue use count from
3406 * @lock: pointer to a mutex that protects access to pf_qmap
3407 * @size: size of the bitmap
3410 ice_get_avail_q_count(unsigned long *pf_qmap
, struct mutex
*lock
, u16 size
)
3416 for_each_clear_bit(bit
, pf_qmap
, size
)
3424 * ice_get_avail_txq_count - Get count of Tx queues in use
3425 * @pf: pointer to an ice_pf instance
3427 u16
ice_get_avail_txq_count(struct ice_pf
*pf
)
3429 return ice_get_avail_q_count(pf
->avail_txqs
, &pf
->avail_q_mutex
,
3434 * ice_get_avail_rxq_count - Get count of Rx queues in use
3435 * @pf: pointer to an ice_pf instance
3437 u16
ice_get_avail_rxq_count(struct ice_pf
*pf
)
3439 return ice_get_avail_q_count(pf
->avail_rxqs
, &pf
->avail_q_mutex
,
3444 * ice_deinit_pf - Unrolls initialziations done by ice_init_pf
3445 * @pf: board private structure to initialize
3447 static void ice_deinit_pf(struct ice_pf
*pf
)
3449 ice_service_task_stop(pf
);
3450 mutex_destroy(&pf
->adev_mutex
);
3451 mutex_destroy(&pf
->sw_mutex
);
3452 mutex_destroy(&pf
->tc_mutex
);
3453 mutex_destroy(&pf
->avail_q_mutex
);
3455 if (pf
->avail_txqs
) {
3456 bitmap_free(pf
->avail_txqs
);
3457 pf
->avail_txqs
= NULL
;
3460 if (pf
->avail_rxqs
) {
3461 bitmap_free(pf
->avail_rxqs
);
3462 pf
->avail_rxqs
= NULL
;
3466 ptp_clock_unregister(pf
->ptp
.clock
);
3470 * ice_set_pf_caps - set PFs capability flags
3471 * @pf: pointer to the PF instance
3473 static void ice_set_pf_caps(struct ice_pf
*pf
)
3475 struct ice_hw_func_caps
*func_caps
= &pf
->hw
.func_caps
;
3477 clear_bit(ICE_FLAG_RDMA_ENA
, pf
->flags
);
3478 clear_bit(ICE_FLAG_AUX_ENA
, pf
->flags
);
3479 if (func_caps
->common_cap
.rdma
) {
3480 set_bit(ICE_FLAG_RDMA_ENA
, pf
->flags
);
3481 set_bit(ICE_FLAG_AUX_ENA
, pf
->flags
);
3483 clear_bit(ICE_FLAG_DCB_CAPABLE
, pf
->flags
);
3484 if (func_caps
->common_cap
.dcb
)
3485 set_bit(ICE_FLAG_DCB_CAPABLE
, pf
->flags
);
3486 clear_bit(ICE_FLAG_SRIOV_CAPABLE
, pf
->flags
);
3487 if (func_caps
->common_cap
.sr_iov_1_1
) {
3488 set_bit(ICE_FLAG_SRIOV_CAPABLE
, pf
->flags
);
3489 pf
->num_vfs_supported
= min_t(int, func_caps
->num_allocd_vfs
,
3492 clear_bit(ICE_FLAG_RSS_ENA
, pf
->flags
);
3493 if (func_caps
->common_cap
.rss_table_size
)
3494 set_bit(ICE_FLAG_RSS_ENA
, pf
->flags
);
3496 clear_bit(ICE_FLAG_FD_ENA
, pf
->flags
);
3497 if (func_caps
->fd_fltr_guar
> 0 || func_caps
->fd_fltr_best_effort
> 0) {
3500 /* ctrl_vsi_idx will be set to a valid value when flow director
3501 * is setup by ice_init_fdir
3503 pf
->ctrl_vsi_idx
= ICE_NO_VSI
;
3504 set_bit(ICE_FLAG_FD_ENA
, pf
->flags
);
3505 /* force guaranteed filter pool for PF */
3506 ice_alloc_fd_guar_item(&pf
->hw
, &unused
,
3507 func_caps
->fd_fltr_guar
);
3508 /* force shared filter pool for PF */
3509 ice_alloc_fd_shrd_item(&pf
->hw
, &unused
,
3510 func_caps
->fd_fltr_best_effort
);
3513 clear_bit(ICE_FLAG_PTP_SUPPORTED
, pf
->flags
);
3514 if (func_caps
->common_cap
.ieee_1588
)
3515 set_bit(ICE_FLAG_PTP_SUPPORTED
, pf
->flags
);
3517 pf
->max_pf_txqs
= func_caps
->common_cap
.num_txq
;
3518 pf
->max_pf_rxqs
= func_caps
->common_cap
.num_rxq
;
3522 * ice_init_pf - Initialize general software structures (struct ice_pf)
3523 * @pf: board private structure to initialize
3525 static int ice_init_pf(struct ice_pf
*pf
)
3527 ice_set_pf_caps(pf
);
3529 mutex_init(&pf
->sw_mutex
);
3530 mutex_init(&pf
->tc_mutex
);
3531 mutex_init(&pf
->adev_mutex
);
3533 INIT_HLIST_HEAD(&pf
->aq_wait_list
);
3534 spin_lock_init(&pf
->aq_wait_lock
);
3535 init_waitqueue_head(&pf
->aq_wait_queue
);
3537 init_waitqueue_head(&pf
->reset_wait_queue
);
3539 /* setup service timer and periodic service task */
3540 timer_setup(&pf
->serv_tmr
, ice_service_timer
, 0);
3541 pf
->serv_tmr_period
= HZ
;
3542 INIT_WORK(&pf
->serv_task
, ice_service_task
);
3543 clear_bit(ICE_SERVICE_SCHED
, pf
->state
);
3545 mutex_init(&pf
->avail_q_mutex
);
3546 pf
->avail_txqs
= bitmap_zalloc(pf
->max_pf_txqs
, GFP_KERNEL
);
3547 if (!pf
->avail_txqs
)
3550 pf
->avail_rxqs
= bitmap_zalloc(pf
->max_pf_rxqs
, GFP_KERNEL
);
3551 if (!pf
->avail_rxqs
) {
3552 devm_kfree(ice_pf_to_dev(pf
), pf
->avail_txqs
);
3553 pf
->avail_txqs
= NULL
;
3561 * ice_ena_msix_range - Request a range of MSIX vectors from the OS
3562 * @pf: board private structure
3564 * compute the number of MSIX vectors required (v_budget) and request from
3565 * the OS. Return the number of vectors reserved or negative on failure
3567 static int ice_ena_msix_range(struct ice_pf
*pf
)
3569 int num_cpus
, v_left
, v_actual
, v_other
, v_budget
= 0;
3570 struct device
*dev
= ice_pf_to_dev(pf
);
3573 v_left
= pf
->hw
.func_caps
.common_cap
.num_msix_vectors
;
3574 num_cpus
= num_online_cpus();
3576 /* reserve for LAN miscellaneous handler */
3577 needed
= ICE_MIN_LAN_OICR_MSIX
;
3578 if (v_left
< needed
)
3579 goto no_hw_vecs_left_err
;
3583 /* reserve for flow director */
3584 if (test_bit(ICE_FLAG_FD_ENA
, pf
->flags
)) {
3585 needed
= ICE_FDIR_MSIX
;
3586 if (v_left
< needed
)
3587 goto no_hw_vecs_left_err
;
3592 /* total used for non-traffic vectors */
3595 /* reserve vectors for LAN traffic */
3597 if (v_left
< needed
)
3598 goto no_hw_vecs_left_err
;
3599 pf
->num_lan_msix
= needed
;
3603 /* reserve vectors for RDMA auxiliary driver */
3604 if (test_bit(ICE_FLAG_RDMA_ENA
, pf
->flags
)) {
3605 needed
= num_cpus
+ ICE_RDMA_NUM_AEQ_MSIX
;
3606 if (v_left
< needed
)
3607 goto no_hw_vecs_left_err
;
3608 pf
->num_rdma_msix
= needed
;
3613 pf
->msix_entries
= devm_kcalloc(dev
, v_budget
,
3614 sizeof(*pf
->msix_entries
), GFP_KERNEL
);
3615 if (!pf
->msix_entries
) {
3620 for (i
= 0; i
< v_budget
; i
++)
3621 pf
->msix_entries
[i
].entry
= i
;
3623 /* actually reserve the vectors */
3624 v_actual
= pci_enable_msix_range(pf
->pdev
, pf
->msix_entries
,
3625 ICE_MIN_MSIX
, v_budget
);
3627 dev_err(dev
, "unable to reserve MSI-X vectors\n");
3632 if (v_actual
< v_budget
) {
3633 dev_warn(dev
, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
3634 v_budget
, v_actual
);
3636 if (v_actual
< ICE_MIN_MSIX
) {
3637 /* error if we can't get minimum vectors */
3638 pci_disable_msix(pf
->pdev
);
3642 int v_remain
= v_actual
- v_other
;
3643 int v_rdma
= 0, v_min_rdma
= 0;
3645 if (test_bit(ICE_FLAG_RDMA_ENA
, pf
->flags
)) {
3646 /* Need at least 1 interrupt in addition to
3649 v_rdma
= ICE_RDMA_NUM_AEQ_MSIX
+ 1;
3650 v_min_rdma
= ICE_MIN_RDMA_MSIX
;
3653 if (v_actual
== ICE_MIN_MSIX
||
3654 v_remain
< ICE_MIN_LAN_TXRX_MSIX
+ v_min_rdma
) {
3655 dev_warn(dev
, "Not enough MSI-X vectors to support RDMA.\n");
3656 clear_bit(ICE_FLAG_RDMA_ENA
, pf
->flags
);
3658 pf
->num_rdma_msix
= 0;
3659 pf
->num_lan_msix
= ICE_MIN_LAN_TXRX_MSIX
;
3660 } else if ((v_remain
< ICE_MIN_LAN_TXRX_MSIX
+ v_rdma
) ||
3661 (v_remain
- v_rdma
< v_rdma
)) {
3662 /* Support minimum RDMA and give remaining
3663 * vectors to LAN MSIX
3665 pf
->num_rdma_msix
= v_min_rdma
;
3666 pf
->num_lan_msix
= v_remain
- v_min_rdma
;
3668 /* Split remaining MSIX with RDMA after
3669 * accounting for AEQ MSIX
3671 pf
->num_rdma_msix
= (v_remain
- ICE_RDMA_NUM_AEQ_MSIX
) / 2 +
3672 ICE_RDMA_NUM_AEQ_MSIX
;
3673 pf
->num_lan_msix
= v_remain
- pf
->num_rdma_msix
;
3676 dev_notice(dev
, "Enabled %d MSI-X vectors for LAN traffic.\n",
3679 if (test_bit(ICE_FLAG_RDMA_ENA
, pf
->flags
))
3680 dev_notice(dev
, "Enabled %d MSI-X vectors for RDMA.\n",
3688 devm_kfree(dev
, pf
->msix_entries
);
3691 no_hw_vecs_left_err
:
3692 dev_err(dev
, "not enough device MSI-X vectors. requested = %d, available = %d\n",
3696 pf
->num_rdma_msix
= 0;
3697 pf
->num_lan_msix
= 0;
3702 * ice_dis_msix - Disable MSI-X interrupt setup in OS
3703 * @pf: board private structure
3705 static void ice_dis_msix(struct ice_pf
*pf
)
3707 pci_disable_msix(pf
->pdev
);
3708 devm_kfree(ice_pf_to_dev(pf
), pf
->msix_entries
);
3709 pf
->msix_entries
= NULL
;
3713 * ice_clear_interrupt_scheme - Undo things done by ice_init_interrupt_scheme
3714 * @pf: board private structure
3716 static void ice_clear_interrupt_scheme(struct ice_pf
*pf
)
3720 if (pf
->irq_tracker
) {
3721 devm_kfree(ice_pf_to_dev(pf
), pf
->irq_tracker
);
3722 pf
->irq_tracker
= NULL
;
3727 * ice_init_interrupt_scheme - Determine proper interrupt scheme
3728 * @pf: board private structure to initialize
3730 static int ice_init_interrupt_scheme(struct ice_pf
*pf
)
3734 vectors
= ice_ena_msix_range(pf
);
3739 /* set up vector assignment tracking */
3740 pf
->irq_tracker
= devm_kzalloc(ice_pf_to_dev(pf
),
3741 struct_size(pf
->irq_tracker
, list
, vectors
),
3743 if (!pf
->irq_tracker
) {
3748 /* populate SW interrupts pool with number of OS granted IRQs. */
3749 pf
->num_avail_sw_msix
= (u16
)vectors
;
3750 pf
->irq_tracker
->num_entries
= (u16
)vectors
;
3751 pf
->irq_tracker
->end
= pf
->irq_tracker
->num_entries
;
3757 * ice_is_wol_supported - check if WoL is supported
3758 * @hw: pointer to hardware info
3760 * Check if WoL is supported based on the HW configuration.
3761 * Returns true if NVM supports and enables WoL for this port, false otherwise
3763 bool ice_is_wol_supported(struct ice_hw
*hw
)
3767 /* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control
3768 * word) indicates WoL is not supported on the corresponding PF ID.
3770 if (ice_read_sr_word(hw
, ICE_SR_NVM_WOL_CFG
, &wol_ctrl
))
3773 return !(BIT(hw
->port_info
->lport
) & wol_ctrl
);
3777 * ice_vsi_recfg_qs - Change the number of queues on a VSI
3778 * @vsi: VSI being changed
3779 * @new_rx: new number of Rx queues
3780 * @new_tx: new number of Tx queues
3782 * Only change the number of queues if new_tx, or new_rx is non-0.
3784 * Returns 0 on success.
3786 int ice_vsi_recfg_qs(struct ice_vsi
*vsi
, int new_rx
, int new_tx
)
3788 struct ice_pf
*pf
= vsi
->back
;
3789 int err
= 0, timeout
= 50;
3791 if (!new_rx
&& !new_tx
)
3794 while (test_and_set_bit(ICE_CFG_BUSY
, pf
->state
)) {
3798 usleep_range(1000, 2000);
3802 vsi
->req_txq
= (u16
)new_tx
;
3804 vsi
->req_rxq
= (u16
)new_rx
;
3806 /* set for the next time the netdev is started */
3807 if (!netif_running(vsi
->netdev
)) {
3808 ice_vsi_rebuild(vsi
, false);
3809 dev_dbg(ice_pf_to_dev(pf
), "Link is down, queue count change happens when link is brought up\n");
3814 ice_vsi_rebuild(vsi
, false);
3815 ice_pf_dcb_recfg(pf
);
3818 clear_bit(ICE_CFG_BUSY
, pf
->state
);
3823 * ice_set_safe_mode_vlan_cfg - configure PF VSI to allow all VLANs in safe mode
3824 * @pf: PF to configure
3826 * No VLAN offloads/filtering are advertised in safe mode so make sure the PF
3827 * VSI can still Tx/Rx VLAN tagged packets.
3829 static void ice_set_safe_mode_vlan_cfg(struct ice_pf
*pf
)
3831 struct ice_vsi
*vsi
= ice_get_main_vsi(pf
);
3832 struct ice_vsi_ctx
*ctxt
;
3833 enum ice_status status
;
3839 ctxt
= kzalloc(sizeof(*ctxt
), GFP_KERNEL
);
3844 ctxt
->info
= vsi
->info
;
3846 ctxt
->info
.valid_sections
=
3847 cpu_to_le16(ICE_AQ_VSI_PROP_VLAN_VALID
|
3848 ICE_AQ_VSI_PROP_SECURITY_VALID
|
3849 ICE_AQ_VSI_PROP_SW_VALID
);
3851 /* disable VLAN anti-spoof */
3852 ctxt
->info
.sec_flags
&= ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA
<<
3853 ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S
);
3855 /* disable VLAN pruning and keep all other settings */
3856 ctxt
->info
.sw_flags2
&= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA
;
3858 /* allow all VLANs on Tx and don't strip on Rx */
3859 ctxt
->info
.vlan_flags
= ICE_AQ_VSI_VLAN_MODE_ALL
|
3860 ICE_AQ_VSI_VLAN_EMOD_NOTHING
;
3862 status
= ice_update_vsi(hw
, vsi
->idx
, ctxt
, NULL
);
3864 dev_err(ice_pf_to_dev(vsi
->back
), "Failed to update VSI for safe mode VLANs, err %s aq_err %s\n",
3865 ice_stat_str(status
),
3866 ice_aq_str(hw
->adminq
.sq_last_status
));
3868 vsi
->info
.sec_flags
= ctxt
->info
.sec_flags
;
3869 vsi
->info
.sw_flags2
= ctxt
->info
.sw_flags2
;
3870 vsi
->info
.vlan_flags
= ctxt
->info
.vlan_flags
;
3877 * ice_log_pkg_init - log result of DDP package load
3878 * @hw: pointer to hardware info
3879 * @status: status of package load
3882 ice_log_pkg_init(struct ice_hw
*hw
, enum ice_status
*status
)
3884 struct ice_pf
*pf
= (struct ice_pf
*)hw
->back
;
3885 struct device
*dev
= ice_pf_to_dev(pf
);
3889 /* The package download AdminQ command returned success because
3890 * this download succeeded or ICE_ERR_AQ_NO_WORK since there is
3891 * already a package loaded on the device.
3893 if (hw
->pkg_ver
.major
== hw
->active_pkg_ver
.major
&&
3894 hw
->pkg_ver
.minor
== hw
->active_pkg_ver
.minor
&&
3895 hw
->pkg_ver
.update
== hw
->active_pkg_ver
.update
&&
3896 hw
->pkg_ver
.draft
== hw
->active_pkg_ver
.draft
&&
3897 !memcmp(hw
->pkg_name
, hw
->active_pkg_name
,
3898 sizeof(hw
->pkg_name
))) {
3899 if (hw
->pkg_dwnld_status
== ICE_AQ_RC_EEXIST
)
3900 dev_info(dev
, "DDP package already present on device: %s version %d.%d.%d.%d\n",
3901 hw
->active_pkg_name
,
3902 hw
->active_pkg_ver
.major
,
3903 hw
->active_pkg_ver
.minor
,
3904 hw
->active_pkg_ver
.update
,
3905 hw
->active_pkg_ver
.draft
);
3907 dev_info(dev
, "The DDP package was successfully loaded: %s version %d.%d.%d.%d\n",
3908 hw
->active_pkg_name
,
3909 hw
->active_pkg_ver
.major
,
3910 hw
->active_pkg_ver
.minor
,
3911 hw
->active_pkg_ver
.update
,
3912 hw
->active_pkg_ver
.draft
);
3913 } else if (hw
->active_pkg_ver
.major
!= ICE_PKG_SUPP_VER_MAJ
||
3914 hw
->active_pkg_ver
.minor
!= ICE_PKG_SUPP_VER_MNR
) {
3915 dev_err(dev
, "The device has a DDP package that is not supported by the driver. The device has package '%s' version %d.%d.x.x. The driver requires version %d.%d.x.x. Entering Safe Mode.\n",
3916 hw
->active_pkg_name
,
3917 hw
->active_pkg_ver
.major
,
3918 hw
->active_pkg_ver
.minor
,
3919 ICE_PKG_SUPP_VER_MAJ
, ICE_PKG_SUPP_VER_MNR
);
3920 *status
= ICE_ERR_NOT_SUPPORTED
;
3921 } else if (hw
->active_pkg_ver
.major
== ICE_PKG_SUPP_VER_MAJ
&&
3922 hw
->active_pkg_ver
.minor
== ICE_PKG_SUPP_VER_MNR
) {
3923 dev_info(dev
, "The driver could not load the DDP package file because a compatible DDP package is already present on the device. The device has package '%s' version %d.%d.%d.%d. The package file found by the driver: '%s' version %d.%d.%d.%d.\n",
3924 hw
->active_pkg_name
,
3925 hw
->active_pkg_ver
.major
,
3926 hw
->active_pkg_ver
.minor
,
3927 hw
->active_pkg_ver
.update
,
3928 hw
->active_pkg_ver
.draft
,
3935 dev_err(dev
, "An unknown error occurred when loading the DDP package, please reboot the system. If the problem persists, update the NVM. Entering Safe Mode.\n");
3936 *status
= ICE_ERR_NOT_SUPPORTED
;
3939 case ICE_ERR_FW_DDP_MISMATCH
:
3940 dev_err(dev
, "The firmware loaded on the device is not compatible with the DDP package. Please update the device's NVM. Entering safe mode.\n");
3942 case ICE_ERR_BUF_TOO_SHORT
:
3944 dev_err(dev
, "The DDP package file is invalid. Entering Safe Mode.\n");
3946 case ICE_ERR_NOT_SUPPORTED
:
3947 /* Package File version not supported */
3948 if (hw
->pkg_ver
.major
> ICE_PKG_SUPP_VER_MAJ
||
3949 (hw
->pkg_ver
.major
== ICE_PKG_SUPP_VER_MAJ
&&
3950 hw
->pkg_ver
.minor
> ICE_PKG_SUPP_VER_MNR
))
3951 dev_err(dev
, "The DDP package file version is higher than the driver supports. Please use an updated driver. Entering Safe Mode.\n");
3952 else if (hw
->pkg_ver
.major
< ICE_PKG_SUPP_VER_MAJ
||
3953 (hw
->pkg_ver
.major
== ICE_PKG_SUPP_VER_MAJ
&&
3954 hw
->pkg_ver
.minor
< ICE_PKG_SUPP_VER_MNR
))
3955 dev_err(dev
, "The DDP package file version is lower than the driver supports. The driver requires version %d.%d.x.x. Please use an updated DDP Package file. Entering Safe Mode.\n",
3956 ICE_PKG_SUPP_VER_MAJ
, ICE_PKG_SUPP_VER_MNR
);
3958 case ICE_ERR_AQ_ERROR
:
3959 switch (hw
->pkg_dwnld_status
) {
3960 case ICE_AQ_RC_ENOSEC
:
3961 case ICE_AQ_RC_EBADSIG
:
3962 dev_err(dev
, "The DDP package could not be loaded because its signature is not valid. Please use a valid DDP Package. Entering Safe Mode.\n");
3964 case ICE_AQ_RC_ESVN
:
3965 dev_err(dev
, "The DDP Package could not be loaded because its security revision is too low. Please use an updated DDP Package. Entering Safe Mode.\n");
3967 case ICE_AQ_RC_EBADMAN
:
3968 case ICE_AQ_RC_EBADBUF
:
3969 dev_err(dev
, "An error occurred on the device while loading the DDP package. The device will be reset.\n");
3970 /* poll for reset to complete */
3971 if (ice_check_reset(hw
))
3972 dev_err(dev
, "Error resetting device. Please reload the driver\n");
3979 dev_err(dev
, "An unknown error (%d) occurred when loading the DDP package. Entering Safe Mode.\n",
3986 * ice_load_pkg - load/reload the DDP Package file
3987 * @firmware: firmware structure when firmware requested or NULL for reload
3988 * @pf: pointer to the PF instance
3990 * Called on probe and post CORER/GLOBR rebuild to load DDP Package and
3991 * initialize HW tables.
3994 ice_load_pkg(const struct firmware
*firmware
, struct ice_pf
*pf
)
3996 enum ice_status status
= ICE_ERR_PARAM
;
3997 struct device
*dev
= ice_pf_to_dev(pf
);
3998 struct ice_hw
*hw
= &pf
->hw
;
4000 /* Load DDP Package */
4001 if (firmware
&& !hw
->pkg_copy
) {
4002 status
= ice_copy_and_init_pkg(hw
, firmware
->data
,
4004 ice_log_pkg_init(hw
, &status
);
4005 } else if (!firmware
&& hw
->pkg_copy
) {
4006 /* Reload package during rebuild after CORER/GLOBR reset */
4007 status
= ice_init_pkg(hw
, hw
->pkg_copy
, hw
->pkg_size
);
4008 ice_log_pkg_init(hw
, &status
);
4010 dev_err(dev
, "The DDP package file failed to load. Entering Safe Mode.\n");
4015 clear_bit(ICE_FLAG_ADV_FEATURES
, pf
->flags
);
4019 /* Successful download package is the precondition for advanced
4020 * features, hence setting the ICE_FLAG_ADV_FEATURES flag
4022 set_bit(ICE_FLAG_ADV_FEATURES
, pf
->flags
);
4026 * ice_verify_cacheline_size - verify driver's assumption of 64 Byte cache lines
4027 * @pf: pointer to the PF structure
4029 * There is no error returned here because the driver should be able to handle
4030 * 128 Byte cache lines, so we only print a warning in case issues are seen,
4031 * specifically with Tx.
4033 static void ice_verify_cacheline_size(struct ice_pf
*pf
)
4035 if (rd32(&pf
->hw
, GLPCI_CNF2
) & GLPCI_CNF2_CACHELINE_SIZE_M
)
4036 dev_warn(ice_pf_to_dev(pf
), "%d Byte cache line assumption is invalid, driver may have Tx timeouts!\n",
4037 ICE_CACHE_LINE_BYTES
);
4041 * ice_send_version - update firmware with driver version
4044 * Returns ICE_SUCCESS on success, else error code
4046 static enum ice_status
ice_send_version(struct ice_pf
*pf
)
4048 struct ice_driver_ver dv
;
4050 dv
.major_ver
= 0xff;
4051 dv
.minor_ver
= 0xff;
4052 dv
.build_ver
= 0xff;
4053 dv
.subbuild_ver
= 0;
4054 strscpy((char *)dv
.driver_string
, UTS_RELEASE
,
4055 sizeof(dv
.driver_string
));
4056 return ice_aq_send_driver_ver(&pf
->hw
, &dv
, NULL
);
4060 * ice_init_fdir - Initialize flow director VSI and configuration
4061 * @pf: pointer to the PF instance
4063 * returns 0 on success, negative on error
4065 static int ice_init_fdir(struct ice_pf
*pf
)
4067 struct device
*dev
= ice_pf_to_dev(pf
);
4068 struct ice_vsi
*ctrl_vsi
;
4071 /* Side Band Flow Director needs to have a control VSI.
4072 * Allocate it and store it in the PF.
4074 ctrl_vsi
= ice_ctrl_vsi_setup(pf
, pf
->hw
.port_info
);
4076 dev_dbg(dev
, "could not create control VSI\n");
4080 err
= ice_vsi_open_ctrl(ctrl_vsi
);
4082 dev_dbg(dev
, "could not open control VSI\n");
4086 mutex_init(&pf
->hw
.fdir_fltr_lock
);
4088 err
= ice_fdir_create_dflt_rules(pf
);
4095 ice_fdir_release_flows(&pf
->hw
);
4096 ice_vsi_close(ctrl_vsi
);
4098 ice_vsi_release(ctrl_vsi
);
4099 if (pf
->ctrl_vsi_idx
!= ICE_NO_VSI
) {
4100 pf
->vsi
[pf
->ctrl_vsi_idx
] = NULL
;
4101 pf
->ctrl_vsi_idx
= ICE_NO_VSI
;
4107 * ice_get_opt_fw_name - return optional firmware file name or NULL
4108 * @pf: pointer to the PF instance
4110 static char *ice_get_opt_fw_name(struct ice_pf
*pf
)
4112 /* Optional firmware name same as default with additional dash
4113 * followed by a EUI-64 identifier (PCIe Device Serial Number)
4115 struct pci_dev
*pdev
= pf
->pdev
;
4116 char *opt_fw_filename
;
4119 /* Determine the name of the optional file using the DSN (two
4120 * dwords following the start of the DSN Capability).
4122 dsn
= pci_get_dsn(pdev
);
4126 opt_fw_filename
= kzalloc(NAME_MAX
, GFP_KERNEL
);
4127 if (!opt_fw_filename
)
4130 snprintf(opt_fw_filename
, NAME_MAX
, "%sice-%016llx.pkg",
4131 ICE_DDP_PKG_PATH
, dsn
);
4133 return opt_fw_filename
;
4137 * ice_request_fw - Device initialization routine
4138 * @pf: pointer to the PF instance
4140 static void ice_request_fw(struct ice_pf
*pf
)
4142 char *opt_fw_filename
= ice_get_opt_fw_name(pf
);
4143 const struct firmware
*firmware
= NULL
;
4144 struct device
*dev
= ice_pf_to_dev(pf
);
4147 /* optional device-specific DDP (if present) overrides the default DDP
4148 * package file. kernel logs a debug message if the file doesn't exist,
4149 * and warning messages for other errors.
4151 if (opt_fw_filename
) {
4152 err
= firmware_request_nowarn(&firmware
, opt_fw_filename
, dev
);
4154 kfree(opt_fw_filename
);
4158 /* request for firmware was successful. Download to device */
4159 ice_load_pkg(firmware
, pf
);
4160 kfree(opt_fw_filename
);
4161 release_firmware(firmware
);
4166 err
= request_firmware(&firmware
, ICE_DDP_PKG_FILE
, dev
);
4168 dev_err(dev
, "The DDP package file was not found or could not be read. Entering Safe Mode\n");
4172 /* request for firmware was successful. Download to device */
4173 ice_load_pkg(firmware
, pf
);
4174 release_firmware(firmware
);
4178 * ice_print_wake_reason - show the wake up cause in the log
4179 * @pf: pointer to the PF struct
4181 static void ice_print_wake_reason(struct ice_pf
*pf
)
4183 u32 wus
= pf
->wakeup_reason
;
4184 const char *wake_str
;
4186 /* if no wake event, nothing to print */
4190 if (wus
& PFPM_WUS_LNKC_M
)
4191 wake_str
= "Link\n";
4192 else if (wus
& PFPM_WUS_MAG_M
)
4193 wake_str
= "Magic Packet\n";
4194 else if (wus
& PFPM_WUS_MNG_M
)
4195 wake_str
= "Management\n";
4196 else if (wus
& PFPM_WUS_FW_RST_WK_M
)
4197 wake_str
= "Firmware Reset\n";
4199 wake_str
= "Unknown\n";
4201 dev_info(ice_pf_to_dev(pf
), "Wake reason: %s", wake_str
);
4205 * ice_register_netdev - register netdev and devlink port
4206 * @pf: pointer to the PF struct
4208 static int ice_register_netdev(struct ice_pf
*pf
)
4210 struct ice_vsi
*vsi
;
4213 vsi
= ice_get_main_vsi(pf
);
4214 if (!vsi
|| !vsi
->netdev
)
4217 err
= register_netdev(vsi
->netdev
);
4219 goto err_register_netdev
;
4221 set_bit(ICE_VSI_NETDEV_REGISTERED
, vsi
->state
);
4222 netif_carrier_off(vsi
->netdev
);
4223 netif_tx_stop_all_queues(vsi
->netdev
);
4224 err
= ice_devlink_create_pf_port(pf
);
4226 goto err_devlink_create
;
4228 devlink_port_type_eth_set(&pf
->devlink_port
, vsi
->netdev
);
4232 unregister_netdev(vsi
->netdev
);
4233 clear_bit(ICE_VSI_NETDEV_REGISTERED
, vsi
->state
);
4234 err_register_netdev
:
4235 free_netdev(vsi
->netdev
);
4237 clear_bit(ICE_VSI_NETDEV_ALLOCD
, vsi
->state
);
4242 * ice_probe - Device initialization routine
4243 * @pdev: PCI device information struct
4244 * @ent: entry in ice_pci_tbl
4246 * Returns 0 on success, negative on failure
4249 ice_probe(struct pci_dev
*pdev
, const struct pci_device_id __always_unused
*ent
)
4251 struct device
*dev
= &pdev
->dev
;
4256 if (pdev
->is_virtfn
) {
4257 dev_err(dev
, "can't probe a virtual function\n");
4261 /* this driver uses devres, see
4262 * Documentation/driver-api/driver-model/devres.rst
4264 err
= pcim_enable_device(pdev
);
4268 err
= pcim_iomap_regions(pdev
, BIT(ICE_BAR0
), dev_driver_string(dev
));
4270 dev_err(dev
, "BAR0 I/O map error %d\n", err
);
4274 pf
= ice_allocate_pf(dev
);
4278 /* initialize Auxiliary index to invalid value */
4281 /* set up for high or low DMA */
4282 err
= dma_set_mask_and_coherent(dev
, DMA_BIT_MASK(64));
4284 err
= dma_set_mask_and_coherent(dev
, DMA_BIT_MASK(32));
4286 dev_err(dev
, "DMA configuration failed: 0x%x\n", err
);
4290 pci_enable_pcie_error_reporting(pdev
);
4291 pci_set_master(pdev
);
4294 pci_set_drvdata(pdev
, pf
);
4295 set_bit(ICE_DOWN
, pf
->state
);
4296 /* Disable service task until DOWN bit is cleared */
4297 set_bit(ICE_SERVICE_DIS
, pf
->state
);
4300 hw
->hw_addr
= pcim_iomap_table(pdev
)[ICE_BAR0
];
4301 pci_save_state(pdev
);
4304 hw
->vendor_id
= pdev
->vendor
;
4305 hw
->device_id
= pdev
->device
;
4306 pci_read_config_byte(pdev
, PCI_REVISION_ID
, &hw
->revision_id
);
4307 hw
->subsystem_vendor_id
= pdev
->subsystem_vendor
;
4308 hw
->subsystem_device_id
= pdev
->subsystem_device
;
4309 hw
->bus
.device
= PCI_SLOT(pdev
->devfn
);
4310 hw
->bus
.func
= PCI_FUNC(pdev
->devfn
);
4311 ice_set_ctrlq_len(hw
);
4313 pf
->msg_enable
= netif_msg_init(debug
, ICE_DFLT_NETIF_M
);
4315 err
= ice_devlink_register(pf
);
4317 dev_err(dev
, "ice_devlink_register failed: %d\n", err
);
4318 goto err_exit_unroll
;
4321 #ifndef CONFIG_DYNAMIC_DEBUG
4323 hw
->debug_mask
= debug
;
4326 err
= ice_init_hw(hw
);
4328 dev_err(dev
, "ice_init_hw failed: %d\n", err
);
4330 goto err_exit_unroll
;
4335 /* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be
4336 * set in pf->state, which will cause ice_is_safe_mode to return
4339 if (ice_is_safe_mode(pf
)) {
4340 dev_err(dev
, "Package download failed. Advanced features disabled - Device now in Safe Mode\n");
4341 /* we already got function/device capabilities but these don't
4342 * reflect what the driver needs to do in safe mode. Instead of
4343 * adding conditional logic everywhere to ignore these
4344 * device/function capabilities, override them.
4346 ice_set_safe_mode_caps(hw
);
4349 err
= ice_init_pf(pf
);
4351 dev_err(dev
, "ice_init_pf failed: %d\n", err
);
4352 goto err_init_pf_unroll
;
4355 ice_devlink_init_regions(pf
);
4357 pf
->hw
.udp_tunnel_nic
.set_port
= ice_udp_tunnel_set_port
;
4358 pf
->hw
.udp_tunnel_nic
.unset_port
= ice_udp_tunnel_unset_port
;
4359 pf
->hw
.udp_tunnel_nic
.flags
= UDP_TUNNEL_NIC_INFO_MAY_SLEEP
;
4360 pf
->hw
.udp_tunnel_nic
.shared
= &pf
->hw
.udp_tunnel_shared
;
4362 if (pf
->hw
.tnl
.valid_count
[TNL_VXLAN
]) {
4363 pf
->hw
.udp_tunnel_nic
.tables
[i
].n_entries
=
4364 pf
->hw
.tnl
.valid_count
[TNL_VXLAN
];
4365 pf
->hw
.udp_tunnel_nic
.tables
[i
].tunnel_types
=
4366 UDP_TUNNEL_TYPE_VXLAN
;
4369 if (pf
->hw
.tnl
.valid_count
[TNL_GENEVE
]) {
4370 pf
->hw
.udp_tunnel_nic
.tables
[i
].n_entries
=
4371 pf
->hw
.tnl
.valid_count
[TNL_GENEVE
];
4372 pf
->hw
.udp_tunnel_nic
.tables
[i
].tunnel_types
=
4373 UDP_TUNNEL_TYPE_GENEVE
;
4377 pf
->num_alloc_vsi
= hw
->func_caps
.guar_num_vsi
;
4378 if (!pf
->num_alloc_vsi
) {
4380 goto err_init_pf_unroll
;
4382 if (pf
->num_alloc_vsi
> UDP_TUNNEL_NIC_MAX_SHARING_DEVICES
) {
4383 dev_warn(&pf
->pdev
->dev
,
4384 "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
4385 pf
->num_alloc_vsi
, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES
);
4386 pf
->num_alloc_vsi
= UDP_TUNNEL_NIC_MAX_SHARING_DEVICES
;
4389 pf
->vsi
= devm_kcalloc(dev
, pf
->num_alloc_vsi
, sizeof(*pf
->vsi
),
4393 goto err_init_pf_unroll
;
4396 err
= ice_init_interrupt_scheme(pf
);
4398 dev_err(dev
, "ice_init_interrupt_scheme failed: %d\n", err
);
4400 goto err_init_vsi_unroll
;
4403 /* In case of MSIX we are going to setup the misc vector right here
4404 * to handle admin queue events etc. In case of legacy and MSI
4405 * the misc functionality and queue processing is combined in
4406 * the same vector and that gets setup at open.
4408 err
= ice_req_irq_msix_misc(pf
);
4410 dev_err(dev
, "setup of misc vector failed: %d\n", err
);
4411 goto err_init_interrupt_unroll
;
4414 /* create switch struct for the switch element created by FW on boot */
4415 pf
->first_sw
= devm_kzalloc(dev
, sizeof(*pf
->first_sw
), GFP_KERNEL
);
4416 if (!pf
->first_sw
) {
4418 goto err_msix_misc_unroll
;
4422 pf
->first_sw
->bridge_mode
= BRIDGE_MODE_VEB
;
4424 pf
->first_sw
->bridge_mode
= BRIDGE_MODE_VEPA
;
4426 pf
->first_sw
->pf
= pf
;
4428 /* record the sw_id available for later use */
4429 pf
->first_sw
->sw_id
= hw
->port_info
->sw_id
;
4431 err
= ice_setup_pf_sw(pf
);
4433 dev_err(dev
, "probe failed due to setup PF switch: %d\n", err
);
4434 goto err_alloc_sw_unroll
;
4437 clear_bit(ICE_SERVICE_DIS
, pf
->state
);
4439 /* tell the firmware we are up */
4440 err
= ice_send_version(pf
);
4442 dev_err(dev
, "probe failed sending driver version %s. error: %d\n",
4444 goto err_send_version_unroll
;
4447 /* since everything is good, start the service timer */
4448 mod_timer(&pf
->serv_tmr
, round_jiffies(jiffies
+ pf
->serv_tmr_period
));
4450 err
= ice_init_link_events(pf
->hw
.port_info
);
4452 dev_err(dev
, "ice_init_link_events failed: %d\n", err
);
4453 goto err_send_version_unroll
;
4456 /* not a fatal error if this fails */
4457 err
= ice_init_nvm_phy_type(pf
->hw
.port_info
);
4459 dev_err(dev
, "ice_init_nvm_phy_type failed: %d\n", err
);
4461 /* not a fatal error if this fails */
4462 err
= ice_update_link_info(pf
->hw
.port_info
);
4464 dev_err(dev
, "ice_update_link_info failed: %d\n", err
);
4466 ice_init_link_dflt_override(pf
->hw
.port_info
);
4468 ice_check_module_power(pf
, pf
->hw
.port_info
->phy
.link_info
.link_cfg_err
);
4470 /* if media available, initialize PHY settings */
4471 if (pf
->hw
.port_info
->phy
.link_info
.link_info
&
4472 ICE_AQ_MEDIA_AVAILABLE
) {
4473 /* not a fatal error if this fails */
4474 err
= ice_init_phy_user_cfg(pf
->hw
.port_info
);
4476 dev_err(dev
, "ice_init_phy_user_cfg failed: %d\n", err
);
4478 if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA
, pf
->flags
)) {
4479 struct ice_vsi
*vsi
= ice_get_main_vsi(pf
);
4482 ice_configure_phy(vsi
);
4485 set_bit(ICE_FLAG_NO_MEDIA
, pf
->flags
);
4488 ice_verify_cacheline_size(pf
);
4490 /* Save wakeup reason register for later use */
4491 pf
->wakeup_reason
= rd32(hw
, PFPM_WUS
);
4493 /* check for a power management event */
4494 ice_print_wake_reason(pf
);
4496 /* clear wake status, all bits */
4497 wr32(hw
, PFPM_WUS
, U32_MAX
);
4499 /* Disable WoL at init, wait for user to enable */
4500 device_set_wakeup_enable(dev
, false);
4502 if (ice_is_safe_mode(pf
)) {
4503 ice_set_safe_mode_vlan_cfg(pf
);
4507 /* initialize DDP driven features */
4508 if (test_bit(ICE_FLAG_PTP_SUPPORTED
, pf
->flags
))
4511 /* Note: Flow director init failure is non-fatal to load */
4512 if (ice_init_fdir(pf
))
4513 dev_err(dev
, "could not initialize flow director\n");
4515 /* Note: DCB init failure is non-fatal to load */
4516 if (ice_init_pf_dcb(pf
, false)) {
4517 clear_bit(ICE_FLAG_DCB_CAPABLE
, pf
->flags
);
4518 clear_bit(ICE_FLAG_DCB_ENA
, pf
->flags
);
4520 ice_cfg_lldp_mib_change(&pf
->hw
, true);
4523 if (ice_init_lag(pf
))
4524 dev_warn(dev
, "Failed to init link aggregation support\n");
4526 /* print PCI link speed and width */
4527 pcie_print_link_status(pf
->pdev
);
4530 err
= ice_register_netdev(pf
);
4532 goto err_netdev_reg
;
4534 /* ready to go, so clear down state bit */
4535 clear_bit(ICE_DOWN
, pf
->state
);
4536 if (ice_is_aux_ena(pf
)) {
4537 pf
->aux_idx
= ida_alloc(&ice_aux_ida
, GFP_KERNEL
);
4538 if (pf
->aux_idx
< 0) {
4539 dev_err(dev
, "Failed to allocate device ID for AUX driver\n");
4541 goto err_netdev_reg
;
4544 err
= ice_init_rdma(pf
);
4546 dev_err(dev
, "Failed to initialize RDMA: %d\n", err
);
4548 goto err_init_aux_unroll
;
4551 dev_warn(dev
, "RDMA is not supported on this device\n");
4556 err_init_aux_unroll
:
4558 ida_free(&ice_aux_ida
, pf
->aux_idx
);
4560 err_send_version_unroll
:
4561 ice_vsi_release_all(pf
);
4562 err_alloc_sw_unroll
:
4563 set_bit(ICE_SERVICE_DIS
, pf
->state
);
4564 set_bit(ICE_DOWN
, pf
->state
);
4565 devm_kfree(dev
, pf
->first_sw
);
4566 err_msix_misc_unroll
:
4567 ice_free_irq_msix_misc(pf
);
4568 err_init_interrupt_unroll
:
4569 ice_clear_interrupt_scheme(pf
);
4570 err_init_vsi_unroll
:
4571 devm_kfree(dev
, pf
->vsi
);
4574 ice_devlink_destroy_regions(pf
);
4577 ice_devlink_unregister(pf
);
4578 pci_disable_pcie_error_reporting(pdev
);
4579 pci_disable_device(pdev
);
4584 * ice_set_wake - enable or disable Wake on LAN
4585 * @pf: pointer to the PF struct
4587 * Simple helper for WoL control
4589 static void ice_set_wake(struct ice_pf
*pf
)
4591 struct ice_hw
*hw
= &pf
->hw
;
4592 bool wol
= pf
->wol_ena
;
4594 /* clear wake state, otherwise new wake events won't fire */
4595 wr32(hw
, PFPM_WUS
, U32_MAX
);
4597 /* enable / disable APM wake up, no RMW needed */
4598 wr32(hw
, PFPM_APM
, wol
? PFPM_APM_APME_M
: 0);
4600 /* set magic packet filter enabled */
4601 wr32(hw
, PFPM_WUFC
, wol
? PFPM_WUFC_MAG_M
: 0);
4605 * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
4606 * @pf: pointer to the PF struct
4608 * Issue firmware command to enable multicast magic wake, making
4609 * sure that any locally administered address (LAA) is used for
4610 * wake, and that PF reset doesn't undo the LAA.
4612 static void ice_setup_mc_magic_wake(struct ice_pf
*pf
)
4614 struct device
*dev
= ice_pf_to_dev(pf
);
4615 struct ice_hw
*hw
= &pf
->hw
;
4616 enum ice_status status
;
4617 u8 mac_addr
[ETH_ALEN
];
4618 struct ice_vsi
*vsi
;
4624 vsi
= ice_get_main_vsi(pf
);
4628 /* Get current MAC address in case it's an LAA */
4630 ether_addr_copy(mac_addr
, vsi
->netdev
->dev_addr
);
4632 ether_addr_copy(mac_addr
, vsi
->port_info
->mac
.perm_addr
);
4634 flags
= ICE_AQC_MAN_MAC_WR_MC_MAG_EN
|
4635 ICE_AQC_MAN_MAC_UPDATE_LAA_WOL
|
4636 ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP
;
4638 status
= ice_aq_manage_mac_write(hw
, mac_addr
, flags
, NULL
);
4640 dev_err(dev
, "Failed to enable Multicast Magic Packet wake, err %s aq_err %s\n",
4641 ice_stat_str(status
),
4642 ice_aq_str(hw
->adminq
.sq_last_status
));
4646 * ice_remove - Device removal routine
4647 * @pdev: PCI device information struct
4649 static void ice_remove(struct pci_dev
*pdev
)
4651 struct ice_pf
*pf
= pci_get_drvdata(pdev
);
4654 for (i
= 0; i
< ICE_MAX_RESET_WAIT
; i
++) {
4655 if (!ice_is_reset_in_progress(pf
->state
))
4660 if (test_bit(ICE_FLAG_SRIOV_ENA
, pf
->flags
)) {
4661 set_bit(ICE_VF_RESETS_DISABLED
, pf
->state
);
4665 ice_service_task_stop(pf
);
4667 ice_aq_cancel_waiting_tasks(pf
);
4668 ice_unplug_aux_dev(pf
);
4669 if (pf
->aux_idx
>= 0)
4670 ida_free(&ice_aux_ida
, pf
->aux_idx
);
4671 set_bit(ICE_DOWN
, pf
->state
);
4673 mutex_destroy(&(&pf
->hw
)->fdir_fltr_lock
);
4675 if (test_bit(ICE_FLAG_PTP_SUPPORTED
, pf
->flags
))
4676 ice_ptp_release(pf
);
4677 if (!ice_is_safe_mode(pf
))
4678 ice_remove_arfs(pf
);
4679 ice_setup_mc_magic_wake(pf
);
4680 ice_vsi_release_all(pf
);
4682 ice_free_irq_msix_misc(pf
);
4683 ice_for_each_vsi(pf
, i
) {
4686 ice_vsi_free_q_vectors(pf
->vsi
[i
]);
4689 ice_devlink_destroy_regions(pf
);
4690 ice_deinit_hw(&pf
->hw
);
4691 ice_devlink_unregister(pf
);
4693 /* Issue a PFR as part of the prescribed driver unload flow. Do not
4694 * do it via ice_schedule_reset() since there is no need to rebuild
4695 * and the service task is already stopped.
4697 ice_reset(&pf
->hw
, ICE_RESET_PFR
);
4698 pci_wait_for_pending_transaction(pdev
);
4699 ice_clear_interrupt_scheme(pf
);
4700 pci_disable_pcie_error_reporting(pdev
);
4701 pci_disable_device(pdev
);
4705 * ice_shutdown - PCI callback for shutting down device
4706 * @pdev: PCI device information struct
4708 static void ice_shutdown(struct pci_dev
*pdev
)
4710 struct ice_pf
*pf
= pci_get_drvdata(pdev
);
4714 if (system_state
== SYSTEM_POWER_OFF
) {
4715 pci_wake_from_d3(pdev
, pf
->wol_ena
);
4716 pci_set_power_state(pdev
, PCI_D3hot
);
4722 * ice_prepare_for_shutdown - prep for PCI shutdown
4723 * @pf: board private structure
4725 * Inform or close all dependent features in prep for PCI device shutdown
4727 static void ice_prepare_for_shutdown(struct ice_pf
*pf
)
4729 struct ice_hw
*hw
= &pf
->hw
;
4732 /* Notify VFs of impending reset */
4733 if (ice_check_sq_alive(hw
, &hw
->mailboxq
))
4734 ice_vc_notify_reset(pf
);
4736 dev_dbg(ice_pf_to_dev(pf
), "Tearing down internal switch for shutdown\n");
4738 /* disable the VSIs and their queues that are not already DOWN */
4739 ice_pf_dis_all_vsi(pf
, false);
4741 ice_for_each_vsi(pf
, v
)
4743 pf
->vsi
[v
]->vsi_num
= 0;
4745 ice_shutdown_all_ctrlq(hw
);
4749 * ice_reinit_interrupt_scheme - Reinitialize interrupt scheme
4750 * @pf: board private structure to reinitialize
4752 * This routine reinitialize interrupt scheme that was cleared during
4753 * power management suspend callback.
4755 * This should be called during resume routine to re-allocate the q_vectors
4756 * and reacquire interrupts.
4758 static int ice_reinit_interrupt_scheme(struct ice_pf
*pf
)
4760 struct device
*dev
= ice_pf_to_dev(pf
);
4763 /* Since we clear MSIX flag during suspend, we need to
4764 * set it back during resume...
4767 ret
= ice_init_interrupt_scheme(pf
);
4769 dev_err(dev
, "Failed to re-initialize interrupt %d\n", ret
);
4773 /* Remap vectors and rings, after successful re-init interrupts */
4774 ice_for_each_vsi(pf
, v
) {
4778 ret
= ice_vsi_alloc_q_vectors(pf
->vsi
[v
]);
4781 ice_vsi_map_rings_to_vectors(pf
->vsi
[v
]);
4784 ret
= ice_req_irq_msix_misc(pf
);
4786 dev_err(dev
, "Setting up misc vector failed after device suspend %d\n",
4796 ice_vsi_free_q_vectors(pf
->vsi
[v
]);
4803 * @dev: generic device information structure
4805 * Power Management callback to quiesce the device and prepare
4806 * for D3 transition.
4808 static int __maybe_unused
ice_suspend(struct device
*dev
)
4810 struct pci_dev
*pdev
= to_pci_dev(dev
);
4814 pf
= pci_get_drvdata(pdev
);
4816 if (!ice_pf_state_is_nominal(pf
)) {
4817 dev_err(dev
, "Device is not ready, no need to suspend it\n");
4821 /* Stop watchdog tasks until resume completion.
4822 * Even though it is most likely that the service task is
4823 * disabled if the device is suspended or down, the service task's
4824 * state is controlled by a different state bit, and we should
4825 * store and honor whatever state that bit is in at this point.
4827 disabled
= ice_service_task_stop(pf
);
4829 ice_unplug_aux_dev(pf
);
4831 /* Already suspended?, then there is nothing to do */
4832 if (test_and_set_bit(ICE_SUSPENDED
, pf
->state
)) {
4834 ice_service_task_restart(pf
);
4838 if (test_bit(ICE_DOWN
, pf
->state
) ||
4839 ice_is_reset_in_progress(pf
->state
)) {
4840 dev_err(dev
, "can't suspend device in reset or already down\n");
4842 ice_service_task_restart(pf
);
4846 ice_setup_mc_magic_wake(pf
);
4848 ice_prepare_for_shutdown(pf
);
4852 /* Free vectors, clear the interrupt scheme and release IRQs
4853 * for proper hibernation, especially with large number of CPUs.
4854 * Otherwise hibernation might fail when mapping all the vectors back
4857 ice_free_irq_msix_misc(pf
);
4858 ice_for_each_vsi(pf
, v
) {
4861 ice_vsi_free_q_vectors(pf
->vsi
[v
]);
4863 ice_free_cpu_rx_rmap(ice_get_main_vsi(pf
));
4864 ice_clear_interrupt_scheme(pf
);
4866 pci_save_state(pdev
);
4867 pci_wake_from_d3(pdev
, pf
->wol_ena
);
4868 pci_set_power_state(pdev
, PCI_D3hot
);
4873 * ice_resume - PM callback for waking up from D3
4874 * @dev: generic device information structure
4876 static int __maybe_unused
ice_resume(struct device
*dev
)
4878 struct pci_dev
*pdev
= to_pci_dev(dev
);
4879 enum ice_reset_req reset_type
;
4884 pci_set_power_state(pdev
, PCI_D0
);
4885 pci_restore_state(pdev
);
4886 pci_save_state(pdev
);
4888 if (!pci_device_is_present(pdev
))
4891 ret
= pci_enable_device_mem(pdev
);
4893 dev_err(dev
, "Cannot enable device after suspend\n");
4897 pf
= pci_get_drvdata(pdev
);
4900 pf
->wakeup_reason
= rd32(hw
, PFPM_WUS
);
4901 ice_print_wake_reason(pf
);
4903 /* We cleared the interrupt scheme when we suspended, so we need to
4904 * restore it now to resume device functionality.
4906 ret
= ice_reinit_interrupt_scheme(pf
);
4908 dev_err(dev
, "Cannot restore interrupt scheme: %d\n", ret
);
4910 clear_bit(ICE_DOWN
, pf
->state
);
4911 /* Now perform PF reset and rebuild */
4912 reset_type
= ICE_RESET_PFR
;
4913 /* re-enable service task for reset, but allow reset to schedule it */
4914 clear_bit(ICE_SERVICE_DIS
, pf
->state
);
4916 if (ice_schedule_reset(pf
, reset_type
))
4917 dev_err(dev
, "Reset during resume failed.\n");
4919 clear_bit(ICE_SUSPENDED
, pf
->state
);
4920 ice_service_task_restart(pf
);
4922 /* Restart the service task */
4923 mod_timer(&pf
->serv_tmr
, round_jiffies(jiffies
+ pf
->serv_tmr_period
));
4927 #endif /* CONFIG_PM */
4930 * ice_pci_err_detected - warning that PCI error has been detected
4931 * @pdev: PCI device information struct
4932 * @err: the type of PCI error
4934 * Called to warn that something happened on the PCI bus and the error handling
4935 * is in progress. Allows the driver to gracefully prepare/handle PCI errors.
4937 static pci_ers_result_t
4938 ice_pci_err_detected(struct pci_dev
*pdev
, pci_channel_state_t err
)
4940 struct ice_pf
*pf
= pci_get_drvdata(pdev
);
4943 dev_err(&pdev
->dev
, "%s: unrecoverable device error %d\n",
4945 return PCI_ERS_RESULT_DISCONNECT
;
4948 if (!test_bit(ICE_SUSPENDED
, pf
->state
)) {
4949 ice_service_task_stop(pf
);
4951 if (!test_bit(ICE_PREPARED_FOR_RESET
, pf
->state
)) {
4952 set_bit(ICE_PFR_REQ
, pf
->state
);
4953 ice_prepare_for_reset(pf
);
4957 return PCI_ERS_RESULT_NEED_RESET
;
4961 * ice_pci_err_slot_reset - a PCI slot reset has just happened
4962 * @pdev: PCI device information struct
4964 * Called to determine if the driver can recover from the PCI slot reset by
4965 * using a register read to determine if the device is recoverable.
4967 static pci_ers_result_t
ice_pci_err_slot_reset(struct pci_dev
*pdev
)
4969 struct ice_pf
*pf
= pci_get_drvdata(pdev
);
4970 pci_ers_result_t result
;
4974 err
= pci_enable_device_mem(pdev
);
4976 dev_err(&pdev
->dev
, "Cannot re-enable PCI device after reset, error %d\n",
4978 result
= PCI_ERS_RESULT_DISCONNECT
;
4980 pci_set_master(pdev
);
4981 pci_restore_state(pdev
);
4982 pci_save_state(pdev
);
4983 pci_wake_from_d3(pdev
, false);
4985 /* Check for life */
4986 reg
= rd32(&pf
->hw
, GLGEN_RTRIG
);
4988 result
= PCI_ERS_RESULT_RECOVERED
;
4990 result
= PCI_ERS_RESULT_DISCONNECT
;
4993 err
= pci_aer_clear_nonfatal_status(pdev
);
4995 dev_dbg(&pdev
->dev
, "pci_aer_clear_nonfatal_status() failed, error %d\n",
4997 /* non-fatal, continue */
5003 * ice_pci_err_resume - restart operations after PCI error recovery
5004 * @pdev: PCI device information struct
5006 * Called to allow the driver to bring things back up after PCI error and/or
5007 * reset recovery have finished
5009 static void ice_pci_err_resume(struct pci_dev
*pdev
)
5011 struct ice_pf
*pf
= pci_get_drvdata(pdev
);
5014 dev_err(&pdev
->dev
, "%s failed, device is unrecoverable\n",
5019 if (test_bit(ICE_SUSPENDED
, pf
->state
)) {
5020 dev_dbg(&pdev
->dev
, "%s failed to resume normal operations!\n",
5025 ice_restore_all_vfs_msi_state(pdev
);
5027 ice_do_reset(pf
, ICE_RESET_PFR
);
5028 ice_service_task_restart(pf
);
5029 mod_timer(&pf
->serv_tmr
, round_jiffies(jiffies
+ pf
->serv_tmr_period
));
5033 * ice_pci_err_reset_prepare - prepare device driver for PCI reset
5034 * @pdev: PCI device information struct
5036 static void ice_pci_err_reset_prepare(struct pci_dev
*pdev
)
5038 struct ice_pf
*pf
= pci_get_drvdata(pdev
);
5040 if (!test_bit(ICE_SUSPENDED
, pf
->state
)) {
5041 ice_service_task_stop(pf
);
5043 if (!test_bit(ICE_PREPARED_FOR_RESET
, pf
->state
)) {
5044 set_bit(ICE_PFR_REQ
, pf
->state
);
5045 ice_prepare_for_reset(pf
);
5051 * ice_pci_err_reset_done - PCI reset done, device driver reset can begin
5052 * @pdev: PCI device information struct
5054 static void ice_pci_err_reset_done(struct pci_dev
*pdev
)
5056 ice_pci_err_resume(pdev
);
5059 /* ice_pci_tbl - PCI Device ID Table
5061 * Wildcard entries (PCI_ANY_ID) should come last
5062 * Last entry must be all 0s
5064 * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
5065 * Class, Class Mask, private data (not used) }
5067 static const struct pci_device_id ice_pci_tbl
[] = {
5068 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E810C_BACKPLANE
), 0 },
5069 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E810C_QSFP
), 0 },
5070 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E810C_SFP
), 0 },
5071 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E810_XXV_BACKPLANE
), 0 },
5072 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E810_XXV_QSFP
), 0 },
5073 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E810_XXV_SFP
), 0 },
5074 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E823C_BACKPLANE
), 0 },
5075 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E823C_QSFP
), 0 },
5076 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E823C_SFP
), 0 },
5077 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E823C_10G_BASE_T
), 0 },
5078 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E823C_SGMII
), 0 },
5079 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E822C_BACKPLANE
), 0 },
5080 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E822C_QSFP
), 0 },
5081 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E822C_SFP
), 0 },
5082 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E822C_10G_BASE_T
), 0 },
5083 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E822C_SGMII
), 0 },
5084 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E822L_BACKPLANE
), 0 },
5085 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E822L_SFP
), 0 },
5086 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E822L_10G_BASE_T
), 0 },
5087 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E822L_SGMII
), 0 },
5088 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E823L_BACKPLANE
), 0 },
5089 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E823L_SFP
), 0 },
5090 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E823L_10G_BASE_T
), 0 },
5091 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E823L_1GBE
), 0 },
5092 { PCI_VDEVICE(INTEL
, ICE_DEV_ID_E823L_QSFP
), 0 },
5093 /* required last entry */
5096 MODULE_DEVICE_TABLE(pci
, ice_pci_tbl
);
5098 static __maybe_unused
SIMPLE_DEV_PM_OPS(ice_pm_ops
, ice_suspend
, ice_resume
);
5100 static const struct pci_error_handlers ice_pci_err_handler
= {
5101 .error_detected
= ice_pci_err_detected
,
5102 .slot_reset
= ice_pci_err_slot_reset
,
5103 .reset_prepare
= ice_pci_err_reset_prepare
,
5104 .reset_done
= ice_pci_err_reset_done
,
5105 .resume
= ice_pci_err_resume
5108 static struct pci_driver ice_driver
= {
5109 .name
= KBUILD_MODNAME
,
5110 .id_table
= ice_pci_tbl
,
5112 .remove
= ice_remove
,
5114 .driver
.pm
= &ice_pm_ops
,
5115 #endif /* CONFIG_PM */
5116 .shutdown
= ice_shutdown
,
5117 .sriov_configure
= ice_sriov_configure
,
5118 .err_handler
= &ice_pci_err_handler
5122 * ice_module_init - Driver registration routine
5124 * ice_module_init is the first routine called when the driver is
5125 * loaded. All it does is register with the PCI subsystem.
5127 static int __init
ice_module_init(void)
5131 pr_info("%s\n", ice_driver_string
);
5132 pr_info("%s\n", ice_copyright
);
5134 ice_wq
= alloc_workqueue("%s", WQ_MEM_RECLAIM
, 0, KBUILD_MODNAME
);
5136 pr_err("Failed to create workqueue\n");
5140 status
= pci_register_driver(&ice_driver
);
5142 pr_err("failed to register PCI driver, err %d\n", status
);
5143 destroy_workqueue(ice_wq
);
5148 module_init(ice_module_init
);
5151 * ice_module_exit - Driver exit cleanup routine
5153 * ice_module_exit is called just before the driver is removed
5156 static void __exit
ice_module_exit(void)
5158 pci_unregister_driver(&ice_driver
);
5159 destroy_workqueue(ice_wq
);
5160 pr_info("module unloaded\n");
5162 module_exit(ice_module_exit
);
5165 * ice_set_mac_address - NDO callback to set MAC address
5166 * @netdev: network interface device structure
5167 * @pi: pointer to an address structure
5169 * Returns 0 on success, negative on failure
5171 static int ice_set_mac_address(struct net_device
*netdev
, void *pi
)
5173 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
5174 struct ice_vsi
*vsi
= np
->vsi
;
5175 struct ice_pf
*pf
= vsi
->back
;
5176 struct ice_hw
*hw
= &pf
->hw
;
5177 struct sockaddr
*addr
= pi
;
5178 enum ice_status status
;
5179 u8 old_mac
[ETH_ALEN
];
5184 mac
= (u8
*)addr
->sa_data
;
5186 if (!is_valid_ether_addr(mac
))
5187 return -EADDRNOTAVAIL
;
5189 if (ether_addr_equal(netdev
->dev_addr
, mac
)) {
5190 netdev_dbg(netdev
, "already using mac %pM\n", mac
);
5194 if (test_bit(ICE_DOWN
, pf
->state
) ||
5195 ice_is_reset_in_progress(pf
->state
)) {
5196 netdev_err(netdev
, "can't set mac %pM. device not ready\n",
5201 netif_addr_lock_bh(netdev
);
5202 ether_addr_copy(old_mac
, netdev
->dev_addr
);
5203 /* change the netdev's MAC address */
5204 memcpy(netdev
->dev_addr
, mac
, netdev
->addr_len
);
5205 netif_addr_unlock_bh(netdev
);
5207 /* Clean up old MAC filter. Not an error if old filter doesn't exist */
5208 status
= ice_fltr_remove_mac(vsi
, old_mac
, ICE_FWD_TO_VSI
);
5209 if (status
&& status
!= ICE_ERR_DOES_NOT_EXIST
) {
5210 err
= -EADDRNOTAVAIL
;
5211 goto err_update_filters
;
5214 /* Add filter for new MAC. If filter exists, return success */
5215 status
= ice_fltr_add_mac(vsi
, mac
, ICE_FWD_TO_VSI
);
5216 if (status
== ICE_ERR_ALREADY_EXISTS
)
5217 /* Although this MAC filter is already present in hardware it's
5218 * possible in some cases (e.g. bonding) that dev_addr was
5219 * modified outside of the driver and needs to be restored back
5222 netdev_dbg(netdev
, "filter for MAC %pM already exists\n", mac
);
5224 /* error if the new filter addition failed */
5225 err
= -EADDRNOTAVAIL
;
5229 netdev_err(netdev
, "can't set MAC %pM. filter update failed\n",
5231 netif_addr_lock_bh(netdev
);
5232 ether_addr_copy(netdev
->dev_addr
, old_mac
);
5233 netif_addr_unlock_bh(netdev
);
5237 netdev_dbg(vsi
->netdev
, "updated MAC address to %pM\n",
5240 /* write new MAC address to the firmware */
5241 flags
= ICE_AQC_MAN_MAC_UPDATE_LAA_WOL
;
5242 status
= ice_aq_manage_mac_write(hw
, mac
, flags
, NULL
);
5244 netdev_err(netdev
, "can't set MAC %pM. write to firmware failed error %s\n",
5245 mac
, ice_stat_str(status
));
5251 * ice_set_rx_mode - NDO callback to set the netdev filters
5252 * @netdev: network interface device structure
5254 static void ice_set_rx_mode(struct net_device
*netdev
)
5256 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
5257 struct ice_vsi
*vsi
= np
->vsi
;
5262 /* Set the flags to synchronize filters
5263 * ndo_set_rx_mode may be triggered even without a change in netdev
5266 set_bit(ICE_VSI_UMAC_FLTR_CHANGED
, vsi
->state
);
5267 set_bit(ICE_VSI_MMAC_FLTR_CHANGED
, vsi
->state
);
5268 set_bit(ICE_FLAG_FLTR_SYNC
, vsi
->back
->flags
);
5270 /* schedule our worker thread which will take care of
5271 * applying the new filter changes
5273 ice_service_task_schedule(vsi
->back
);
5277 * ice_set_tx_maxrate - NDO callback to set the maximum per-queue bitrate
5278 * @netdev: network interface device structure
5279 * @queue_index: Queue ID
5280 * @maxrate: maximum bandwidth in Mbps
5283 ice_set_tx_maxrate(struct net_device
*netdev
, int queue_index
, u32 maxrate
)
5285 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
5286 struct ice_vsi
*vsi
= np
->vsi
;
5287 enum ice_status status
;
5291 /* Validate maxrate requested is within permitted range */
5292 if (maxrate
&& (maxrate
> (ICE_SCHED_MAX_BW
/ 1000))) {
5293 netdev_err(netdev
, "Invalid max rate %d specified for the queue %d\n",
5294 maxrate
, queue_index
);
5298 q_handle
= vsi
->tx_rings
[queue_index
]->q_handle
;
5299 tc
= ice_dcb_get_tc(vsi
, queue_index
);
5301 /* Set BW back to default, when user set maxrate to 0 */
5303 status
= ice_cfg_q_bw_dflt_lmt(vsi
->port_info
, vsi
->idx
, tc
,
5304 q_handle
, ICE_MAX_BW
);
5306 status
= ice_cfg_q_bw_lmt(vsi
->port_info
, vsi
->idx
, tc
,
5307 q_handle
, ICE_MAX_BW
, maxrate
* 1000);
5309 netdev_err(netdev
, "Unable to set Tx max rate, error %s\n",
5310 ice_stat_str(status
));
5318 * ice_fdb_add - add an entry to the hardware database
5319 * @ndm: the input from the stack
5320 * @tb: pointer to array of nladdr (unused)
5321 * @dev: the net device pointer
5322 * @addr: the MAC address entry being added
5324 * @flags: instructions from stack about fdb operation
5325 * @extack: netlink extended ack
5328 ice_fdb_add(struct ndmsg
*ndm
, struct nlattr __always_unused
*tb
[],
5329 struct net_device
*dev
, const unsigned char *addr
, u16 vid
,
5330 u16 flags
, struct netlink_ext_ack __always_unused
*extack
)
5335 netdev_err(dev
, "VLANs aren't supported yet for dev_uc|mc_add()\n");
5338 if (ndm
->ndm_state
&& !(ndm
->ndm_state
& NUD_PERMANENT
)) {
5339 netdev_err(dev
, "FDB only supports static addresses\n");
5343 if (is_unicast_ether_addr(addr
) || is_link_local_ether_addr(addr
))
5344 err
= dev_uc_add_excl(dev
, addr
);
5345 else if (is_multicast_ether_addr(addr
))
5346 err
= dev_mc_add_excl(dev
, addr
);
5350 /* Only return duplicate errors if NLM_F_EXCL is set */
5351 if (err
== -EEXIST
&& !(flags
& NLM_F_EXCL
))
5358 * ice_fdb_del - delete an entry from the hardware database
5359 * @ndm: the input from the stack
5360 * @tb: pointer to array of nladdr (unused)
5361 * @dev: the net device pointer
5362 * @addr: the MAC address entry being added
5366 ice_fdb_del(struct ndmsg
*ndm
, __always_unused
struct nlattr
*tb
[],
5367 struct net_device
*dev
, const unsigned char *addr
,
5368 __always_unused u16 vid
)
5372 if (ndm
->ndm_state
& NUD_PERMANENT
) {
5373 netdev_err(dev
, "FDB only supports static addresses\n");
5377 if (is_unicast_ether_addr(addr
))
5378 err
= dev_uc_del(dev
, addr
);
5379 else if (is_multicast_ether_addr(addr
))
5380 err
= dev_mc_del(dev
, addr
);
5388 * ice_set_features - set the netdev feature flags
5389 * @netdev: ptr to the netdev being adjusted
5390 * @features: the feature set that the stack is suggesting
5393 ice_set_features(struct net_device
*netdev
, netdev_features_t features
)
5395 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
5396 struct ice_vsi
*vsi
= np
->vsi
;
5397 struct ice_pf
*pf
= vsi
->back
;
5400 /* Don't set any netdev advanced features with device in Safe Mode */
5401 if (ice_is_safe_mode(vsi
->back
)) {
5402 dev_err(ice_pf_to_dev(vsi
->back
), "Device is in Safe Mode - not enabling advanced netdev features\n");
5406 /* Do not change setting during reset */
5407 if (ice_is_reset_in_progress(pf
->state
)) {
5408 dev_err(ice_pf_to_dev(vsi
->back
), "Device is resetting, changing advanced netdev features temporarily unavailable.\n");
5412 /* Multiple features can be changed in one call so keep features in
5413 * separate if/else statements to guarantee each feature is checked
5415 if (features
& NETIF_F_RXHASH
&& !(netdev
->features
& NETIF_F_RXHASH
))
5416 ice_vsi_manage_rss_lut(vsi
, true);
5417 else if (!(features
& NETIF_F_RXHASH
) &&
5418 netdev
->features
& NETIF_F_RXHASH
)
5419 ice_vsi_manage_rss_lut(vsi
, false);
5421 if ((features
& NETIF_F_HW_VLAN_CTAG_RX
) &&
5422 !(netdev
->features
& NETIF_F_HW_VLAN_CTAG_RX
))
5423 ret
= ice_vsi_manage_vlan_stripping(vsi
, true);
5424 else if (!(features
& NETIF_F_HW_VLAN_CTAG_RX
) &&
5425 (netdev
->features
& NETIF_F_HW_VLAN_CTAG_RX
))
5426 ret
= ice_vsi_manage_vlan_stripping(vsi
, false);
5428 if ((features
& NETIF_F_HW_VLAN_CTAG_TX
) &&
5429 !(netdev
->features
& NETIF_F_HW_VLAN_CTAG_TX
))
5430 ret
= ice_vsi_manage_vlan_insertion(vsi
);
5431 else if (!(features
& NETIF_F_HW_VLAN_CTAG_TX
) &&
5432 (netdev
->features
& NETIF_F_HW_VLAN_CTAG_TX
))
5433 ret
= ice_vsi_manage_vlan_insertion(vsi
);
5435 if ((features
& NETIF_F_HW_VLAN_CTAG_FILTER
) &&
5436 !(netdev
->features
& NETIF_F_HW_VLAN_CTAG_FILTER
))
5437 ret
= ice_cfg_vlan_pruning(vsi
, true, false);
5438 else if (!(features
& NETIF_F_HW_VLAN_CTAG_FILTER
) &&
5439 (netdev
->features
& NETIF_F_HW_VLAN_CTAG_FILTER
))
5440 ret
= ice_cfg_vlan_pruning(vsi
, false, false);
5442 if ((features
& NETIF_F_NTUPLE
) &&
5443 !(netdev
->features
& NETIF_F_NTUPLE
)) {
5444 ice_vsi_manage_fdir(vsi
, true);
5446 } else if (!(features
& NETIF_F_NTUPLE
) &&
5447 (netdev
->features
& NETIF_F_NTUPLE
)) {
5448 ice_vsi_manage_fdir(vsi
, false);
5449 ice_clear_arfs(vsi
);
5456 * ice_vsi_vlan_setup - Setup VLAN offload properties on a VSI
5457 * @vsi: VSI to setup VLAN properties for
5459 static int ice_vsi_vlan_setup(struct ice_vsi
*vsi
)
5463 if (vsi
->netdev
->features
& NETIF_F_HW_VLAN_CTAG_RX
)
5464 ret
= ice_vsi_manage_vlan_stripping(vsi
, true);
5465 if (vsi
->netdev
->features
& NETIF_F_HW_VLAN_CTAG_TX
)
5466 ret
= ice_vsi_manage_vlan_insertion(vsi
);
5472 * ice_vsi_cfg - Setup the VSI
5473 * @vsi: the VSI being configured
5475 * Return 0 on success and negative value on error
5477 int ice_vsi_cfg(struct ice_vsi
*vsi
)
5482 ice_set_rx_mode(vsi
->netdev
);
5484 err
= ice_vsi_vlan_setup(vsi
);
5489 ice_vsi_cfg_dcb_rings(vsi
);
5491 err
= ice_vsi_cfg_lan_txqs(vsi
);
5492 if (!err
&& ice_is_xdp_ena_vsi(vsi
))
5493 err
= ice_vsi_cfg_xdp_txqs(vsi
);
5495 err
= ice_vsi_cfg_rxqs(vsi
);
5500 /* THEORY OF MODERATION:
5501 * The below code creates custom DIM profiles for use by this driver, because
5502 * the ice driver hardware works differently than the hardware that DIMLIB was
5503 * originally made for. ice hardware doesn't have packet count limits that
5504 * can trigger an interrupt, but it *does* have interrupt rate limit support,
5505 * and this code adds that capability to be used by the driver when it's using
5506 * DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver
5507 * for how to "respond" to traffic and interrupts, so this driver uses a
5508 * slightly different set of moderation parameters to get best performance.
5511 /* the throttle rate for interrupts, basically worst case delay before
5512 * an initial interrupt fires, value is stored in microseconds.
5515 /* the rate limit for interrupts, which can cap a delay from a small
5516 * ITR at a certain amount of interrupts per second. f.e. a 2us ITR
5517 * could yield as much as 500,000 interrupts per second, but with a
5518 * 10us rate limit, it limits to 100,000 interrupts per second. Value
5519 * is stored in microseconds.
5524 /* Make a different profile for Rx that doesn't allow quite so aggressive
5525 * moderation at the high end (it maxes out at 128us or about 8k interrupts a
5526 * second. The INTRL/rate parameters here are only useful to cap small ITR
5527 * values, which is why for larger ITR's - like 128, which can only generate
5528 * 8k interrupts per second, there is no point to rate limit and the values
5529 * are set to zero. The rate limit values do affect latency, and so must
5530 * be reasonably small so to not impact latency sensitive tests.
5532 static const struct ice_dim rx_profile
[] = {
5540 /* The transmit profile, which has the same sorts of values
5541 * as the previous struct
5543 static const struct ice_dim tx_profile
[] = {
5551 static void ice_tx_dim_work(struct work_struct
*work
)
5553 struct ice_ring_container
*rc
;
5554 struct ice_q_vector
*q_vector
;
5558 dim
= container_of(work
, struct dim
, work
);
5559 rc
= container_of(dim
, struct ice_ring_container
, dim
);
5560 q_vector
= container_of(rc
, struct ice_q_vector
, tx
);
5562 if (dim
->profile_ix
>= ARRAY_SIZE(tx_profile
))
5563 dim
->profile_ix
= ARRAY_SIZE(tx_profile
) - 1;
5565 /* look up the values in our local table */
5566 itr
= tx_profile
[dim
->profile_ix
].itr
;
5567 intrl
= tx_profile
[dim
->profile_ix
].intrl
;
5569 ice_trace(tx_dim_work
, q_vector
, dim
);
5570 ice_write_itr(rc
, itr
);
5571 ice_write_intrl(q_vector
, intrl
);
5573 dim
->state
= DIM_START_MEASURE
;
5576 static void ice_rx_dim_work(struct work_struct
*work
)
5578 struct ice_ring_container
*rc
;
5579 struct ice_q_vector
*q_vector
;
5583 dim
= container_of(work
, struct dim
, work
);
5584 rc
= container_of(dim
, struct ice_ring_container
, dim
);
5585 q_vector
= container_of(rc
, struct ice_q_vector
, rx
);
5587 if (dim
->profile_ix
>= ARRAY_SIZE(rx_profile
))
5588 dim
->profile_ix
= ARRAY_SIZE(rx_profile
) - 1;
5590 /* look up the values in our local table */
5591 itr
= rx_profile
[dim
->profile_ix
].itr
;
5592 intrl
= rx_profile
[dim
->profile_ix
].intrl
;
5594 ice_trace(rx_dim_work
, q_vector
, dim
);
5595 ice_write_itr(rc
, itr
);
5596 ice_write_intrl(q_vector
, intrl
);
5598 dim
->state
= DIM_START_MEASURE
;
5602 * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI
5603 * @vsi: the VSI being configured
5605 static void ice_napi_enable_all(struct ice_vsi
*vsi
)
5612 ice_for_each_q_vector(vsi
, q_idx
) {
5613 struct ice_q_vector
*q_vector
= vsi
->q_vectors
[q_idx
];
5615 INIT_WORK(&q_vector
->tx
.dim
.work
, ice_tx_dim_work
);
5616 q_vector
->tx
.dim
.mode
= DIM_CQ_PERIOD_MODE_START_FROM_EQE
;
5618 INIT_WORK(&q_vector
->rx
.dim
.work
, ice_rx_dim_work
);
5619 q_vector
->rx
.dim
.mode
= DIM_CQ_PERIOD_MODE_START_FROM_EQE
;
5621 if (q_vector
->rx
.ring
|| q_vector
->tx
.ring
)
5622 napi_enable(&q_vector
->napi
);
5627 * ice_up_complete - Finish the last steps of bringing up a connection
5628 * @vsi: The VSI being configured
5630 * Return 0 on success and negative value on error
5632 static int ice_up_complete(struct ice_vsi
*vsi
)
5634 struct ice_pf
*pf
= vsi
->back
;
5637 ice_vsi_cfg_msix(vsi
);
5639 /* Enable only Rx rings, Tx rings were enabled by the FW when the
5640 * Tx queue group list was configured and the context bits were
5641 * programmed using ice_vsi_cfg_txqs
5643 err
= ice_vsi_start_all_rx_rings(vsi
);
5647 clear_bit(ICE_VSI_DOWN
, vsi
->state
);
5648 ice_napi_enable_all(vsi
);
5649 ice_vsi_ena_irq(vsi
);
5651 if (vsi
->port_info
&&
5652 (vsi
->port_info
->phy
.link_info
.link_info
& ICE_AQ_LINK_UP
) &&
5654 ice_print_link_msg(vsi
, true);
5655 netif_tx_start_all_queues(vsi
->netdev
);
5656 netif_carrier_on(vsi
->netdev
);
5659 /* Perform an initial read of the statistics registers now to
5660 * set the baseline so counters are ready when interface is up
5662 ice_update_eth_stats(vsi
);
5663 ice_service_task_schedule(pf
);
5669 * ice_up - Bring the connection back up after being down
5670 * @vsi: VSI being configured
5672 int ice_up(struct ice_vsi
*vsi
)
5676 err
= ice_vsi_cfg(vsi
);
5678 err
= ice_up_complete(vsi
);
5684 * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring
5685 * @ring: Tx or Rx ring to read stats from
5686 * @pkts: packets stats counter
5687 * @bytes: bytes stats counter
5689 * This function fetches stats from the ring considering the atomic operations
5690 * that needs to be performed to read u64 values in 32 bit machine.
5693 ice_fetch_u64_stats_per_ring(struct ice_ring
*ring
, u64
*pkts
, u64
*bytes
)
5702 start
= u64_stats_fetch_begin_irq(&ring
->syncp
);
5703 *pkts
= ring
->stats
.pkts
;
5704 *bytes
= ring
->stats
.bytes
;
5705 } while (u64_stats_fetch_retry_irq(&ring
->syncp
, start
));
5709 * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters
5710 * @vsi: the VSI to be updated
5711 * @rings: rings to work on
5712 * @count: number of rings
5715 ice_update_vsi_tx_ring_stats(struct ice_vsi
*vsi
, struct ice_ring
**rings
,
5718 struct rtnl_link_stats64
*vsi_stats
= &vsi
->net_stats
;
5721 for (i
= 0; i
< count
; i
++) {
5722 struct ice_ring
*ring
;
5725 ring
= READ_ONCE(rings
[i
]);
5726 ice_fetch_u64_stats_per_ring(ring
, &pkts
, &bytes
);
5727 vsi_stats
->tx_packets
+= pkts
;
5728 vsi_stats
->tx_bytes
+= bytes
;
5729 vsi
->tx_restart
+= ring
->tx_stats
.restart_q
;
5730 vsi
->tx_busy
+= ring
->tx_stats
.tx_busy
;
5731 vsi
->tx_linearize
+= ring
->tx_stats
.tx_linearize
;
5736 * ice_update_vsi_ring_stats - Update VSI stats counters
5737 * @vsi: the VSI to be updated
5739 static void ice_update_vsi_ring_stats(struct ice_vsi
*vsi
)
5741 struct rtnl_link_stats64
*vsi_stats
= &vsi
->net_stats
;
5745 /* reset netdev stats */
5746 vsi_stats
->tx_packets
= 0;
5747 vsi_stats
->tx_bytes
= 0;
5748 vsi_stats
->rx_packets
= 0;
5749 vsi_stats
->rx_bytes
= 0;
5751 /* reset non-netdev (extended) stats */
5752 vsi
->tx_restart
= 0;
5754 vsi
->tx_linearize
= 0;
5755 vsi
->rx_buf_failed
= 0;
5756 vsi
->rx_page_failed
= 0;
5760 /* update Tx rings counters */
5761 ice_update_vsi_tx_ring_stats(vsi
, vsi
->tx_rings
, vsi
->num_txq
);
5763 /* update Rx rings counters */
5764 ice_for_each_rxq(vsi
, i
) {
5765 struct ice_ring
*ring
= READ_ONCE(vsi
->rx_rings
[i
]);
5767 ice_fetch_u64_stats_per_ring(ring
, &pkts
, &bytes
);
5768 vsi_stats
->rx_packets
+= pkts
;
5769 vsi_stats
->rx_bytes
+= bytes
;
5770 vsi
->rx_buf_failed
+= ring
->rx_stats
.alloc_buf_failed
;
5771 vsi
->rx_page_failed
+= ring
->rx_stats
.alloc_page_failed
;
5774 /* update XDP Tx rings counters */
5775 if (ice_is_xdp_ena_vsi(vsi
))
5776 ice_update_vsi_tx_ring_stats(vsi
, vsi
->xdp_rings
,
5783 * ice_update_vsi_stats - Update VSI stats counters
5784 * @vsi: the VSI to be updated
5786 void ice_update_vsi_stats(struct ice_vsi
*vsi
)
5788 struct rtnl_link_stats64
*cur_ns
= &vsi
->net_stats
;
5789 struct ice_eth_stats
*cur_es
= &vsi
->eth_stats
;
5790 struct ice_pf
*pf
= vsi
->back
;
5792 if (test_bit(ICE_VSI_DOWN
, vsi
->state
) ||
5793 test_bit(ICE_CFG_BUSY
, pf
->state
))
5796 /* get stats as recorded by Tx/Rx rings */
5797 ice_update_vsi_ring_stats(vsi
);
5799 /* get VSI stats as recorded by the hardware */
5800 ice_update_eth_stats(vsi
);
5802 cur_ns
->tx_errors
= cur_es
->tx_errors
;
5803 cur_ns
->rx_dropped
= cur_es
->rx_discards
;
5804 cur_ns
->tx_dropped
= cur_es
->tx_discards
;
5805 cur_ns
->multicast
= cur_es
->rx_multicast
;
5807 /* update some more netdev stats if this is main VSI */
5808 if (vsi
->type
== ICE_VSI_PF
) {
5809 cur_ns
->rx_crc_errors
= pf
->stats
.crc_errors
;
5810 cur_ns
->rx_errors
= pf
->stats
.crc_errors
+
5811 pf
->stats
.illegal_bytes
+
5812 pf
->stats
.rx_len_errors
+
5813 pf
->stats
.rx_undersize
+
5814 pf
->hw_csum_rx_error
+
5815 pf
->stats
.rx_jabber
+
5816 pf
->stats
.rx_fragments
+
5817 pf
->stats
.rx_oversize
;
5818 cur_ns
->rx_length_errors
= pf
->stats
.rx_len_errors
;
5819 /* record drops from the port level */
5820 cur_ns
->rx_missed_errors
= pf
->stats
.eth
.rx_discards
;
5825 * ice_update_pf_stats - Update PF port stats counters
5826 * @pf: PF whose stats needs to be updated
5828 void ice_update_pf_stats(struct ice_pf
*pf
)
5830 struct ice_hw_port_stats
*prev_ps
, *cur_ps
;
5831 struct ice_hw
*hw
= &pf
->hw
;
5835 port
= hw
->port_info
->lport
;
5836 prev_ps
= &pf
->stats_prev
;
5837 cur_ps
= &pf
->stats
;
5839 ice_stat_update40(hw
, GLPRT_GORCL(port
), pf
->stat_prev_loaded
,
5840 &prev_ps
->eth
.rx_bytes
,
5841 &cur_ps
->eth
.rx_bytes
);
5843 ice_stat_update40(hw
, GLPRT_UPRCL(port
), pf
->stat_prev_loaded
,
5844 &prev_ps
->eth
.rx_unicast
,
5845 &cur_ps
->eth
.rx_unicast
);
5847 ice_stat_update40(hw
, GLPRT_MPRCL(port
), pf
->stat_prev_loaded
,
5848 &prev_ps
->eth
.rx_multicast
,
5849 &cur_ps
->eth
.rx_multicast
);
5851 ice_stat_update40(hw
, GLPRT_BPRCL(port
), pf
->stat_prev_loaded
,
5852 &prev_ps
->eth
.rx_broadcast
,
5853 &cur_ps
->eth
.rx_broadcast
);
5855 ice_stat_update32(hw
, PRTRPB_RDPC
, pf
->stat_prev_loaded
,
5856 &prev_ps
->eth
.rx_discards
,
5857 &cur_ps
->eth
.rx_discards
);
5859 ice_stat_update40(hw
, GLPRT_GOTCL(port
), pf
->stat_prev_loaded
,
5860 &prev_ps
->eth
.tx_bytes
,
5861 &cur_ps
->eth
.tx_bytes
);
5863 ice_stat_update40(hw
, GLPRT_UPTCL(port
), pf
->stat_prev_loaded
,
5864 &prev_ps
->eth
.tx_unicast
,
5865 &cur_ps
->eth
.tx_unicast
);
5867 ice_stat_update40(hw
, GLPRT_MPTCL(port
), pf
->stat_prev_loaded
,
5868 &prev_ps
->eth
.tx_multicast
,
5869 &cur_ps
->eth
.tx_multicast
);
5871 ice_stat_update40(hw
, GLPRT_BPTCL(port
), pf
->stat_prev_loaded
,
5872 &prev_ps
->eth
.tx_broadcast
,
5873 &cur_ps
->eth
.tx_broadcast
);
5875 ice_stat_update32(hw
, GLPRT_TDOLD(port
), pf
->stat_prev_loaded
,
5876 &prev_ps
->tx_dropped_link_down
,
5877 &cur_ps
->tx_dropped_link_down
);
5879 ice_stat_update40(hw
, GLPRT_PRC64L(port
), pf
->stat_prev_loaded
,
5880 &prev_ps
->rx_size_64
, &cur_ps
->rx_size_64
);
5882 ice_stat_update40(hw
, GLPRT_PRC127L(port
), pf
->stat_prev_loaded
,
5883 &prev_ps
->rx_size_127
, &cur_ps
->rx_size_127
);
5885 ice_stat_update40(hw
, GLPRT_PRC255L(port
), pf
->stat_prev_loaded
,
5886 &prev_ps
->rx_size_255
, &cur_ps
->rx_size_255
);
5888 ice_stat_update40(hw
, GLPRT_PRC511L(port
), pf
->stat_prev_loaded
,
5889 &prev_ps
->rx_size_511
, &cur_ps
->rx_size_511
);
5891 ice_stat_update40(hw
, GLPRT_PRC1023L(port
), pf
->stat_prev_loaded
,
5892 &prev_ps
->rx_size_1023
, &cur_ps
->rx_size_1023
);
5894 ice_stat_update40(hw
, GLPRT_PRC1522L(port
), pf
->stat_prev_loaded
,
5895 &prev_ps
->rx_size_1522
, &cur_ps
->rx_size_1522
);
5897 ice_stat_update40(hw
, GLPRT_PRC9522L(port
), pf
->stat_prev_loaded
,
5898 &prev_ps
->rx_size_big
, &cur_ps
->rx_size_big
);
5900 ice_stat_update40(hw
, GLPRT_PTC64L(port
), pf
->stat_prev_loaded
,
5901 &prev_ps
->tx_size_64
, &cur_ps
->tx_size_64
);
5903 ice_stat_update40(hw
, GLPRT_PTC127L(port
), pf
->stat_prev_loaded
,
5904 &prev_ps
->tx_size_127
, &cur_ps
->tx_size_127
);
5906 ice_stat_update40(hw
, GLPRT_PTC255L(port
), pf
->stat_prev_loaded
,
5907 &prev_ps
->tx_size_255
, &cur_ps
->tx_size_255
);
5909 ice_stat_update40(hw
, GLPRT_PTC511L(port
), pf
->stat_prev_loaded
,
5910 &prev_ps
->tx_size_511
, &cur_ps
->tx_size_511
);
5912 ice_stat_update40(hw
, GLPRT_PTC1023L(port
), pf
->stat_prev_loaded
,
5913 &prev_ps
->tx_size_1023
, &cur_ps
->tx_size_1023
);
5915 ice_stat_update40(hw
, GLPRT_PTC1522L(port
), pf
->stat_prev_loaded
,
5916 &prev_ps
->tx_size_1522
, &cur_ps
->tx_size_1522
);
5918 ice_stat_update40(hw
, GLPRT_PTC9522L(port
), pf
->stat_prev_loaded
,
5919 &prev_ps
->tx_size_big
, &cur_ps
->tx_size_big
);
5921 fd_ctr_base
= hw
->fd_ctr_base
;
5923 ice_stat_update40(hw
,
5924 GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base
)),
5925 pf
->stat_prev_loaded
, &prev_ps
->fd_sb_match
,
5926 &cur_ps
->fd_sb_match
);
5927 ice_stat_update32(hw
, GLPRT_LXONRXC(port
), pf
->stat_prev_loaded
,
5928 &prev_ps
->link_xon_rx
, &cur_ps
->link_xon_rx
);
5930 ice_stat_update32(hw
, GLPRT_LXOFFRXC(port
), pf
->stat_prev_loaded
,
5931 &prev_ps
->link_xoff_rx
, &cur_ps
->link_xoff_rx
);
5933 ice_stat_update32(hw
, GLPRT_LXONTXC(port
), pf
->stat_prev_loaded
,
5934 &prev_ps
->link_xon_tx
, &cur_ps
->link_xon_tx
);
5936 ice_stat_update32(hw
, GLPRT_LXOFFTXC(port
), pf
->stat_prev_loaded
,
5937 &prev_ps
->link_xoff_tx
, &cur_ps
->link_xoff_tx
);
5939 ice_update_dcb_stats(pf
);
5941 ice_stat_update32(hw
, GLPRT_CRCERRS(port
), pf
->stat_prev_loaded
,
5942 &prev_ps
->crc_errors
, &cur_ps
->crc_errors
);
5944 ice_stat_update32(hw
, GLPRT_ILLERRC(port
), pf
->stat_prev_loaded
,
5945 &prev_ps
->illegal_bytes
, &cur_ps
->illegal_bytes
);
5947 ice_stat_update32(hw
, GLPRT_MLFC(port
), pf
->stat_prev_loaded
,
5948 &prev_ps
->mac_local_faults
,
5949 &cur_ps
->mac_local_faults
);
5951 ice_stat_update32(hw
, GLPRT_MRFC(port
), pf
->stat_prev_loaded
,
5952 &prev_ps
->mac_remote_faults
,
5953 &cur_ps
->mac_remote_faults
);
5955 ice_stat_update32(hw
, GLPRT_RLEC(port
), pf
->stat_prev_loaded
,
5956 &prev_ps
->rx_len_errors
, &cur_ps
->rx_len_errors
);
5958 ice_stat_update32(hw
, GLPRT_RUC(port
), pf
->stat_prev_loaded
,
5959 &prev_ps
->rx_undersize
, &cur_ps
->rx_undersize
);
5961 ice_stat_update32(hw
, GLPRT_RFC(port
), pf
->stat_prev_loaded
,
5962 &prev_ps
->rx_fragments
, &cur_ps
->rx_fragments
);
5964 ice_stat_update32(hw
, GLPRT_ROC(port
), pf
->stat_prev_loaded
,
5965 &prev_ps
->rx_oversize
, &cur_ps
->rx_oversize
);
5967 ice_stat_update32(hw
, GLPRT_RJC(port
), pf
->stat_prev_loaded
,
5968 &prev_ps
->rx_jabber
, &cur_ps
->rx_jabber
);
5970 cur_ps
->fd_sb_status
= test_bit(ICE_FLAG_FD_ENA
, pf
->flags
) ? 1 : 0;
5972 pf
->stat_prev_loaded
= true;
5976 * ice_get_stats64 - get statistics for network device structure
5977 * @netdev: network interface device structure
5978 * @stats: main device statistics structure
5981 void ice_get_stats64(struct net_device
*netdev
, struct rtnl_link_stats64
*stats
)
5983 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
5984 struct rtnl_link_stats64
*vsi_stats
;
5985 struct ice_vsi
*vsi
= np
->vsi
;
5987 vsi_stats
= &vsi
->net_stats
;
5989 if (!vsi
->num_txq
|| !vsi
->num_rxq
)
5992 /* netdev packet/byte stats come from ring counter. These are obtained
5993 * by summing up ring counters (done by ice_update_vsi_ring_stats).
5994 * But, only call the update routine and read the registers if VSI is
5997 if (!test_bit(ICE_VSI_DOWN
, vsi
->state
))
5998 ice_update_vsi_ring_stats(vsi
);
5999 stats
->tx_packets
= vsi_stats
->tx_packets
;
6000 stats
->tx_bytes
= vsi_stats
->tx_bytes
;
6001 stats
->rx_packets
= vsi_stats
->rx_packets
;
6002 stats
->rx_bytes
= vsi_stats
->rx_bytes
;
6004 /* The rest of the stats can be read from the hardware but instead we
6005 * just return values that the watchdog task has already obtained from
6008 stats
->multicast
= vsi_stats
->multicast
;
6009 stats
->tx_errors
= vsi_stats
->tx_errors
;
6010 stats
->tx_dropped
= vsi_stats
->tx_dropped
;
6011 stats
->rx_errors
= vsi_stats
->rx_errors
;
6012 stats
->rx_dropped
= vsi_stats
->rx_dropped
;
6013 stats
->rx_crc_errors
= vsi_stats
->rx_crc_errors
;
6014 stats
->rx_length_errors
= vsi_stats
->rx_length_errors
;
6018 * ice_napi_disable_all - Disable NAPI for all q_vectors in the VSI
6019 * @vsi: VSI having NAPI disabled
6021 static void ice_napi_disable_all(struct ice_vsi
*vsi
)
6028 ice_for_each_q_vector(vsi
, q_idx
) {
6029 struct ice_q_vector
*q_vector
= vsi
->q_vectors
[q_idx
];
6031 if (q_vector
->rx
.ring
|| q_vector
->tx
.ring
)
6032 napi_disable(&q_vector
->napi
);
6034 cancel_work_sync(&q_vector
->tx
.dim
.work
);
6035 cancel_work_sync(&q_vector
->rx
.dim
.work
);
6040 * ice_down - Shutdown the connection
6041 * @vsi: The VSI being stopped
6043 int ice_down(struct ice_vsi
*vsi
)
6045 int i
, tx_err
, rx_err
, link_err
= 0;
6047 /* Caller of this function is expected to set the
6048 * vsi->state ICE_DOWN bit
6051 netif_carrier_off(vsi
->netdev
);
6052 netif_tx_disable(vsi
->netdev
);
6055 ice_vsi_dis_irq(vsi
);
6057 tx_err
= ice_vsi_stop_lan_tx_rings(vsi
, ICE_NO_RESET
, 0);
6059 netdev_err(vsi
->netdev
, "Failed stop Tx rings, VSI %d error %d\n",
6060 vsi
->vsi_num
, tx_err
);
6061 if (!tx_err
&& ice_is_xdp_ena_vsi(vsi
)) {
6062 tx_err
= ice_vsi_stop_xdp_tx_rings(vsi
);
6064 netdev_err(vsi
->netdev
, "Failed stop XDP rings, VSI %d error %d\n",
6065 vsi
->vsi_num
, tx_err
);
6068 rx_err
= ice_vsi_stop_all_rx_rings(vsi
);
6070 netdev_err(vsi
->netdev
, "Failed stop Rx rings, VSI %d error %d\n",
6071 vsi
->vsi_num
, rx_err
);
6073 ice_napi_disable_all(vsi
);
6075 if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA
, vsi
->back
->flags
)) {
6076 link_err
= ice_force_phys_link_state(vsi
, false);
6078 netdev_err(vsi
->netdev
, "Failed to set physical link down, VSI %d error %d\n",
6079 vsi
->vsi_num
, link_err
);
6082 ice_for_each_txq(vsi
, i
)
6083 ice_clean_tx_ring(vsi
->tx_rings
[i
]);
6085 ice_for_each_rxq(vsi
, i
)
6086 ice_clean_rx_ring(vsi
->rx_rings
[i
]);
6088 if (tx_err
|| rx_err
|| link_err
) {
6089 netdev_err(vsi
->netdev
, "Failed to close VSI 0x%04X on switch 0x%04X\n",
6090 vsi
->vsi_num
, vsi
->vsw
->sw_id
);
6098 * ice_vsi_setup_tx_rings - Allocate VSI Tx queue resources
6099 * @vsi: VSI having resources allocated
6101 * Return 0 on success, negative on failure
6103 int ice_vsi_setup_tx_rings(struct ice_vsi
*vsi
)
6107 if (!vsi
->num_txq
) {
6108 dev_err(ice_pf_to_dev(vsi
->back
), "VSI %d has 0 Tx queues\n",
6113 ice_for_each_txq(vsi
, i
) {
6114 struct ice_ring
*ring
= vsi
->tx_rings
[i
];
6119 ring
->netdev
= vsi
->netdev
;
6120 err
= ice_setup_tx_ring(ring
);
6129 * ice_vsi_setup_rx_rings - Allocate VSI Rx queue resources
6130 * @vsi: VSI having resources allocated
6132 * Return 0 on success, negative on failure
6134 int ice_vsi_setup_rx_rings(struct ice_vsi
*vsi
)
6138 if (!vsi
->num_rxq
) {
6139 dev_err(ice_pf_to_dev(vsi
->back
), "VSI %d has 0 Rx queues\n",
6144 ice_for_each_rxq(vsi
, i
) {
6145 struct ice_ring
*ring
= vsi
->rx_rings
[i
];
6150 ring
->netdev
= vsi
->netdev
;
6151 err
= ice_setup_rx_ring(ring
);
6160 * ice_vsi_open_ctrl - open control VSI for use
6161 * @vsi: the VSI to open
6163 * Initialization of the Control VSI
6165 * Returns 0 on success, negative value on error
6167 int ice_vsi_open_ctrl(struct ice_vsi
*vsi
)
6169 char int_name
[ICE_INT_NAME_STR_LEN
];
6170 struct ice_pf
*pf
= vsi
->back
;
6174 dev
= ice_pf_to_dev(pf
);
6175 /* allocate descriptors */
6176 err
= ice_vsi_setup_tx_rings(vsi
);
6180 err
= ice_vsi_setup_rx_rings(vsi
);
6184 err
= ice_vsi_cfg(vsi
);
6188 snprintf(int_name
, sizeof(int_name
) - 1, "%s-%s:ctrl",
6189 dev_driver_string(dev
), dev_name(dev
));
6190 err
= ice_vsi_req_irq_msix(vsi
, int_name
);
6194 ice_vsi_cfg_msix(vsi
);
6196 err
= ice_vsi_start_all_rx_rings(vsi
);
6198 goto err_up_complete
;
6200 clear_bit(ICE_VSI_DOWN
, vsi
->state
);
6201 ice_vsi_ena_irq(vsi
);
6208 ice_vsi_free_rx_rings(vsi
);
6210 ice_vsi_free_tx_rings(vsi
);
6216 * ice_vsi_open - Called when a network interface is made active
6217 * @vsi: the VSI to open
6219 * Initialization of the VSI
6221 * Returns 0 on success, negative value on error
6223 static int ice_vsi_open(struct ice_vsi
*vsi
)
6225 char int_name
[ICE_INT_NAME_STR_LEN
];
6226 struct ice_pf
*pf
= vsi
->back
;
6229 /* allocate descriptors */
6230 err
= ice_vsi_setup_tx_rings(vsi
);
6234 err
= ice_vsi_setup_rx_rings(vsi
);
6238 err
= ice_vsi_cfg(vsi
);
6242 snprintf(int_name
, sizeof(int_name
) - 1, "%s-%s",
6243 dev_driver_string(ice_pf_to_dev(pf
)), vsi
->netdev
->name
);
6244 err
= ice_vsi_req_irq_msix(vsi
, int_name
);
6248 /* Notify the stack of the actual queue counts. */
6249 err
= netif_set_real_num_tx_queues(vsi
->netdev
, vsi
->num_txq
);
6253 err
= netif_set_real_num_rx_queues(vsi
->netdev
, vsi
->num_rxq
);
6257 err
= ice_up_complete(vsi
);
6259 goto err_up_complete
;
6266 ice_vsi_free_irq(vsi
);
6268 ice_vsi_free_rx_rings(vsi
);
6270 ice_vsi_free_tx_rings(vsi
);
6276 * ice_vsi_release_all - Delete all VSIs
6277 * @pf: PF from which all VSIs are being removed
6279 static void ice_vsi_release_all(struct ice_pf
*pf
)
6286 ice_for_each_vsi(pf
, i
) {
6290 err
= ice_vsi_release(pf
->vsi
[i
]);
6292 dev_dbg(ice_pf_to_dev(pf
), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n",
6293 i
, err
, pf
->vsi
[i
]->vsi_num
);
6298 * ice_vsi_rebuild_by_type - Rebuild VSI of a given type
6299 * @pf: pointer to the PF instance
6300 * @type: VSI type to rebuild
6302 * Iterates through the pf->vsi array and rebuilds VSIs of the requested type
6304 static int ice_vsi_rebuild_by_type(struct ice_pf
*pf
, enum ice_vsi_type type
)
6306 struct device
*dev
= ice_pf_to_dev(pf
);
6307 enum ice_status status
;
6310 ice_for_each_vsi(pf
, i
) {
6311 struct ice_vsi
*vsi
= pf
->vsi
[i
];
6313 if (!vsi
|| vsi
->type
!= type
)
6316 /* rebuild the VSI */
6317 err
= ice_vsi_rebuild(vsi
, true);
6319 dev_err(dev
, "rebuild VSI failed, err %d, VSI index %d, type %s\n",
6320 err
, vsi
->idx
, ice_vsi_type_str(type
));
6324 /* replay filters for the VSI */
6325 status
= ice_replay_vsi(&pf
->hw
, vsi
->idx
);
6327 dev_err(dev
, "replay VSI failed, status %s, VSI index %d, type %s\n",
6328 ice_stat_str(status
), vsi
->idx
,
6329 ice_vsi_type_str(type
));
6333 /* Re-map HW VSI number, using VSI handle that has been
6334 * previously validated in ice_replay_vsi() call above
6336 vsi
->vsi_num
= ice_get_hw_vsi_num(&pf
->hw
, vsi
->idx
);
6338 /* enable the VSI */
6339 err
= ice_ena_vsi(vsi
, false);
6341 dev_err(dev
, "enable VSI failed, err %d, VSI index %d, type %s\n",
6342 err
, vsi
->idx
, ice_vsi_type_str(type
));
6346 dev_info(dev
, "VSI rebuilt. VSI index %d, type %s\n", vsi
->idx
,
6347 ice_vsi_type_str(type
));
6354 * ice_update_pf_netdev_link - Update PF netdev link status
6355 * @pf: pointer to the PF instance
6357 static void ice_update_pf_netdev_link(struct ice_pf
*pf
)
6362 ice_for_each_vsi(pf
, i
) {
6363 struct ice_vsi
*vsi
= pf
->vsi
[i
];
6365 if (!vsi
|| vsi
->type
!= ICE_VSI_PF
)
6368 ice_get_link_status(pf
->vsi
[i
]->port_info
, &link_up
);
6370 netif_carrier_on(pf
->vsi
[i
]->netdev
);
6371 netif_tx_wake_all_queues(pf
->vsi
[i
]->netdev
);
6373 netif_carrier_off(pf
->vsi
[i
]->netdev
);
6374 netif_tx_stop_all_queues(pf
->vsi
[i
]->netdev
);
6380 * ice_rebuild - rebuild after reset
6381 * @pf: PF to rebuild
6382 * @reset_type: type of reset
6384 * Do not rebuild VF VSI in this flow because that is already handled via
6385 * ice_reset_all_vfs(). This is because requirements for resetting a VF after a
6386 * PFR/CORER/GLOBER/etc. are different than the normal flow. Also, we don't want
6387 * to reset/rebuild all the VF VSI twice.
6389 static void ice_rebuild(struct ice_pf
*pf
, enum ice_reset_req reset_type
)
6391 struct device
*dev
= ice_pf_to_dev(pf
);
6392 struct ice_hw
*hw
= &pf
->hw
;
6393 enum ice_status ret
;
6396 if (test_bit(ICE_DOWN
, pf
->state
))
6397 goto clear_recovery
;
6399 dev_dbg(dev
, "rebuilding PF after reset_type=%d\n", reset_type
);
6401 ret
= ice_init_all_ctrlq(hw
);
6403 dev_err(dev
, "control queues init failed %s\n",
6405 goto err_init_ctrlq
;
6408 /* if DDP was previously loaded successfully */
6409 if (!ice_is_safe_mode(pf
)) {
6410 /* reload the SW DB of filter tables */
6411 if (reset_type
== ICE_RESET_PFR
)
6412 ice_fill_blk_tbls(hw
);
6414 /* Reload DDP Package after CORER/GLOBR reset */
6415 ice_load_pkg(NULL
, pf
);
6418 ret
= ice_clear_pf_cfg(hw
);
6420 dev_err(dev
, "clear PF configuration failed %s\n",
6422 goto err_init_ctrlq
;
6425 if (pf
->first_sw
->dflt_vsi_ena
)
6426 dev_info(dev
, "Clearing default VSI, re-enable after reset completes\n");
6427 /* clear the default VSI configuration if it exists */
6428 pf
->first_sw
->dflt_vsi
= NULL
;
6429 pf
->first_sw
->dflt_vsi_ena
= false;
6431 ice_clear_pxe_mode(hw
);
6433 ret
= ice_init_nvm(hw
);
6435 dev_err(dev
, "ice_init_nvm failed %s\n", ice_stat_str(ret
));
6436 goto err_init_ctrlq
;
6439 ret
= ice_get_caps(hw
);
6441 dev_err(dev
, "ice_get_caps failed %s\n", ice_stat_str(ret
));
6442 goto err_init_ctrlq
;
6445 ret
= ice_aq_set_mac_cfg(hw
, ICE_AQ_SET_MAC_FRAME_SIZE_MAX
, NULL
);
6447 dev_err(dev
, "set_mac_cfg failed %s\n", ice_stat_str(ret
));
6448 goto err_init_ctrlq
;
6451 err
= ice_sched_init_port(hw
->port_info
);
6453 goto err_sched_init_port
;
6455 /* start misc vector */
6456 err
= ice_req_irq_msix_misc(pf
);
6458 dev_err(dev
, "misc vector setup failed: %d\n", err
);
6459 goto err_sched_init_port
;
6462 if (test_bit(ICE_FLAG_FD_ENA
, pf
->flags
)) {
6463 wr32(hw
, PFQF_FD_ENA
, PFQF_FD_ENA_FD_ENA_M
);
6464 if (!rd32(hw
, PFQF_FD_SIZE
)) {
6465 u16 unused
, guar
, b_effort
;
6467 guar
= hw
->func_caps
.fd_fltr_guar
;
6468 b_effort
= hw
->func_caps
.fd_fltr_best_effort
;
6470 /* force guaranteed filter pool for PF */
6471 ice_alloc_fd_guar_item(hw
, &unused
, guar
);
6472 /* force shared filter pool for PF */
6473 ice_alloc_fd_shrd_item(hw
, &unused
, b_effort
);
6477 if (test_bit(ICE_FLAG_DCB_ENA
, pf
->flags
))
6478 ice_dcb_rebuild(pf
);
6480 /* If the PF previously had enabled PTP, PTP init needs to happen before
6481 * the VSI rebuild. If not, this causes the PTP link status events to
6484 if (test_bit(ICE_FLAG_PTP_SUPPORTED
, pf
->flags
))
6487 /* rebuild PF VSI */
6488 err
= ice_vsi_rebuild_by_type(pf
, ICE_VSI_PF
);
6490 dev_err(dev
, "PF VSI rebuild failed: %d\n", err
);
6491 goto err_vsi_rebuild
;
6494 /* If Flow Director is active */
6495 if (test_bit(ICE_FLAG_FD_ENA
, pf
->flags
)) {
6496 err
= ice_vsi_rebuild_by_type(pf
, ICE_VSI_CTRL
);
6498 dev_err(dev
, "control VSI rebuild failed: %d\n", err
);
6499 goto err_vsi_rebuild
;
6502 /* replay HW Flow Director recipes */
6504 ice_fdir_replay_flows(hw
);
6506 /* replay Flow Director filters */
6507 ice_fdir_replay_fltrs(pf
);
6509 ice_rebuild_arfs(pf
);
6512 ice_update_pf_netdev_link(pf
);
6514 /* tell the firmware we are up */
6515 ret
= ice_send_version(pf
);
6517 dev_err(dev
, "Rebuild failed due to error sending driver version: %s\n",
6519 goto err_vsi_rebuild
;
6522 ice_replay_post(hw
);
6524 /* if we get here, reset flow is successful */
6525 clear_bit(ICE_RESET_FAILED
, pf
->state
);
6527 ice_plug_aux_dev(pf
);
6531 err_sched_init_port
:
6532 ice_sched_cleanup_all(hw
);
6534 ice_shutdown_all_ctrlq(hw
);
6535 set_bit(ICE_RESET_FAILED
, pf
->state
);
6537 /* set this bit in PF state to control service task scheduling */
6538 set_bit(ICE_NEEDS_RESTART
, pf
->state
);
6539 dev_err(dev
, "Rebuild failed, unload and reload driver\n");
6543 * ice_max_xdp_frame_size - returns the maximum allowed frame size for XDP
6544 * @vsi: Pointer to VSI structure
6546 static int ice_max_xdp_frame_size(struct ice_vsi
*vsi
)
6548 if (PAGE_SIZE
>= 8192 || test_bit(ICE_FLAG_LEGACY_RX
, vsi
->back
->flags
))
6549 return ICE_RXBUF_2048
- XDP_PACKET_HEADROOM
;
6551 return ICE_RXBUF_3072
;
6555 * ice_change_mtu - NDO callback to change the MTU
6556 * @netdev: network interface device structure
6557 * @new_mtu: new value for maximum frame size
6559 * Returns 0 on success, negative on failure
6561 static int ice_change_mtu(struct net_device
*netdev
, int new_mtu
)
6563 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
6564 struct ice_vsi
*vsi
= np
->vsi
;
6565 struct ice_pf
*pf
= vsi
->back
;
6569 if (new_mtu
== (int)netdev
->mtu
) {
6570 netdev_warn(netdev
, "MTU is already %u\n", netdev
->mtu
);
6574 if (ice_is_xdp_ena_vsi(vsi
)) {
6575 int frame_size
= ice_max_xdp_frame_size(vsi
);
6577 if (new_mtu
+ ICE_ETH_PKT_HDR_PAD
> frame_size
) {
6578 netdev_err(netdev
, "max MTU for XDP usage is %d\n",
6579 frame_size
- ICE_ETH_PKT_HDR_PAD
);
6584 /* if a reset is in progress, wait for some time for it to complete */
6586 if (ice_is_reset_in_progress(pf
->state
)) {
6588 usleep_range(1000, 2000);
6593 } while (count
< 100);
6596 netdev_err(netdev
, "can't change MTU. Device is busy\n");
6600 netdev
->mtu
= (unsigned int)new_mtu
;
6602 /* if VSI is up, bring it down and then back up */
6603 if (!test_and_set_bit(ICE_VSI_DOWN
, vsi
->state
)) {
6604 err
= ice_down(vsi
);
6606 netdev_err(netdev
, "change MTU if_down err %d\n", err
);
6612 netdev_err(netdev
, "change MTU if_up err %d\n", err
);
6617 netdev_dbg(netdev
, "changed MTU to %d\n", new_mtu
);
6618 set_bit(ICE_FLAG_MTU_CHANGED
, pf
->flags
);
6624 * ice_eth_ioctl - Access the hwtstamp interface
6625 * @netdev: network interface device structure
6626 * @ifr: interface request data
6627 * @cmd: ioctl command
6629 static int ice_eth_ioctl(struct net_device
*netdev
, struct ifreq
*ifr
, int cmd
)
6631 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
6632 struct ice_pf
*pf
= np
->vsi
->back
;
6636 return ice_ptp_get_ts_config(pf
, ifr
);
6638 return ice_ptp_set_ts_config(pf
, ifr
);
6645 * ice_aq_str - convert AQ err code to a string
6646 * @aq_err: the AQ error code to convert
6648 const char *ice_aq_str(enum ice_aq_err aq_err
)
6653 case ICE_AQ_RC_EPERM
:
6654 return "ICE_AQ_RC_EPERM";
6655 case ICE_AQ_RC_ENOENT
:
6656 return "ICE_AQ_RC_ENOENT";
6657 case ICE_AQ_RC_ENOMEM
:
6658 return "ICE_AQ_RC_ENOMEM";
6659 case ICE_AQ_RC_EBUSY
:
6660 return "ICE_AQ_RC_EBUSY";
6661 case ICE_AQ_RC_EEXIST
:
6662 return "ICE_AQ_RC_EEXIST";
6663 case ICE_AQ_RC_EINVAL
:
6664 return "ICE_AQ_RC_EINVAL";
6665 case ICE_AQ_RC_ENOSPC
:
6666 return "ICE_AQ_RC_ENOSPC";
6667 case ICE_AQ_RC_ENOSYS
:
6668 return "ICE_AQ_RC_ENOSYS";
6669 case ICE_AQ_RC_EMODE
:
6670 return "ICE_AQ_RC_EMODE";
6671 case ICE_AQ_RC_ENOSEC
:
6672 return "ICE_AQ_RC_ENOSEC";
6673 case ICE_AQ_RC_EBADSIG
:
6674 return "ICE_AQ_RC_EBADSIG";
6675 case ICE_AQ_RC_ESVN
:
6676 return "ICE_AQ_RC_ESVN";
6677 case ICE_AQ_RC_EBADMAN
:
6678 return "ICE_AQ_RC_EBADMAN";
6679 case ICE_AQ_RC_EBADBUF
:
6680 return "ICE_AQ_RC_EBADBUF";
6683 return "ICE_AQ_RC_UNKNOWN";
6687 * ice_stat_str - convert status err code to a string
6688 * @stat_err: the status error code to convert
6690 const char *ice_stat_str(enum ice_status stat_err
)
6696 return "ICE_ERR_PARAM";
6697 case ICE_ERR_NOT_IMPL
:
6698 return "ICE_ERR_NOT_IMPL";
6699 case ICE_ERR_NOT_READY
:
6700 return "ICE_ERR_NOT_READY";
6701 case ICE_ERR_NOT_SUPPORTED
:
6702 return "ICE_ERR_NOT_SUPPORTED";
6703 case ICE_ERR_BAD_PTR
:
6704 return "ICE_ERR_BAD_PTR";
6705 case ICE_ERR_INVAL_SIZE
:
6706 return "ICE_ERR_INVAL_SIZE";
6707 case ICE_ERR_DEVICE_NOT_SUPPORTED
:
6708 return "ICE_ERR_DEVICE_NOT_SUPPORTED";
6709 case ICE_ERR_RESET_FAILED
:
6710 return "ICE_ERR_RESET_FAILED";
6711 case ICE_ERR_FW_API_VER
:
6712 return "ICE_ERR_FW_API_VER";
6713 case ICE_ERR_NO_MEMORY
:
6714 return "ICE_ERR_NO_MEMORY";
6716 return "ICE_ERR_CFG";
6717 case ICE_ERR_OUT_OF_RANGE
:
6718 return "ICE_ERR_OUT_OF_RANGE";
6719 case ICE_ERR_ALREADY_EXISTS
:
6720 return "ICE_ERR_ALREADY_EXISTS";
6722 return "ICE_ERR_NVM";
6723 case ICE_ERR_NVM_CHECKSUM
:
6724 return "ICE_ERR_NVM_CHECKSUM";
6725 case ICE_ERR_BUF_TOO_SHORT
:
6726 return "ICE_ERR_BUF_TOO_SHORT";
6727 case ICE_ERR_NVM_BLANK_MODE
:
6728 return "ICE_ERR_NVM_BLANK_MODE";
6729 case ICE_ERR_IN_USE
:
6730 return "ICE_ERR_IN_USE";
6731 case ICE_ERR_MAX_LIMIT
:
6732 return "ICE_ERR_MAX_LIMIT";
6733 case ICE_ERR_RESET_ONGOING
:
6734 return "ICE_ERR_RESET_ONGOING";
6735 case ICE_ERR_HW_TABLE
:
6736 return "ICE_ERR_HW_TABLE";
6737 case ICE_ERR_DOES_NOT_EXIST
:
6738 return "ICE_ERR_DOES_NOT_EXIST";
6739 case ICE_ERR_FW_DDP_MISMATCH
:
6740 return "ICE_ERR_FW_DDP_MISMATCH";
6741 case ICE_ERR_AQ_ERROR
:
6742 return "ICE_ERR_AQ_ERROR";
6743 case ICE_ERR_AQ_TIMEOUT
:
6744 return "ICE_ERR_AQ_TIMEOUT";
6745 case ICE_ERR_AQ_FULL
:
6746 return "ICE_ERR_AQ_FULL";
6747 case ICE_ERR_AQ_NO_WORK
:
6748 return "ICE_ERR_AQ_NO_WORK";
6749 case ICE_ERR_AQ_EMPTY
:
6750 return "ICE_ERR_AQ_EMPTY";
6751 case ICE_ERR_AQ_FW_CRITICAL
:
6752 return "ICE_ERR_AQ_FW_CRITICAL";
6755 return "ICE_ERR_UNKNOWN";
6759 * ice_set_rss_lut - Set RSS LUT
6760 * @vsi: Pointer to VSI structure
6761 * @lut: Lookup table
6762 * @lut_size: Lookup table size
6764 * Returns 0 on success, negative on failure
6766 int ice_set_rss_lut(struct ice_vsi
*vsi
, u8
*lut
, u16 lut_size
)
6768 struct ice_aq_get_set_rss_lut_params params
= {};
6769 struct ice_hw
*hw
= &vsi
->back
->hw
;
6770 enum ice_status status
;
6775 params
.vsi_handle
= vsi
->idx
;
6776 params
.lut_size
= lut_size
;
6777 params
.lut_type
= vsi
->rss_lut_type
;
6780 status
= ice_aq_set_rss_lut(hw
, ¶ms
);
6782 dev_err(ice_pf_to_dev(vsi
->back
), "Cannot set RSS lut, err %s aq_err %s\n",
6783 ice_stat_str(status
),
6784 ice_aq_str(hw
->adminq
.sq_last_status
));
6792 * ice_set_rss_key - Set RSS key
6793 * @vsi: Pointer to the VSI structure
6794 * @seed: RSS hash seed
6796 * Returns 0 on success, negative on failure
6798 int ice_set_rss_key(struct ice_vsi
*vsi
, u8
*seed
)
6800 struct ice_hw
*hw
= &vsi
->back
->hw
;
6801 enum ice_status status
;
6806 status
= ice_aq_set_rss_key(hw
, vsi
->idx
, (struct ice_aqc_get_set_rss_keys
*)seed
);
6808 dev_err(ice_pf_to_dev(vsi
->back
), "Cannot set RSS key, err %s aq_err %s\n",
6809 ice_stat_str(status
),
6810 ice_aq_str(hw
->adminq
.sq_last_status
));
6818 * ice_get_rss_lut - Get RSS LUT
6819 * @vsi: Pointer to VSI structure
6820 * @lut: Buffer to store the lookup table entries
6821 * @lut_size: Size of buffer to store the lookup table entries
6823 * Returns 0 on success, negative on failure
6825 int ice_get_rss_lut(struct ice_vsi
*vsi
, u8
*lut
, u16 lut_size
)
6827 struct ice_aq_get_set_rss_lut_params params
= {};
6828 struct ice_hw
*hw
= &vsi
->back
->hw
;
6829 enum ice_status status
;
6834 params
.vsi_handle
= vsi
->idx
;
6835 params
.lut_size
= lut_size
;
6836 params
.lut_type
= vsi
->rss_lut_type
;
6839 status
= ice_aq_get_rss_lut(hw
, ¶ms
);
6841 dev_err(ice_pf_to_dev(vsi
->back
), "Cannot get RSS lut, err %s aq_err %s\n",
6842 ice_stat_str(status
),
6843 ice_aq_str(hw
->adminq
.sq_last_status
));
6851 * ice_get_rss_key - Get RSS key
6852 * @vsi: Pointer to VSI structure
6853 * @seed: Buffer to store the key in
6855 * Returns 0 on success, negative on failure
6857 int ice_get_rss_key(struct ice_vsi
*vsi
, u8
*seed
)
6859 struct ice_hw
*hw
= &vsi
->back
->hw
;
6860 enum ice_status status
;
6865 status
= ice_aq_get_rss_key(hw
, vsi
->idx
, (struct ice_aqc_get_set_rss_keys
*)seed
);
6867 dev_err(ice_pf_to_dev(vsi
->back
), "Cannot get RSS key, err %s aq_err %s\n",
6868 ice_stat_str(status
),
6869 ice_aq_str(hw
->adminq
.sq_last_status
));
6877 * ice_bridge_getlink - Get the hardware bridge mode
6880 * @seq: RTNL message seq
6881 * @dev: the netdev being configured
6882 * @filter_mask: filter mask passed in
6883 * @nlflags: netlink flags passed in
6885 * Return the bridge mode (VEB/VEPA)
6888 ice_bridge_getlink(struct sk_buff
*skb
, u32 pid
, u32 seq
,
6889 struct net_device
*dev
, u32 filter_mask
, int nlflags
)
6891 struct ice_netdev_priv
*np
= netdev_priv(dev
);
6892 struct ice_vsi
*vsi
= np
->vsi
;
6893 struct ice_pf
*pf
= vsi
->back
;
6896 bmode
= pf
->first_sw
->bridge_mode
;
6898 return ndo_dflt_bridge_getlink(skb
, pid
, seq
, dev
, bmode
, 0, 0, nlflags
,
6903 * ice_vsi_update_bridge_mode - Update VSI for switching bridge mode (VEB/VEPA)
6904 * @vsi: Pointer to VSI structure
6905 * @bmode: Hardware bridge mode (VEB/VEPA)
6907 * Returns 0 on success, negative on failure
6909 static int ice_vsi_update_bridge_mode(struct ice_vsi
*vsi
, u16 bmode
)
6911 struct ice_aqc_vsi_props
*vsi_props
;
6912 struct ice_hw
*hw
= &vsi
->back
->hw
;
6913 struct ice_vsi_ctx
*ctxt
;
6914 enum ice_status status
;
6917 vsi_props
= &vsi
->info
;
6919 ctxt
= kzalloc(sizeof(*ctxt
), GFP_KERNEL
);
6923 ctxt
->info
= vsi
->info
;
6925 if (bmode
== BRIDGE_MODE_VEB
)
6926 /* change from VEPA to VEB mode */
6927 ctxt
->info
.sw_flags
|= ICE_AQ_VSI_SW_FLAG_ALLOW_LB
;
6929 /* change from VEB to VEPA mode */
6930 ctxt
->info
.sw_flags
&= ~ICE_AQ_VSI_SW_FLAG_ALLOW_LB
;
6931 ctxt
->info
.valid_sections
= cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID
);
6933 status
= ice_update_vsi(hw
, vsi
->idx
, ctxt
, NULL
);
6935 dev_err(ice_pf_to_dev(vsi
->back
), "update VSI for bridge mode failed, bmode = %d err %s aq_err %s\n",
6936 bmode
, ice_stat_str(status
),
6937 ice_aq_str(hw
->adminq
.sq_last_status
));
6941 /* Update sw flags for book keeping */
6942 vsi_props
->sw_flags
= ctxt
->info
.sw_flags
;
6950 * ice_bridge_setlink - Set the hardware bridge mode
6951 * @dev: the netdev being configured
6952 * @nlh: RTNL message
6953 * @flags: bridge setlink flags
6954 * @extack: netlink extended ack
6956 * Sets the bridge mode (VEB/VEPA) of the switch to which the netdev (VSI) is
6957 * hooked up to. Iterates through the PF VSI list and sets the loopback mode (if
6958 * not already set for all VSIs connected to this switch. And also update the
6959 * unicast switch filter rules for the corresponding switch of the netdev.
6962 ice_bridge_setlink(struct net_device
*dev
, struct nlmsghdr
*nlh
,
6963 u16 __always_unused flags
,
6964 struct netlink_ext_ack __always_unused
*extack
)
6966 struct ice_netdev_priv
*np
= netdev_priv(dev
);
6967 struct ice_pf
*pf
= np
->vsi
->back
;
6968 struct nlattr
*attr
, *br_spec
;
6969 struct ice_hw
*hw
= &pf
->hw
;
6970 enum ice_status status
;
6971 struct ice_sw
*pf_sw
;
6972 int rem
, v
, err
= 0;
6974 pf_sw
= pf
->first_sw
;
6975 /* find the attribute in the netlink message */
6976 br_spec
= nlmsg_find_attr(nlh
, sizeof(struct ifinfomsg
), IFLA_AF_SPEC
);
6978 nla_for_each_nested(attr
, br_spec
, rem
) {
6981 if (nla_type(attr
) != IFLA_BRIDGE_MODE
)
6983 mode
= nla_get_u16(attr
);
6984 if (mode
!= BRIDGE_MODE_VEPA
&& mode
!= BRIDGE_MODE_VEB
)
6986 /* Continue if bridge mode is not being flipped */
6987 if (mode
== pf_sw
->bridge_mode
)
6989 /* Iterates through the PF VSI list and update the loopback
6992 ice_for_each_vsi(pf
, v
) {
6995 err
= ice_vsi_update_bridge_mode(pf
->vsi
[v
], mode
);
7000 hw
->evb_veb
= (mode
== BRIDGE_MODE_VEB
);
7001 /* Update the unicast switch filter rules for the corresponding
7002 * switch of the netdev
7004 status
= ice_update_sw_rule_bridge_mode(hw
);
7006 netdev_err(dev
, "switch rule update failed, mode = %d err %s aq_err %s\n",
7007 mode
, ice_stat_str(status
),
7008 ice_aq_str(hw
->adminq
.sq_last_status
));
7009 /* revert hw->evb_veb */
7010 hw
->evb_veb
= (pf_sw
->bridge_mode
== BRIDGE_MODE_VEB
);
7014 pf_sw
->bridge_mode
= mode
;
7021 * ice_tx_timeout - Respond to a Tx Hang
7022 * @netdev: network interface device structure
7023 * @txqueue: Tx queue
7025 static void ice_tx_timeout(struct net_device
*netdev
, unsigned int txqueue
)
7027 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
7028 struct ice_ring
*tx_ring
= NULL
;
7029 struct ice_vsi
*vsi
= np
->vsi
;
7030 struct ice_pf
*pf
= vsi
->back
;
7033 pf
->tx_timeout_count
++;
7035 /* Check if PFC is enabled for the TC to which the queue belongs
7036 * to. If yes then Tx timeout is not caused by a hung queue, no
7037 * need to reset and rebuild
7039 if (ice_is_pfc_causing_hung_q(pf
, txqueue
)) {
7040 dev_info(ice_pf_to_dev(pf
), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n",
7045 /* now that we have an index, find the tx_ring struct */
7046 for (i
= 0; i
< vsi
->num_txq
; i
++)
7047 if (vsi
->tx_rings
[i
] && vsi
->tx_rings
[i
]->desc
)
7048 if (txqueue
== vsi
->tx_rings
[i
]->q_index
) {
7049 tx_ring
= vsi
->tx_rings
[i
];
7053 /* Reset recovery level if enough time has elapsed after last timeout.
7054 * Also ensure no new reset action happens before next timeout period.
7056 if (time_after(jiffies
, (pf
->tx_timeout_last_recovery
+ HZ
* 20)))
7057 pf
->tx_timeout_recovery_level
= 1;
7058 else if (time_before(jiffies
, (pf
->tx_timeout_last_recovery
+
7059 netdev
->watchdog_timeo
)))
7063 struct ice_hw
*hw
= &pf
->hw
;
7066 head
= (rd32(hw
, QTX_COMM_HEAD(vsi
->txq_map
[txqueue
])) &
7067 QTX_COMM_HEAD_HEAD_M
) >> QTX_COMM_HEAD_HEAD_S
;
7068 /* Read interrupt register */
7069 val
= rd32(hw
, GLINT_DYN_CTL(tx_ring
->q_vector
->reg_idx
));
7071 netdev_info(netdev
, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n",
7072 vsi
->vsi_num
, txqueue
, tx_ring
->next_to_clean
,
7073 head
, tx_ring
->next_to_use
, val
);
7076 pf
->tx_timeout_last_recovery
= jiffies
;
7077 netdev_info(netdev
, "tx_timeout recovery level %d, txqueue %u\n",
7078 pf
->tx_timeout_recovery_level
, txqueue
);
7080 switch (pf
->tx_timeout_recovery_level
) {
7082 set_bit(ICE_PFR_REQ
, pf
->state
);
7085 set_bit(ICE_CORER_REQ
, pf
->state
);
7088 set_bit(ICE_GLOBR_REQ
, pf
->state
);
7091 netdev_err(netdev
, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
7092 set_bit(ICE_DOWN
, pf
->state
);
7093 set_bit(ICE_VSI_NEEDS_RESTART
, vsi
->state
);
7094 set_bit(ICE_SERVICE_DIS
, pf
->state
);
7098 ice_service_task_schedule(pf
);
7099 pf
->tx_timeout_recovery_level
++;
7103 * ice_open - Called when a network interface becomes active
7104 * @netdev: network interface device structure
7106 * The open entry point is called when a network interface is made
7107 * active by the system (IFF_UP). At this point all resources needed
7108 * for transmit and receive operations are allocated, the interrupt
7109 * handler is registered with the OS, the netdev watchdog is enabled,
7110 * and the stack is notified that the interface is ready.
7112 * Returns 0 on success, negative value on failure
7114 int ice_open(struct net_device
*netdev
)
7116 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
7117 struct ice_pf
*pf
= np
->vsi
->back
;
7119 if (ice_is_reset_in_progress(pf
->state
)) {
7120 netdev_err(netdev
, "can't open net device while reset is in progress");
7124 return ice_open_internal(netdev
);
7128 * ice_open_internal - Called when a network interface becomes active
7129 * @netdev: network interface device structure
7131 * Internal ice_open implementation. Should not be used directly except for ice_open and reset
7134 * Returns 0 on success, negative value on failure
7136 int ice_open_internal(struct net_device
*netdev
)
7138 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
7139 struct ice_vsi
*vsi
= np
->vsi
;
7140 struct ice_pf
*pf
= vsi
->back
;
7141 struct ice_port_info
*pi
;
7142 enum ice_status status
;
7145 if (test_bit(ICE_NEEDS_RESTART
, pf
->state
)) {
7146 netdev_err(netdev
, "driver needs to be unloaded and reloaded\n");
7150 netif_carrier_off(netdev
);
7152 pi
= vsi
->port_info
;
7153 status
= ice_update_link_info(pi
);
7155 netdev_err(netdev
, "Failed to get link info, error %s\n",
7156 ice_stat_str(status
));
7160 ice_check_module_power(pf
, pi
->phy
.link_info
.link_cfg_err
);
7162 /* Set PHY if there is media, otherwise, turn off PHY */
7163 if (pi
->phy
.link_info
.link_info
& ICE_AQ_MEDIA_AVAILABLE
) {
7164 clear_bit(ICE_FLAG_NO_MEDIA
, pf
->flags
);
7165 if (!test_bit(ICE_PHY_INIT_COMPLETE
, pf
->state
)) {
7166 err
= ice_init_phy_user_cfg(pi
);
7168 netdev_err(netdev
, "Failed to initialize PHY settings, error %d\n",
7174 err
= ice_configure_phy(vsi
);
7176 netdev_err(netdev
, "Failed to set physical link up, error %d\n",
7181 set_bit(ICE_FLAG_NO_MEDIA
, pf
->flags
);
7182 ice_set_link(vsi
, false);
7185 err
= ice_vsi_open(vsi
);
7187 netdev_err(netdev
, "Failed to open VSI 0x%04X on switch 0x%04X\n",
7188 vsi
->vsi_num
, vsi
->vsw
->sw_id
);
7190 /* Update existing tunnels information */
7191 udp_tunnel_get_rx_info(netdev
);
7197 * ice_stop - Disables a network interface
7198 * @netdev: network interface device structure
7200 * The stop entry point is called when an interface is de-activated by the OS,
7201 * and the netdevice enters the DOWN state. The hardware is still under the
7202 * driver's control, but the netdev interface is disabled.
7204 * Returns success only - not allowed to fail
7206 int ice_stop(struct net_device
*netdev
)
7208 struct ice_netdev_priv
*np
= netdev_priv(netdev
);
7209 struct ice_vsi
*vsi
= np
->vsi
;
7210 struct ice_pf
*pf
= vsi
->back
;
7212 if (ice_is_reset_in_progress(pf
->state
)) {
7213 netdev_err(netdev
, "can't stop net device while reset is in progress");
7223 * ice_features_check - Validate encapsulated packet conforms to limits
7225 * @netdev: This port's netdev
7226 * @features: Offload features that the stack believes apply
7228 static netdev_features_t
7229 ice_features_check(struct sk_buff
*skb
,
7230 struct net_device __always_unused
*netdev
,
7231 netdev_features_t features
)
7233 bool gso
= skb_is_gso(skb
);
7236 /* No point in doing any of this if neither checksum nor GSO are
7237 * being requested for this frame. We can rule out both by just
7238 * checking for CHECKSUM_PARTIAL
7240 if (skb
->ip_summed
!= CHECKSUM_PARTIAL
)
7243 /* We cannot support GSO if the MSS is going to be less than
7244 * 64 bytes. If it is then we need to drop support for GSO.
7246 if (gso
&& (skb_shinfo(skb
)->gso_size
< ICE_TXD_CTX_MIN_MSS
))
7247 features
&= ~NETIF_F_GSO_MASK
;
7249 len
= skb_network_offset(skb
);
7250 if (len
> ICE_TXD_MACLEN_MAX
|| len
& 0x1)
7251 goto out_rm_features
;
7253 len
= skb_network_header_len(skb
);
7254 if (len
> ICE_TXD_IPLEN_MAX
|| len
& 0x1)
7255 goto out_rm_features
;
7257 if (skb
->encapsulation
) {
7258 /* this must work for VXLAN frames AND IPIP/SIT frames, and in
7259 * the case of IPIP frames, the transport header pointer is
7260 * after the inner header! So check to make sure that this
7261 * is a GRE or UDP_TUNNEL frame before doing that math.
7263 if (gso
&& (skb_shinfo(skb
)->gso_type
&
7264 (SKB_GSO_GRE
| SKB_GSO_UDP_TUNNEL
))) {
7265 len
= skb_inner_network_header(skb
) -
7266 skb_transport_header(skb
);
7267 if (len
> ICE_TXD_L4LEN_MAX
|| len
& 0x1)
7268 goto out_rm_features
;
7271 len
= skb_inner_network_header_len(skb
);
7272 if (len
> ICE_TXD_IPLEN_MAX
|| len
& 0x1)
7273 goto out_rm_features
;
7278 return features
& ~(NETIF_F_CSUM_MASK
| NETIF_F_GSO_MASK
);
7281 static const struct net_device_ops ice_netdev_safe_mode_ops
= {
7282 .ndo_open
= ice_open
,
7283 .ndo_stop
= ice_stop
,
7284 .ndo_start_xmit
= ice_start_xmit
,
7285 .ndo_set_mac_address
= ice_set_mac_address
,
7286 .ndo_validate_addr
= eth_validate_addr
,
7287 .ndo_change_mtu
= ice_change_mtu
,
7288 .ndo_get_stats64
= ice_get_stats64
,
7289 .ndo_tx_timeout
= ice_tx_timeout
,
7290 .ndo_bpf
= ice_xdp_safe_mode
,
7293 static const struct net_device_ops ice_netdev_ops
= {
7294 .ndo_open
= ice_open
,
7295 .ndo_stop
= ice_stop
,
7296 .ndo_start_xmit
= ice_start_xmit
,
7297 .ndo_features_check
= ice_features_check
,
7298 .ndo_set_rx_mode
= ice_set_rx_mode
,
7299 .ndo_set_mac_address
= ice_set_mac_address
,
7300 .ndo_validate_addr
= eth_validate_addr
,
7301 .ndo_change_mtu
= ice_change_mtu
,
7302 .ndo_get_stats64
= ice_get_stats64
,
7303 .ndo_set_tx_maxrate
= ice_set_tx_maxrate
,
7304 .ndo_eth_ioctl
= ice_eth_ioctl
,
7305 .ndo_set_vf_spoofchk
= ice_set_vf_spoofchk
,
7306 .ndo_set_vf_mac
= ice_set_vf_mac
,
7307 .ndo_get_vf_config
= ice_get_vf_cfg
,
7308 .ndo_set_vf_trust
= ice_set_vf_trust
,
7309 .ndo_set_vf_vlan
= ice_set_vf_port_vlan
,
7310 .ndo_set_vf_link_state
= ice_set_vf_link_state
,
7311 .ndo_get_vf_stats
= ice_get_vf_stats
,
7312 .ndo_vlan_rx_add_vid
= ice_vlan_rx_add_vid
,
7313 .ndo_vlan_rx_kill_vid
= ice_vlan_rx_kill_vid
,
7314 .ndo_set_features
= ice_set_features
,
7315 .ndo_bridge_getlink
= ice_bridge_getlink
,
7316 .ndo_bridge_setlink
= ice_bridge_setlink
,
7317 .ndo_fdb_add
= ice_fdb_add
,
7318 .ndo_fdb_del
= ice_fdb_del
,
7319 #ifdef CONFIG_RFS_ACCEL
7320 .ndo_rx_flow_steer
= ice_rx_flow_steer
,
7322 .ndo_tx_timeout
= ice_tx_timeout
,
7324 .ndo_xdp_xmit
= ice_xdp_xmit
,
7325 .ndo_xsk_wakeup
= ice_xsk_wakeup
,