/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2, bool shared_fdb)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

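/* Illustrative call sequence (it mirrors mlx5_create_lag() further down):
 * the tx_remap_affinity values are the ones produced by
 * mlx5_infer_tx_affinity_mapping(), e.g.
 *
 *	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
 *				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
 */
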
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	bool p1en;
	bool p2en;

	p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P1].link_up;

	p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P2].link_up;

	*port1 = 1;
	*port2 = 2;
	if ((!p1en && !p2en) || (p1en && p2en))
		return;

	if (p1en)
		*port2 = 1;
	else
		*port1 = 2;
}

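/* Resulting TX affinity (V2P) mapping, as computed above:
 *
 *	p1en p2en    *port1 *port2
 *	  0    0        1      2	(both down: keep default)
 *	  1    1        1      2	(both up: keep default)
 *	  1    0        1      1	(all TX affinity to port 1)
 *	  0    1        2      2	(all TX affinity to port 2)
 */
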
void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}

static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   bool shared_fdb)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
		       shared_fdb);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags,
		      bool shared_fdb)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker, shared_fdb);
	if (err) {
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	ldev->flags |= flags;
	ldev->shared_fdb = shared_fdb;
	return 0;
}

static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
	mlx5_lag_mp_reset(ldev);

	if (ldev->shared_fdb) {
		mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
							 ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
		ldev->shared_fdb = false;
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag)
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		else
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
	}

	return err;
}

static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool shared_fdb = ldev->shared_fdb;
	bool roce_lag;
	int err;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		mlx5_nic_vport_disable_roce(dev1);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

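/* Summary of the shared-FDB prerequisites checked above: both PFs must be
 * in switchdev mode with vport match metadata enabled, their eswitches must
 * already be paired through devcom, and the firmware must expose native FDB
 * selection, root_ft_on_other_esw and esw_shared_ingress_acl.
 */
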
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag = roce_lag &&
			   dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
						   MLX5_LAG_FLAG_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_nic_vport_enable_roce(dev1);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_disable_lag(ldev);
	}
}

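/* Mode selection above, in short: RoCE LAG is chosen only when SR-IOV is
 * disabled on both PFs (and, with CONFIG_MLX5_ESWITCH, both eswitches are in
 * NONE mode); otherwise VF LAG is used, optionally with a shared FDB when
 * mlx5_shared_fdb_supported() says so.
 */
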
static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
				    struct mlx5_core_dev *dev1)
{
	if (dev0)
		mlx5_esw_lock(dev0->priv.eswitch);
	if (dev1)
		mlx5_esw_lock(dev1->priv.eswitch);
}

static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
				      struct mlx5_core_dev *dev1)
{
	if (dev1)
		mlx5_esw_unlock(dev1->priv.eswitch);
	if (dev0)
		mlx5_esw_unlock(dev0->priv.eswitch);
}

static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_lag_lock_eswitches(dev0, dev1);
	mlx5_do_bond(ldev);
	mlx5_lag_unlock_eswitches(dev0, dev1);
	mlx5_dev_list_unlock();
}

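/* Locking order used by the bond worker: the device list lock is taken first
 * (retrying after HZ jiffies if it is unavailable or if a mode change is in
 * progress), then both eswitch locks are held around mlx5_do_bond().
 */
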
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	int bond_status = 0;
	int num_slaves = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

	if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
		return 0;
	}

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	if (is_in_lag && !mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		return 1;
	}

	return 0;
}

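/* bond_status is a bitmask with one bit per PF port index; 0x3 therefore
 * means both of this device's netdevs are enslaved to the same bond master,
 * which is the only case in which LAG offload is considered.
 */
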
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine
	 * virtual to physical port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
		return NOTIFY_DONE;

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	spin_lock(&lag_lock);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	int i;

	spin_lock(&lag_lock);
	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
		return 0;

	tmp_dev = mlx5_get_next_phys_dev(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
	} else {
		if (ldev->mode_changes_in_progress)
			return -EAGAIN;
		mlx5_ldev_get(ldev);
	}

	mlx5_ldev_add_mdev(ldev, dev);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

recheck:
	mlx5_dev_list_lock();
	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mlx5_dev_list_unlock();
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	if (err) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_dev_list_unlock();
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_remove_netdev(ldev, netdev);
	ldev->flags &= ~MLX5_LAG_FLAG_READY;

	if (__mlx5_lag_is_active(ldev))
		mlx5_queue_bond_work(ldev, 0);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (!ldev->pf[i].dev)
			break;

	if (i >= MLX5_MAX_PORTS)
		ldev->flags |= MLX5_LAG_FLAG_READY;
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
	      dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res  = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *dev0;
	struct mlx5_core_dev *dev1;
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();

	dev0 = ldev->pf[MLX5_LAG_P1].dev;
	dev1 = ldev->pf[MLX5_LAG_P2].dev;

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev)) {
		mlx5_lag_lock_eswitches(dev0, dev1);
		mlx5_disable_lag(ldev);
		mlx5_lag_unlock_eswitches(dev0, dev1);
	}
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	ldev->mode_changes_in_progress--;
	mlx5_dev_list_unlock();
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock(&lag_lock);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	u8 port = 0;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
		port = MLX5_LAG_P1;
	else
		port = MLX5_LAG_P2;

	port = ldev->v2p_map[port];

unlock:
	spin_unlock(&lag_lock);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
			   ldev->pf[MLX5_LAG_P2].dev :
			   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock(&lag_lock);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock(&lag_lock);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);