/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/netdevice.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include <linux/mlx5/vport.h>
#include "lib/devcom.h"
#include "mlx5_core.h"
#include "eswitch.h"
#include "lag.h"
#include "lag_mp.h"

/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 */
static DEFINE_SPINLOCK(lag_lock);

static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2, bool shared_fdb)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);

	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);

	return mlx5_cmd_exec_in(dev, create_lag, in);
}

static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
			       u8 remap_port2)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}

int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};

	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);

int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
{
	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};

	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);

	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);

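/* kref release callback for the LAG device: unregister the netdev notifier,
 * clean up multipath state, flush pending bond work and free the structure.
 */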
static void mlx5_ldev_free(struct kref *ref)
{
	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);

	if (ldev->nb.notifier_call)
		unregister_netdevice_notifier_net(&init_net, &ldev->nb);
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}

static void mlx5_ldev_put(struct mlx5_lag *ldev)
{
	kref_put(&ldev->ref, mlx5_ldev_free);
}

static void mlx5_ldev_get(struct mlx5_lag *ldev)
{
	kref_get(&ldev->ref);
}

static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int err;

	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
	if (!ldev)
		return NULL;

	ldev->wq = create_singlethread_workqueue("mlx5_lag");
	if (!ldev->wq) {
		kfree(ldev);
		return NULL;
	}

	kref_init(&ldev->ref);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
	if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
		ldev->nb.notifier_call = NULL;
		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);

	return ldev;
}

int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;

	return -ENOENT;
}

static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_ROCE);
}

static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
{
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

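/* Map each logical port to the physical port that should carry its traffic.
 * The default is the identity mapping (1->1, 2->2); when exactly one port is
 * both tx-enabled and link-up, all traffic is remapped to that port.
 */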
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
					   u8 *port1, u8 *port2)
{
	bool p1en;
	bool p2en;

	p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P1].link_up;

	p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
	       tracker->netdev_state[MLX5_LAG_P2].link_up;

	*port1 = 1;
	*port2 = 2;
	if ((!p1en && !p2en) || (p1en && p2en))
		return;

	if (p1en)
		*port2 = 1;
	else
		*port1 = 2;
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}

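/* Issue CREATE_LAG with the current tx affinity mapping. When shared_fdb is
 * requested, also point both eswitches at a single FDB; if that fails, the
 * LAG object is destroyed again before returning the error.
 */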
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker,
			   bool shared_fdb)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
		       shared_fdb);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
		return err;
	}

	if (shared_fdb) {
		err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch,
							      dev1->priv.eswitch);
		if (err)
			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
		else
			mlx5_core_info(dev0, "Operation mode is single FDB\n");
	}

	if (err) {
		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
	}

	return err;
}

int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags,
		      bool shared_fdb)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker, shared_fdb);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	ldev->flags |= flags;
	ldev->shared_fdb = shared_fdb;
	return 0;
}

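/* Tear down the LAG object: clear the mode flags, undo multipath and shared
 * FDB state if set, then issue DESTROY_LAG on the first port's device.
 */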
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
	mlx5_lag_mp_reset(ldev);

	if (ldev->shared_fdb) {
		mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch,
							 ldev->pf[MLX5_LAG_P2].dev->priv.eswitch);
		ldev->shared_fdb = false;
	}

	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
	}

	return err;
}

static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

static void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (!ldev->pf[i].dev)
			continue;

		if (ldev->pf[i].dev->priv.flags &
		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
			continue;

		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
	}
}

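/* Leave LAG mode: detach the auxiliary (IB) devices or disable RoCE on the
 * second port as needed, deactivate the LAG object and re-add the auxiliary
 * devices, reloading eswitch representors when shared FDB was in use.
 */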
static void mlx5_disable_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	bool shared_fdb = ldev->shared_fdb;
	bool roce_lag;
	int err;

	roce_lag = __mlx5_lag_is_roce(ldev);

	if (shared_fdb) {
		mlx5_lag_remove_devices(ldev);
	} else if (roce_lag) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
		}
		mlx5_nic_vport_disable_roce(dev1);
	}

	err = mlx5_deactivate_lag(ldev);
	if (err)
		return;

	if (shared_fdb || roce_lag)
		mlx5_lag_add_devices(ldev);

	if (shared_fdb) {
		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev0->priv.eswitch);
		if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
			mlx5_eswitch_reload_reps(dev1->priv.eswitch);
	}
}

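/* Shared FDB is only used when both ports are in switchdev mode with vport
 * match metadata enabled, their eswitches are paired via devcom, and the
 * device reports the native FDB selection, root FT on other eswitch and
 * shared ingress ACL capabilities.
 */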
static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;

	if (is_mdev_switchdev_mode(dev0) &&
	    is_mdev_switchdev_mode(dev1) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) &&
	    mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) &&
	    mlx5_devcom_is_paired(dev0->priv.devcom,
				  MLX5_DEVCOM_ESW_OFFLOADS) &&
	    MLX5_CAP_GEN(dev1, lag_native_fdb_selection) &&
	    MLX5_CAP_ESW(dev1, root_ft_on_other_esw) &&
	    MLX5_CAP_ESW(dev0, esw_shared_ingress_acl))
		return true;

	return false;
}

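/* Re-evaluate the bond state tracked from netdev events and move the device
 * in or out of LAG mode accordingly: activate RoCE or SR-IOV (VF) LAG when
 * both ports are bonded, update the port affinity mapping while active, and
 * disable LAG when the bond is gone.
 */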
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag = roce_lag &&
			   dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
						   MLX5_LAG_FLAG_SRIOV,
					shared_fdb);
		if (err) {
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);

			return;
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_nic_vport_enable_roce(dev1);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			err = mlx5_eswitch_reload_reps(dev0->priv.eswitch);
			if (!err)
				err = mlx5_eswitch_reload_reps(dev1->priv.eswitch);

			if (err) {
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_eswitch_reload_reps(dev0->priv.eswitch);
				mlx5_eswitch_reload_reps(dev1->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		mlx5_disable_lag(ldev);
	}
}

static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
				    struct mlx5_core_dev *dev1)
{
	if (dev0)
		mlx5_esw_lock(dev0->priv.eswitch);
	if (dev1)
		mlx5_esw_lock(dev1->priv.eswitch);
}

static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
				      struct mlx5_core_dev *dev1)
{
	if (dev1)
		mlx5_esw_unlock(dev1->priv.eswitch);
	if (dev0)
		mlx5_esw_unlock(dev0->priv.eswitch);
}

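/* Delayed-work handler: take the device list lock and both eswitch locks
 * before running mlx5_do_bond(). If the list lock cannot be taken, or a mode
 * change is in progress, requeue the work and retry a second later.
 */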
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_lag_lock_eswitches(dev0, dev1);
	mlx5_do_bond(ldev);
	mlx5_lag_unlock_eswitches(dev0, dev1);
	mlx5_dev_list_unlock();
}

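/* Handle NETDEV_CHANGEUPPER on a bond master that enslaves our netdevs.
 * Returns 1 when the tracked bonding state changed (both of our ports, and
 * only them, enslaved to one master in an offloadable TX mode), 0 otherwise.
 */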
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	int bond_status = 0;
	int num_slaves = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx >= 0)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;

	if (!mlx5_lag_is_ready(ldev) && is_in_lag) {
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
		return 0;
	}

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	if (is_in_lag && !mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		return 1;
	}

	return 0;
}

static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx < 0)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}

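/* Netdev notifier callback: update the LAG tracker from CHANGEUPPER and
 * CHANGELOWERSTATE events and, if anything changed, schedule the bond work
 * to re-evaluate the LAG state.
 */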
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev = container_of(this, struct mlx5_lag, nb);

	if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE)
		return NOTIFY_DONE;

	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	ldev->tracker = tracker;

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}

static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
				 struct mlx5_core_dev *dev,
				 struct net_device *netdev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	spin_lock(&lag_lock);
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
				    struct net_device *netdev)
{
	int i;

	spin_lock(&lag_lock);
	for (i = 0; i < MLX5_MAX_PORTS; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
		}
	}
	spin_unlock(&lag_lock);
}

static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
			       struct mlx5_core_dev *dev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	ldev->pf[i].dev = NULL;
	dev->priv.lag = NULL;
}

/* Must be called with intf_mutex held */
static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
		return 0;

	tmp_dev = mlx5_get_next_phys_dev(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc(dev);
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return 0;
		}
	} else {
		if (ldev->mode_changes_in_progress)
			return -EAGAIN;
		mlx5_ldev_get(ldev);
	}

	mlx5_ldev_add_mdev(ldev, dev);

	return 0;
}

void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

recheck:
	mlx5_dev_list_lock();
	if (ldev->mode_changes_in_progress) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
	mlx5_dev_list_unlock();
	mlx5_ldev_put(ldev);
}

void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
{
	int err;

recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
	if (err) {
		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
	mlx5_dev_list_unlock();
}

/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_remove_netdev(ldev, netdev);
	ldev->flags &= ~MLX5_LAG_FLAG_READY;

	if (__mlx5_lag_is_active(ldev))
		mlx5_queue_bond_work(ldev, 0);
}

/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_ldev_add_netdev(ldev, dev, netdev);

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (!ldev->pf[i].dev)
			break;

	if (i >= MLX5_MAX_PORTS)
		ldev->flags |= MLX5_LAG_FLAG_READY;
	mlx5_queue_bond_work(ldev, 0);
}

bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_roce(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_roce);

bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_active);

bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_active(ldev) &&
	      dev == ldev->pf[MLX5_LAG_P1].dev;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_master);

bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev);
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_sriov);

bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	bool res;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb;
	spin_unlock(&lag_lock);

	return res;
}
EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);

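/* mlx5_lag_disable_change()/mlx5_lag_enable_change() bracket eswitch mode
 * changes: the former bumps mode_changes_in_progress (which makes the bond
 * work requeue itself) and tears down an active LAG; the latter drops the
 * counter and schedules the bond work again.
 */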
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *dev0;
	struct mlx5_core_dev *dev1;
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();

	dev0 = ldev->pf[MLX5_LAG_P1].dev;
	dev1 = ldev->pf[MLX5_LAG_P2].dev;

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev)) {
		mlx5_lag_lock_eswitches(dev0, dev1);
		mlx5_disable_lag(ldev);
		mlx5_lag_unlock_eswitches(dev0, dev1);
	}
	mlx5_dev_list_unlock();
}

void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	mlx5_dev_list_lock();
	ldev->mode_changes_in_progress--;
	mlx5_dev_list_unlock();
	mlx5_queue_bond_work(ldev, 0);
}

struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	spin_unlock(&lag_lock);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);

u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
			   struct net_device *slave)
{
	struct mlx5_lag *ldev;
	u8 port = 0;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
		port = MLX5_LAG_P1;
	else
		port = MLX5_LAG_P2;

	port = ldev->v2p_map[port];

unlock:
	spin_unlock(&lag_lock);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev = NULL;
	struct mlx5_lag *ldev;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		goto unlock;

	peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ?
		   ldev->pf[MLX5_LAG_P2].dev :
		   ldev->pf[MLX5_LAG_P1].dev;

unlock:
	spin_unlock(&lag_lock);
	return peer_dev;
}
EXPORT_SYMBOL(mlx5_lag_get_peer_mdev);

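/* Query congestion statistics. When LAG is active the counters of both
 * member devices are queried and summed into @values; otherwise only the
 * given device is queried.
 */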
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock(&lag_lock);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);