/* drivers/net/ethernet/mellanox/mlx5/core/lag.c */
1 | /* |
2 | * Copyright (c) 2016, Mellanox Technologies. All rights reserved. | |
3 | * | |
4 | * This software is available to you under a choice of one of two | |
5 | * licenses. You may choose to be licensed under the terms of the GNU | |
6 | * General Public License (GPL) Version 2, available from the file | |
7 | * COPYING in the main directory of this source tree, or the | |
8 | * OpenIB.org BSD license below: | |
9 | * | |
10 | * Redistribution and use in source and binary forms, with or | |
11 | * without modification, are permitted provided that the following | |
12 | * conditions are met: | |
13 | * | |
14 | * - Redistributions of source code must retain the above | |
15 | * copyright notice, this list of conditions and the following | |
16 | * disclaimer. | |
17 | * | |
18 | * - Redistributions in binary form must reproduce the above | |
19 | * copyright notice, this list of conditions and the following | |
20 | * disclaimer in the documentation and/or other materials | |
21 | * provided with the distribution. | |
22 | * | |
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
30 | * SOFTWARE. | |
31 | */ | |
32 | ||
33 | #include <linux/netdevice.h> | |
34 | #include <linux/mlx5/driver.h> | |
35 | #include <linux/mlx5/vport.h> | |
36 | #include "mlx5_core.h" | |
3b5ff59f | 37 | #include "eswitch.h" |
10a193ed | 38 | #include "lag.h" |
544fe7c2 | 39 | #include "lag_mp.h" |
7907f23a AH |
40 | |
/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 * In this file it guards the ldev->pf[] slots and ldev->tracker.
 */
static DEFINE_MUTEX(lag_mutex);
46 | ||
47 | static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, | |
48 | u8 remap_port2) | |
49 | { | |
50 | u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0}; | |
51 | u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0}; | |
52 | void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); | |
53 | ||
54 | MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); | |
55 | ||
56 | MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); | |
57 | MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); | |
58 | ||
59 | return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); | |
60 | } | |
61 | ||
62 | static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, | |
63 | u8 remap_port2) | |
64 | { | |
65 | u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {0}; | |
66 | u32 out[MLX5_ST_SZ_DW(modify_lag_out)] = {0}; | |
67 | void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); | |
68 | ||
69 | MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); | |
70 | MLX5_SET(modify_lag_in, in, field_select, 0x1); | |
71 | ||
72 | MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); | |
73 | MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); | |
74 | ||
75 | return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); | |
76 | } | |
77 | ||
78 | static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev) | |
79 | { | |
80 | u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {0}; | |
81 | u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0}; | |
82 | ||
83 | MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); | |
84 | ||
85 | return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); | |
86 | } | |
87 | ||
3bc34f3b AH |
88 | int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev) |
89 | { | |
90 | u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {0}; | |
91 | u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0}; | |
92 | ||
93 | MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG); | |
94 | ||
95 | return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); | |
96 | } | |
97 | EXPORT_SYMBOL(mlx5_cmd_create_vport_lag); | |
98 | ||
99 | int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) | |
100 | { | |
101 | u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {0}; | |
102 | u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0}; | |
103 | ||
104 | MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG); | |
105 | ||
106 | return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); | |
107 | } | |
108 | EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); | |
109 | ||
71a0ff65 MD |
110 | static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev, |
111 | bool reset, void *out, int out_size) | |
112 | { | |
113 | u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { }; | |
114 | ||
115 | MLX5_SET(query_cong_statistics_in, in, opcode, | |
116 | MLX5_CMD_OP_QUERY_CONG_STATISTICS); | |
117 | MLX5_SET(query_cong_statistics_in, in, clear, reset); | |
118 | return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); | |
119 | } | |
120 | ||
10a193ed RD |
121 | int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, |
122 | struct net_device *ndev) | |
7907f23a AH |
123 | { |
124 | int i; | |
125 | ||
126 | for (i = 0; i < MLX5_MAX_PORTS; i++) | |
127 | if (ldev->pf[i].netdev == ndev) | |
128 | return i; | |
129 | ||
130 | return -1; | |
131 | } | |
132 | ||
7c34ec19 AH |
133 | static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) |
134 | { | |
135 | return !!(ldev->flags & MLX5_LAG_FLAG_ROCE); | |
136 | } | |
137 | ||
138 | static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) | |
139 | { | |
140 | return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); | |
141 | } | |
142 | ||
7907f23a AH |
143 | static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, |
144 | u8 *port1, u8 *port2) | |
145 | { | |
dc798b4c AH |
146 | *port1 = 1; |
147 | *port2 = 2; | |
84d2dbb0 EA |
148 | if (!tracker->netdev_state[MLX5_LAG_P1].tx_enabled || |
149 | !tracker->netdev_state[MLX5_LAG_P1].link_up) { | |
dc798b4c AH |
150 | *port1 = 2; |
151 | return; | |
7907f23a | 152 | } |
dc798b4c | 153 | |
84d2dbb0 EA |
154 | if (!tracker->netdev_state[MLX5_LAG_P2].tx_enabled || |
155 | !tracker->netdev_state[MLX5_LAG_P2].link_up) | |
dc798b4c | 156 | *port2 = 1; |
7907f23a AH |
157 | } |
158 | ||
/* Re-infer the tx affinity mapping from @tracker and, if it changed,
 * push the new mapping to firmware via MODIFY_LAG. A firmware failure
 * is logged but not propagated; the cached v2p_map keeps the new value.
 */
void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	u8 v2p_port1, v2p_port2;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
				       &v2p_port2);

	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;

		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
			       ldev->v2p_map[MLX5_LAG_P1],
			       ldev->v2p_map[MLX5_LAG_P2]);

		err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
		if (err)
			mlx5_core_err(dev0,
				      "Failed to modify LAG (%d)\n",
				      err);
	}
}
185 | ||
/* Create the hardware LAG on the first PF, seeding ldev->v2p_map from
 * the current tracker state. Returns the CREATE_LAG command status;
 * failure is also logged.
 */
static int mlx5_create_lag(struct mlx5_lag *ldev,
			   struct lag_tracker *tracker)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
				       &ldev->v2p_map[MLX5_LAG_P2]);

	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d",
		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]);

	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
				  ldev->v2p_map[MLX5_LAG_P2]);
	if (err)
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
			      err);
	return err;
}
206 | ||
/* Activate the LAG in the mode given by @flags (MLX5_LAG_FLAG_ROCE or
 * MLX5_LAG_FLAG_SRIOV). On success the mode flags are recorded in
 * ldev->flags; on failure a mode-specific error is logged and the
 * command status is returned.
 */
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      u8 flags)
{
	bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE);
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

	err = mlx5_create_lag(ldev, tracker);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
		return err;
	}

	ldev->flags |= flags;
	return 0;
}
231 | ||
/* Tear down the hardware LAG. The mode flags are cleared before the
 * DESTROY_LAG command is issued, so the lag is considered inactive even
 * if the command fails; a failure is logged (driver restart required)
 * and the command status returned.
 */
static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	bool roce_lag = __mlx5_lag_is_roce(ldev);
	int err;

	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;

	err = mlx5_cmd_destroy_lag(dev0);
	if (err) {
		if (roce_lag) {
			mlx5_core_err(dev0,
				      "Failed to deactivate RoCE LAG; driver restart required\n");
		} else {
			mlx5_core_err(dev0,
				      "Failed to deactivate VF LAG; driver restart required\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		}
	}

	return err;
}
254 | ||
/* Check whether both PF slots are populated and the eswitch/SRIOV state
 * permits bonding. Without CONFIG_MLX5_ESWITCH the only requirement is
 * that neither PF has SRIOV enabled.
 */
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
		return false;

#ifdef CONFIG_MLX5_ESWITCH
	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
				   ldev->pf[MLX5_LAG_P2].dev);
#else
	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
#endif
}
268 | ||
95824666 AH |
269 | static void mlx5_lag_add_ib_devices(struct mlx5_lag *ldev) |
270 | { | |
271 | int i; | |
272 | ||
273 | for (i = 0; i < MLX5_MAX_PORTS; i++) | |
274 | if (ldev->pf[i].dev) | |
275 | mlx5_add_dev_by_protocol(ldev->pf[i].dev, | |
276 | MLX5_INTERFACE_PROTOCOL_IB); | |
277 | } | |
278 | ||
279 | static void mlx5_lag_remove_ib_devices(struct mlx5_lag *ldev) | |
280 | { | |
281 | int i; | |
282 | ||
283 | for (i = 0; i < MLX5_MAX_PORTS; i++) | |
284 | if (ldev->pf[i].dev) | |
285 | mlx5_remove_dev_by_protocol(ldev->pf[i].dev, | |
286 | MLX5_INTERFACE_PROTOCOL_IB); | |
287 | } | |
288 | ||
/* Re-evaluate the bonding state of the two PFs and drive the hardware
 * LAG through the matching transition: activate (RoCE or SRIOV mode),
 * update the tx affinity mapping, or deactivate.
 * Runs with the mlx5 dev-list lock held (see mlx5_do_bond_work and
 * mlx5_lag_update).
 */
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;

	if (!dev0 || !dev1)
		return;

	/* Snapshot the tracker under lag_mutex; operate on the copy. */
	mutex_lock(&lag_mutex);
	tracker = ldev->tracker;
	mutex_unlock(&lag_mutex);

	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		/* RoCE LAG requires SRIOV disabled on both PFs (and, with
		 * eswitch support built in, both eswitches in NONE mode);
		 * otherwise the lag is created in SRIOV (VF) mode.
		 */
		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
			   !mlx5_sriov_is_enabled(dev1);

#ifdef CONFIG_MLX5_ESWITCH
		roce_lag &= dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
			    dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
#endif

		/* For RoCE: drop both per-port IB devices first; a single
		 * bonded IB device is re-added on dev0 below.
		 */
		if (roce_lag)
			mlx5_lag_remove_ib_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_FLAG_ROCE :
					MLX5_LAG_FLAG_SRIOV);
		if (err) {
			/* Roll back the IB device removal on failure. */
			if (roce_lag)
				mlx5_lag_add_ib_devices(ldev);

			return;
		}

		if (roce_lag) {
			mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
			mlx5_nic_vport_enable_roce(dev1);
		}
	} else if (do_bond && __mlx5_lag_is_active(ldev)) {
		/* Already bonded: only the tx affinity may need updating. */
		mlx5_modify_lag(ldev, &tracker);
	} else if (!do_bond && __mlx5_lag_is_active(ldev)) {
		roce_lag = __mlx5_lag_is_roce(ldev);

		if (roce_lag) {
			mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB);
			mlx5_nic_vport_disable_roce(dev1);
		}

		err = mlx5_deactivate_lag(ldev);
		if (err)
			return;

		/* Restore one IB device per port after a RoCE unbond. */
		if (roce_lag)
			mlx5_lag_add_ib_devices(ldev);
	}
}
350 | ||
/* Schedule the bond re-evaluation work to run after @delay jiffies. */
static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
{
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}
355 | ||
/* Work handler for ldev->bond_work: take the mlx5 dev-list lock and run
 * mlx5_do_bond(). If the lock cannot be taken right now, retry in one
 * second instead of blocking the workqueue.
 */
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
	int status;

	status = mlx5_dev_list_trylock();
	if (!status) {
		/* 1 sec delay. */
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

	mlx5_do_bond(ldev);
	mlx5_dev_list_unlock();
}
373 | ||
/* Handle a NETDEV_CHANGEUPPER event: recompute whether our two ports are
 * exactly the slaves of the same lag master, in a supported tx mode.
 * Updates @tracker and returns 1 if tracker->is_bonded changed (the
 * caller then queues the bond work), 0 otherwise.
 */
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct net_device *ndev,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded;
	int bond_status = 0;
	int num_slaves = 0;
	int idx;

	if (!netif_is_lag_master(upper))
		return 0;

	/* upper_info is only consulted on a linking (enslave) event. */
	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
		if (idx > -1)
			bond_status |= (1 << idx);

		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & 0x3))
		return 0;

	if (lag_upper_info)
		tracker->tx_type = lag_upper_info->tx_type;

	/* Determine bonding status:
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 * Lag mode must be activebackup or hash.
	 */
	is_bonded = (num_slaves == MLX5_MAX_PORTS) &&
		    (bond_status == 0x3) &&
		    ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ||
		     (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH));

	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		return 1;
	}

	return 0;
}
431 | ||
/* Handle a NETDEV_CHANGELOWERSTATE event for one of our slave netdevs:
 * record its new lag lower state (link_up / tx_enabled) in the tracker.
 * Returns 1 if the tracker was updated, 0 if the event is not for us or
 * carries no lower-state info.
 */
static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
					      struct lag_tracker *tracker,
					      struct net_device *ndev,
					      struct netdev_notifier_changelowerstate_info *info)
{
	struct netdev_lag_lower_state_info *lag_lower_info;
	int idx;

	if (!netif_is_lag_port(ndev))
		return 0;

	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
	if (idx == -1)
		return 0;

	/* This information is used to determine virtual to physical
	 * port mapping.
	 */
	lag_lower_info = info->lower_state_info;
	if (!lag_lower_info)
		return 0;

	tracker->netdev_state[idx] = *lag_lower_info;

	return 1;
}
458 | ||
/* Netdev notifier callback: dispatch CHANGEUPPER / CHANGELOWERSTATE
 * events to the handlers above, publish the updated tracker under
 * lag_mutex, and queue the bond work when anything relevant changed.
 * Always returns NOTIFY_DONE.
 */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct lag_tracker tracker;
	struct mlx5_lag *ldev;
	int changed = 0;

	if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE))
		return NOTIFY_DONE;

	ldev    = container_of(this, struct mlx5_lag, nb);
	/* Work on a local copy; commit it under the lock below. */
	tracker = ldev->tracker;

	switch (event) {
	case NETDEV_CHANGEUPPER:
		changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev,
							ptr);
		break;
	case NETDEV_CHANGELOWERSTATE:
		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
							     ndev, ptr);
		break;
	}

	mutex_lock(&lag_mutex);
	ldev->tracker = tracker;
	mutex_unlock(&lag_mutex);

	if (changed)
		mlx5_queue_bond_work(ldev, 0);

	return NOTIFY_DONE;
}
493 | ||
494 | static struct mlx5_lag *mlx5_lag_dev_alloc(void) | |
495 | { | |
496 | struct mlx5_lag *ldev; | |
497 | ||
498 | ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); | |
499 | if (!ldev) | |
500 | return NULL; | |
501 | ||
e6ee5e71 RD |
502 | ldev->wq = create_singlethread_workqueue("mlx5_lag"); |
503 | if (!ldev->wq) { | |
504 | kfree(ldev); | |
505 | return NULL; | |
506 | } | |
507 | ||
7907f23a AH |
508 | INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); |
509 | ||
510 | return ldev; | |
511 | } | |
512 | ||
/* Release a lag device allocated by mlx5_lag_dev_alloc(): destroy its
 * workqueue and free the struct. Caller must have cancelled bond_work.
 */
static void mlx5_lag_dev_free(struct mlx5_lag *ldev)
{
	destroy_workqueue(ldev->wq);
	kfree(ldev);
}
518 | ||
/* Register one PF (and its netdev) into the lag device. The slot index
 * is the PCI function number; out-of-range functions are ignored. The
 * tracked link state for the slot starts as down/tx-disabled, and
 * dev->priv.lag is pointed at @ldev, all under lag_mutex.
 */
static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
				struct mlx5_core_dev *dev,
				struct net_device *netdev)
{
	unsigned int fn = PCI_FUNC(dev->pdev->devfn);

	if (fn >= MLX5_MAX_PORTS)
		return;

	mutex_lock(&lag_mutex);
	ldev->pf[fn].dev    = dev;
	ldev->pf[fn].netdev = netdev;
	ldev->tracker.netdev_state[fn].link_up = 0;
	ldev->tracker.netdev_state[fn].tx_enabled = 0;

	dev->priv.lag = ldev;

	mutex_unlock(&lag_mutex);
}
538 | ||
/* Unregister @dev from the lag device: locate its slot, zero it, and
 * clear dev->priv.lag under lag_mutex. No-op if @dev is not in any slot.
 */
static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
				   struct mlx5_core_dev *dev)
{
	int i;

	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev == dev)
			break;

	if (i == MLX5_MAX_PORTS)
		return;

	mutex_lock(&lag_mutex);
	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));

	dev->priv.lag = NULL;
	mutex_unlock(&lag_mutex);
}
557 | ||
/* Must be called with intf_mutex held */
/* Attach @dev/@netdev to the lag machinery: reuse the lag device of the
 * sibling physical device if one exists, otherwise allocate a new one;
 * register the PF slot, the (single, shared) netdev notifier, and the
 * multipath lag support. Silently returns if the device lacks the
 * required LAG capabilities. Registration failures are logged only.
 */
void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev)
{
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    !MLX5_CAP_GEN(dev, lag_master) ||
	    (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS))
		return;

	/* Share one mlx5_lag between the two PFs of the same device. */
	tmp_dev = mlx5_get_next_phys_dev(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;

	if (!ldev) {
		ldev = mlx5_lag_dev_alloc();
		if (!ldev) {
			mlx5_core_err(dev, "Failed to alloc lag dev\n");
			return;
		}
	}

	mlx5_lag_dev_add_pf(ldev, dev, netdev);

	/* Only the first PF to arrive registers the notifier. */
	if (!ldev->nb.notifier_call) {
		ldev->nb.notifier_call = mlx5_lag_netdev_event;
		if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
			ldev->nb.notifier_call = NULL;
			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
		}
	}

	err = mlx5_lag_mp_init(ldev);
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);
}
597 | ||
/* Must be called with intf_mutex held */
/* Detach @dev from the lag machinery: deactivate an active lag, clear
 * the PF slot, and — once no PF slots remain — unregister the notifier,
 * clean up multipath lag, cancel pending bond work, and free the lag
 * device.
 */
void mlx5_lag_remove(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;
	int i;

	ldev = mlx5_lag_dev_get(dev);
	if (!ldev)
		return;

	if (__mlx5_lag_is_active(ldev))
		mlx5_deactivate_lag(ldev);

	mlx5_lag_dev_remove_pf(ldev, dev);

	/* Free the shared lag device only when the last PF is gone. */
	for (i = 0; i < MLX5_MAX_PORTS; i++)
		if (ldev->pf[i].dev)
			break;

	if (i == MLX5_MAX_PORTS) {
		if (ldev->nb.notifier_call)
			unregister_netdevice_notifier_net(&init_net, &ldev->nb);
		mlx5_lag_mp_cleanup(ldev);
		cancel_delayed_work_sync(&ldev->bond_work);
		mlx5_lag_dev_free(ldev);
	}
}
625 | ||
7c34ec19 AH |
626 | bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) |
627 | { | |
628 | struct mlx5_lag *ldev; | |
629 | bool res; | |
630 | ||
631 | mutex_lock(&lag_mutex); | |
632 | ldev = mlx5_lag_dev_get(dev); | |
633 | res = ldev && __mlx5_lag_is_roce(ldev); | |
634 | mutex_unlock(&lag_mutex); | |
635 | ||
636 | return res; | |
637 | } | |
638 | EXPORT_SYMBOL(mlx5_lag_is_roce); | |
639 | ||
7907f23a AH |
640 | bool mlx5_lag_is_active(struct mlx5_core_dev *dev) |
641 | { | |
642 | struct mlx5_lag *ldev; | |
643 | bool res; | |
644 | ||
645 | mutex_lock(&lag_mutex); | |
646 | ldev = mlx5_lag_dev_get(dev); | |
292612d6 | 647 | res = ldev && __mlx5_lag_is_active(ldev); |
7907f23a AH |
648 | mutex_unlock(&lag_mutex); |
649 | ||
650 | return res; | |
651 | } | |
652 | EXPORT_SYMBOL(mlx5_lag_is_active); | |
653 | ||
7c34ec19 AH |
654 | bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) |
655 | { | |
656 | struct mlx5_lag *ldev; | |
657 | bool res; | |
658 | ||
659 | mutex_lock(&lag_mutex); | |
660 | ldev = mlx5_lag_dev_get(dev); | |
661 | res = ldev && __mlx5_lag_is_sriov(ldev); | |
662 | mutex_unlock(&lag_mutex); | |
663 | ||
664 | return res; | |
665 | } | |
666 | EXPORT_SYMBOL(mlx5_lag_is_sriov); | |
667 | ||
/* Force a synchronous re-evaluation of the bond state for @dev's lag,
 * under the mlx5 dev-list lock. No-op if @dev has no lag device.
 */
void mlx5_lag_update(struct mlx5_core_dev *dev)
{
	struct mlx5_lag *ldev;

	mlx5_dev_list_lock();
	ldev = mlx5_lag_dev_get(dev);
	if (!ldev)
		goto unlock;

	mlx5_do_bond(ldev);

unlock:
	mlx5_dev_list_unlock();
}
/* Return the netdev currently carrying RoCE traffic for @dev's lag, with
 * a reference held (caller must dev_put() it), or NULL when the lag is
 * not active in RoCE mode. In active-backup mode this is the tx-enabled
 * port's netdev; otherwise port 1's netdev is returned.
 */
struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;

	mutex_lock(&lag_mutex);
	ldev = mlx5_lag_dev_get(dev);

	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
		       ldev->pf[MLX5_LAG_P1].netdev :
		       ldev->pf[MLX5_LAG_P2].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
	if (ndev)
		dev_hold(ndev);

unlock:
	mutex_unlock(&lag_mutex);

	return ndev;
}
EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
710 | ||
/* Decide whether the interface @intf should be added for the device
 * owning @priv. Everything is allowed except the IB interface on the
 * second PF of an active RoCE lag (the bond exposes a single IB device
 * on PF1).
 */
bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
						 priv);
	struct mlx5_lag *ldev;

	if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB)
		return true;

	ldev = mlx5_lag_dev_get(dev);
	if (!ldev || !__mlx5_lag_is_roce(ldev) ||
	    ldev->pf[MLX5_LAG_P1].dev == dev)
		return true;

	/* If bonded, we do not add an IB device for PF1. */
	return false;
}
/* Sum congestion counters across the lag. @values receives @num_counters
 * accumulated values; @offsets gives each counter's byte offset inside
 * the QUERY_CONG_STATISTICS output. When a RoCE lag is active both PFs
 * are queried and summed, otherwise only @dev. Returns 0 or the first
 * command error (partial sums may already be in @values on error).
 */
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	memset(values, 0, sizeof(*values) * num_counters);

	/* Hold lag_mutex across the queries so the pf[] devs stay valid. */
	mutex_lock(&lag_mutex);
	ldev = mlx5_lag_dev_get(dev);
	if (ldev && __mlx5_lag_is_roce(ldev)) {
		num_ports = MLX5_MAX_PORTS;
		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}

	for (i = 0; i < num_ports; ++i) {
		ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
		if (ret)
			goto unlock;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

unlock:
	mutex_unlock(&lag_mutex);
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);