mlxsw: spectrum_router: Simplify LPM tree allocation
1 /*
2 * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
3 * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
4 * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
5 * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com>
6 * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the names of the copyright holders nor the names of its
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
19 *
20 * Alternatively, this software may be distributed under the terms of the
21 * GNU General Public License ("GPL") version 2 as published by the Free
22 * Software Foundation.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37 #include <linux/kernel.h>
38 #include <linux/types.h>
39 #include <linux/rhashtable.h>
40 #include <linux/bitops.h>
41 #include <linux/in6.h>
42 #include <linux/notifier.h>
43 #include <linux/inetdevice.h>
44 #include <net/netevent.h>
45 #include <net/neighbour.h>
46 #include <net/arp.h>
47 #include <net/ip_fib.h>
48
49 #include "spectrum.h"
50 #include "core.h"
51 #include "reg.h"
52
53 struct mlxsw_sp_rif {
54 struct list_head nexthop_list;
55 struct list_head neigh_list;
56 struct net_device *dev;
57 struct mlxsw_sp_fid *f;
58 unsigned char addr[ETH_ALEN];
59 int mtu;
60 u16 rif;
61 };
62
63 static struct mlxsw_sp_rif *
64 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
65 const struct net_device *dev);
66
67 #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
68 for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
69
70 static bool
71 mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1,
72 struct mlxsw_sp_prefix_usage *prefix_usage2)
73 {
74 unsigned char prefix;
75
76 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) {
77 if (!test_bit(prefix, prefix_usage2->b))
78 return false;
79 }
80 return true;
81 }
82
83 static bool
84 mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
85 struct mlxsw_sp_prefix_usage *prefix_usage2)
86 {
87 return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
88 }
89
90 static bool
91 mlxsw_sp_prefix_usage_none(struct mlxsw_sp_prefix_usage *prefix_usage)
92 {
93 struct mlxsw_sp_prefix_usage prefix_usage_none = {{ 0 } };
94
95 return mlxsw_sp_prefix_usage_eq(prefix_usage, &prefix_usage_none);
96 }
97
98 static void
99 mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
100 struct mlxsw_sp_prefix_usage *prefix_usage2)
101 {
102 memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
103 }
104
105 static void
106 mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage)
107 {
108 memset(prefix_usage, 0, sizeof(*prefix_usage));
109 }
110
111 static void
112 mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
113 unsigned char prefix_len)
114 {
115 set_bit(prefix_len, prefix_usage->b);
116 }
117
118 static void
119 mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
120 unsigned char prefix_len)
121 {
122 clear_bit(prefix_len, prefix_usage->b);
123 }
124
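/* Editor's note -- usage sketch, not part of the driver source. The
 * helpers above treat prefix lengths as a bitmap. A virtual router
 * holding, say, 10.0.0.0/8 and 192.168.1.0/24 routes tracks them as:
 *
 *	struct mlxsw_sp_prefix_usage usage;
 *
 *	mlxsw_sp_prefix_usage_zero(&usage);
 *	mlxsw_sp_prefix_usage_set(&usage, 8);
 *	mlxsw_sp_prefix_usage_set(&usage, 24);	-- bits 8 and 24 set
 *
 * A requirement of just {8} is then a subset of {8, 24}, which is the
 * test mlxsw_sp_prefix_usage_subset() performs when deciding whether an
 * existing LPM tree can keep serving a virtual router (see
 * mlxsw_sp_vr_lpm_tree_check() below).
 */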
125 struct mlxsw_sp_fib_key {
126 unsigned char addr[sizeof(struct in6_addr)];
127 unsigned char prefix_len;
128 };
129
130 enum mlxsw_sp_fib_entry_type {
131 MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
132 MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
133 MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
134 };
135
136 struct mlxsw_sp_nexthop_group;
137
138 struct mlxsw_sp_fib_node {
139 struct list_head entry_list;
140 struct list_head list;
141 struct rhash_head ht_node;
142 struct mlxsw_sp_vr *vr;
143 struct mlxsw_sp_fib_key key;
144 };
145
146 struct mlxsw_sp_fib_entry_params {
147 u32 tb_id;
148 u32 prio;
149 u8 tos;
150 u8 type;
151 };
152
153 struct mlxsw_sp_fib_entry {
154 struct list_head list;
155 struct mlxsw_sp_fib_node *fib_node;
156 enum mlxsw_sp_fib_entry_type type;
157 struct list_head nexthop_group_node;
158 struct mlxsw_sp_nexthop_group *nh_group;
159 struct mlxsw_sp_fib_entry_params params;
160 bool offloaded;
161 };
162
163 struct mlxsw_sp_fib {
164 struct rhashtable ht;
165 struct list_head node_list;
166 unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
167 struct mlxsw_sp_prefix_usage prefix_usage;
168 };
169
170 static const struct rhashtable_params mlxsw_sp_fib_ht_params;
171
172 static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void)
173 {
174 struct mlxsw_sp_fib *fib;
175 int err;
176
177 fib = kzalloc(sizeof(*fib), GFP_KERNEL);
178 if (!fib)
179 return ERR_PTR(-ENOMEM);
180 err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
181 if (err)
182 goto err_rhashtable_init;
183 INIT_LIST_HEAD(&fib->node_list);
184 return fib;
185
186 err_rhashtable_init:
187 kfree(fib);
188 return ERR_PTR(err);
189 }
190
191 static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib)
192 {
193 WARN_ON(!list_empty(&fib->node_list));
194 rhashtable_destroy(&fib->ht);
195 kfree(fib);
196 }
197
198 static struct mlxsw_sp_lpm_tree *
199 mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
200 {
201 struct mlxsw_sp_lpm_tree *lpm_tree;
202 int i;
203
204 for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
205 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
206 if (lpm_tree->ref_count == 0)
207 return lpm_tree;
208 }
209 return NULL;
210 }
211
212 static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
213 struct mlxsw_sp_lpm_tree *lpm_tree)
214 {
215 char ralta_pl[MLXSW_REG_RALTA_LEN];
216
217 mlxsw_reg_ralta_pack(ralta_pl, true,
218 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
219 lpm_tree->id);
220 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
221 }
222
223 static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
224 struct mlxsw_sp_lpm_tree *lpm_tree)
225 {
226 char ralta_pl[MLXSW_REG_RALTA_LEN];
227
228 mlxsw_reg_ralta_pack(ralta_pl, false,
229 (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
230 lpm_tree->id);
231 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
232 }
233
234 static int
235 mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
236 struct mlxsw_sp_prefix_usage *prefix_usage,
237 struct mlxsw_sp_lpm_tree *lpm_tree)
238 {
239 char ralst_pl[MLXSW_REG_RALST_LEN];
240 u8 root_bin = 0;
241 u8 prefix;
242 u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
243
244 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
245 root_bin = prefix;
246
247 mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
248 mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
249 if (prefix == 0)
250 continue;
251 mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
252 MLXSW_REG_RALST_BIN_NO_CHILD);
253 last_prefix = prefix;
254 }
255 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
256 }
257
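/* Editor's note -- worked example, not part of the driver source. Since
 * for_each_set_bit() iterates in ascending order, a prefix usage of
 * {8, 16, 24} leaves root_bin == 24, and the loop above emits (assuming
 * mlxsw_reg_ralst_bin_pack() takes bin, left child, right child, as its
 * use here suggests):
 *
 *	bin  8: left = NO_CHILD, right = NO_CHILD
 *	bin 16: left = 8,        right = NO_CHILD
 *	bin 24: left = 16,       right = NO_CHILD
 *
 * i.e. a linear chain rooted at the longest used prefix, each bin
 * pointing to the next shorter one.
 */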
258 static struct mlxsw_sp_lpm_tree *
259 mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
260 struct mlxsw_sp_prefix_usage *prefix_usage,
261 enum mlxsw_sp_l3proto proto)
262 {
263 struct mlxsw_sp_lpm_tree *lpm_tree;
264 int err;
265
266 lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
267 if (!lpm_tree)
268 return ERR_PTR(-EBUSY);
269 lpm_tree->proto = proto;
270 err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
271 if (err)
272 return ERR_PTR(err);
273
274 err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
275 lpm_tree);
276 if (err)
277 goto err_left_struct_set;
278 memcpy(&lpm_tree->prefix_usage, prefix_usage,
279 sizeof(lpm_tree->prefix_usage));
280 return lpm_tree;
281
282 err_left_struct_set:
283 mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
284 return ERR_PTR(err);
285 }
286
287 static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
288 struct mlxsw_sp_lpm_tree *lpm_tree)
289 {
290 return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
291 }
292
293 static struct mlxsw_sp_lpm_tree *
294 mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
295 struct mlxsw_sp_prefix_usage *prefix_usage,
296 enum mlxsw_sp_l3proto proto)
297 {
298 struct mlxsw_sp_lpm_tree *lpm_tree;
299 int i;
300
301 for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
302 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
303 if (lpm_tree->ref_count != 0 &&
304 lpm_tree->proto == proto &&
305 mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
306 prefix_usage))
307 goto inc_ref_count;
308 }
309 lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage,
310 proto);
311 if (IS_ERR(lpm_tree))
312 return lpm_tree;
313
314 inc_ref_count:
315 lpm_tree->ref_count++;
316 return lpm_tree;
317 }
318
319 static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
320 struct mlxsw_sp_lpm_tree *lpm_tree)
321 {
322 if (--lpm_tree->ref_count == 0)
323 return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
324 return 0;
325 }
326
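/* Editor's note -- lifecycle sketch, not part of the driver source. LPM
 * trees are a small, shared hardware resource: _get() first looks for an
 * in-use tree whose protocol and prefix usage match exactly and bumps
 * its reference count; only on a miss does it claim one of the
 * MLXSW_SP_LPM_TREE_COUNT slots via _create(). Callers pair the two,
 * e.g. (hypothetical caller, error handling elided):
 *
 *	tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, proto);
 *	...
 *	mlxsw_sp_lpm_tree_put(mlxsw_sp, tree);
 *
 * so the hardware tree is released only when its last user drops it.
 */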
327 static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
328 {
329 struct mlxsw_sp_lpm_tree *lpm_tree;
330 int i;
331
332 for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) {
333 lpm_tree = &mlxsw_sp->router.lpm_trees[i];
334 lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
335 }
336 }
337
338 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
339 {
340 struct mlxsw_sp_vr *vr;
341 int i;
342
343 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
344 vr = &mlxsw_sp->router.vrs[i];
345 if (!vr->used)
346 return vr;
347 }
348 return NULL;
349 }
350
351 static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
352 struct mlxsw_sp_vr *vr)
353 {
354 char raltb_pl[MLXSW_REG_RALTB_LEN];
355
356 mlxsw_reg_raltb_pack(raltb_pl, vr->id,
357 (enum mlxsw_reg_ralxx_protocol) vr->proto,
358 vr->lpm_tree->id);
359 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
360 }
361
362 static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
363 struct mlxsw_sp_vr *vr)
364 {
365 char raltb_pl[MLXSW_REG_RALTB_LEN];
366
367 /* Bind to tree 0, which is the default */
368 mlxsw_reg_raltb_pack(raltb_pl, vr->id,
369 (enum mlxsw_reg_ralxx_protocol) vr->proto, 0);
370 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
371 }
372
373 static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
374 {
375 /* For our purpose, squash main and local table into one */
376 if (tb_id == RT_TABLE_LOCAL)
377 tb_id = RT_TABLE_MAIN;
378 return tb_id;
379 }
380
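/* Editor's note -- example, not part of the driver source. With the
 * uapi values RT_TABLE_MAIN == 254 and RT_TABLE_LOCAL == 255:
 *
 *	mlxsw_sp_fix_tb_id(RT_TABLE_LOCAL);	-- returns 254 (MAIN)
 *	mlxsw_sp_fix_tb_id(RT_TABLE_MAIN);	-- returns 254, unchanged
 *	mlxsw_sp_fix_tb_id(100);		-- custom tables pass through
 *
 * so routes from the kernel's local and main tables share one virtual
 * router.
 */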
381 static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
382 u32 tb_id,
383 enum mlxsw_sp_l3proto proto)
384 {
385 struct mlxsw_sp_vr *vr;
386 int i;
387
388 tb_id = mlxsw_sp_fix_tb_id(tb_id);
389
390 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
391 vr = &mlxsw_sp->router.vrs[i];
392 if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
393 return vr;
394 }
395 return NULL;
396 }
397
398 static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
399 unsigned char prefix_len,
400 u32 tb_id,
401 enum mlxsw_sp_l3proto proto)
402 {
403 struct mlxsw_sp_prefix_usage req_prefix_usage;
404 struct mlxsw_sp_lpm_tree *lpm_tree;
405 struct mlxsw_sp_vr *vr;
406 int err;
407
408 vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
409 if (!vr)
410 return ERR_PTR(-EBUSY);
411 vr->fib = mlxsw_sp_fib_create();
412 if (IS_ERR(vr->fib))
413 return ERR_CAST(vr->fib);
414
415 vr->proto = proto;
416 vr->tb_id = tb_id;
417 mlxsw_sp_prefix_usage_zero(&req_prefix_usage);
418 mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
419 lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
420 proto);
421 if (IS_ERR(lpm_tree)) {
422 err = PTR_ERR(lpm_tree);
423 goto err_tree_get;
424 }
425 vr->lpm_tree = lpm_tree;
426 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
427 if (err)
428 goto err_tree_bind;
429
430 vr->used = true;
431 return vr;
432
433 err_tree_bind:
434 mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
435 err_tree_get:
436 mlxsw_sp_fib_destroy(vr->fib);
437
438 return ERR_PTR(err);
439 }
440
441 static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
442 struct mlxsw_sp_vr *vr)
443 {
444 mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr);
445 mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree);
446 mlxsw_sp_fib_destroy(vr->fib);
447 vr->used = false;
448 }
449
450 static int
451 mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
452 struct mlxsw_sp_prefix_usage *req_prefix_usage)
453 {
454 struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree;
455 struct mlxsw_sp_lpm_tree *new_tree;
456 int err;
457
458 if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage))
459 return 0;
460
461 new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage,
462 vr->proto);
463 if (IS_ERR(new_tree)) {
464 /* We failed to get a tree according to the required
465 * prefix usage. However, the current tree might still be good
466 * for us if our requirement is a subset of the prefixes used
467 * in the tree.
468 */
469 if (mlxsw_sp_prefix_usage_subset(req_prefix_usage,
470 &lpm_tree->prefix_usage))
471 return 0;
472 return PTR_ERR(new_tree);
473 }
474
475 /* Prevent packet loss by overwriting existing binding */
476 vr->lpm_tree = new_tree;
477 err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr);
478 if (err)
479 goto err_tree_bind;
480 mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
481
482 return 0;
483
484 err_tree_bind:
485 vr->lpm_tree = lpm_tree;
486 mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
487 return err;
488 }
489
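/* Editor's note -- not part of the driver source. The ordering above is
 * deliberate make-before-break:
 *
 *	1. vr->lpm_tree = new_tree;
 *	2. mlxsw_sp_vr_lpm_tree_bind()	-- RALTB overwrites the old binding
 *	3. mlxsw_sp_lpm_tree_put(old)	-- old tree freed only afterwards
 *
 * The VR is never left without a valid tree, which is what the "Prevent
 * packet loss" comment refers to; on bind failure the old tree is
 * restored and the new one released.
 */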
490 static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp,
491 unsigned char prefix_len,
492 u32 tb_id,
493 enum mlxsw_sp_l3proto proto)
494 {
495 struct mlxsw_sp_vr *vr;
496 int err;
497
498 tb_id = mlxsw_sp_fix_tb_id(tb_id);
499 vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto);
500 if (!vr) {
501 vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto);
502 if (IS_ERR(vr))
503 return vr;
504 } else {
505 struct mlxsw_sp_prefix_usage req_prefix_usage;
506
507 mlxsw_sp_prefix_usage_cpy(&req_prefix_usage,
508 &vr->fib->prefix_usage);
509 mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len);
510 /* Need to replace LPM tree in case new prefix is required. */
511 err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
512 &req_prefix_usage);
513 if (err)
514 return ERR_PTR(err);
515 }
516 return vr;
517 }
518
519 static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
520 {
521 /* Destroy the virtual router entity in case the associated FIB is
522 * empty, and allow it to be used for other tables in the future.
523 * Otherwise, check whether some prefix usage disappeared and switch
524 * to a smaller tree if that is the case. Note that if a new, smaller
525 * tree cannot be allocated, the original one will remain in use.
526 */
527 if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage))
528 mlxsw_sp_vr_destroy(mlxsw_sp, vr);
529 else
530 mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr,
531 &vr->fib->prefix_usage);
532 }
533
534 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
535 {
536 struct mlxsw_sp_vr *vr;
537 u64 max_vrs;
538 int i;
539
540 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
541 return -EIO;
542
543 max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
544 mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
545 GFP_KERNEL);
546 if (!mlxsw_sp->router.vrs)
547 return -ENOMEM;
548
549 for (i = 0; i < max_vrs; i++) {
550 vr = &mlxsw_sp->router.vrs[i];
551 vr->id = i;
552 }
553
554 return 0;
555 }
556
557 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
558
559 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
560 {
561 /* At this stage we're guaranteed not to have new incoming
562 * FIB notifications and the work queue is free from FIBs
563 * sitting on top of mlxsw netdevs. However, we can still
564 * have other FIBs queued. Flush the queue before flushing
565 * the device's tables. No need for locks, as we're the only
566 * writer.
567 */
568 mlxsw_core_flush_owq();
569 mlxsw_sp_router_fib_flush(mlxsw_sp);
570 kfree(mlxsw_sp->router.vrs);
571 }
572
573 struct mlxsw_sp_neigh_key {
574 struct neighbour *n;
575 };
576
577 struct mlxsw_sp_neigh_entry {
578 struct list_head rif_list_node;
579 struct rhash_head ht_node;
580 struct mlxsw_sp_neigh_key key;
581 u16 rif;
582 bool connected;
583 unsigned char ha[ETH_ALEN];
584 struct list_head nexthop_list; /* list of nexthops using
585 * this neigh entry
586 */
587 struct list_head nexthop_neighs_list_node;
588 };
589
590 static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
591 .key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
592 .head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
593 .key_len = sizeof(struct mlxsw_sp_neigh_key),
594 };
595
596 static struct mlxsw_sp_neigh_entry *
597 mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
598 u16 rif)
599 {
600 struct mlxsw_sp_neigh_entry *neigh_entry;
601
602 neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
603 if (!neigh_entry)
604 return NULL;
605
606 neigh_entry->key.n = n;
607 neigh_entry->rif = rif;
608 INIT_LIST_HEAD(&neigh_entry->nexthop_list);
609
610 return neigh_entry;
611 }
612
613 static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
614 {
615 kfree(neigh_entry);
616 }
617
618 static int
619 mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
620 struct mlxsw_sp_neigh_entry *neigh_entry)
621 {
622 return rhashtable_insert_fast(&mlxsw_sp->router.neigh_ht,
623 &neigh_entry->ht_node,
624 mlxsw_sp_neigh_ht_params);
625 }
626
627 static void
628 mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
629 struct mlxsw_sp_neigh_entry *neigh_entry)
630 {
631 rhashtable_remove_fast(&mlxsw_sp->router.neigh_ht,
632 &neigh_entry->ht_node,
633 mlxsw_sp_neigh_ht_params);
634 }
635
636 static struct mlxsw_sp_neigh_entry *
637 mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
638 {
639 struct mlxsw_sp_neigh_entry *neigh_entry;
640 struct mlxsw_sp_rif *r;
641 int err;
642
643 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
644 if (!r)
645 return ERR_PTR(-EINVAL);
646
647 neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif);
648 if (!neigh_entry)
649 return ERR_PTR(-ENOMEM);
650
651 err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
652 if (err)
653 goto err_neigh_entry_insert;
654
655 list_add(&neigh_entry->rif_list_node, &r->neigh_list);
656
657 return neigh_entry;
658
659 err_neigh_entry_insert:
660 mlxsw_sp_neigh_entry_free(neigh_entry);
661 return ERR_PTR(err);
662 }
663
664 static void
665 mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
666 struct mlxsw_sp_neigh_entry *neigh_entry)
667 {
668 list_del(&neigh_entry->rif_list_node);
669 mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
670 mlxsw_sp_neigh_entry_free(neigh_entry);
671 }
672
673 static struct mlxsw_sp_neigh_entry *
674 mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
675 {
676 struct mlxsw_sp_neigh_key key;
677
678 key.n = n;
679 return rhashtable_lookup_fast(&mlxsw_sp->router.neigh_ht,
680 &key, mlxsw_sp_neigh_ht_params);
681 }
682
683 static void
684 mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
685 {
686 unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
687
688 mlxsw_sp->router.neighs_update.interval = jiffies_to_msecs(interval);
689 }
690
691 static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
692 char *rauhtd_pl,
693 int ent_index)
694 {
695 struct net_device *dev;
696 struct neighbour *n;
697 __be32 dipn;
698 u32 dip;
699 u16 rif;
700
701 mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
702
703 if (!mlxsw_sp->rifs[rif]) {
704 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
705 return;
706 }
707
708 dipn = htonl(dip);
709 dev = mlxsw_sp->rifs[rif]->dev;
710 n = neigh_lookup(&arp_tbl, &dipn, dev);
711 if (!n) {
712 netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n",
713 &dip);
714 return;
715 }
716
717 netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
718 neigh_event_send(n, NULL);
719 neigh_release(n);
720 }
721
722 static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
723 char *rauhtd_pl,
724 int rec_index)
725 {
726 u8 num_entries;
727 int i;
728
729 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
730 rec_index);
731 /* Hardware starts counting at 0, so add 1. */
732 num_entries++;
733
734 /* Each record consists of several neighbour entries. */
735 for (i = 0; i < num_entries; i++) {
736 int ent_index;
737
738 ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
739 mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
740 ent_index);
741 }
742
743 }
744
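/* Editor's note -- worked example, not part of the driver source.
 * RAUHTD entries are addressed by a flat index across records, so with
 * e.g. MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC == 4, record 2 carrying three
 * valid entries is processed as:
 *
 *	ent_index = 2 * 4 + 0 = 8
 *	ent_index = 2 * 4 + 1 = 9
 *	ent_index = 2 * 4 + 2 = 10
 */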
745 static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
746 char *rauhtd_pl, int rec_index)
747 {
748 switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
749 case MLXSW_REG_RAUHTD_TYPE_IPV4:
750 mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
751 rec_index);
752 break;
753 case MLXSW_REG_RAUHTD_TYPE_IPV6:
754 WARN_ON_ONCE(1);
755 break;
756 }
757 }
758
759 static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
760 {
761 u8 num_rec, last_rec_index, num_entries;
762
763 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
764 last_rec_index = num_rec - 1;
765
766 if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
767 return false;
768 if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
769 MLXSW_REG_RAUHTD_TYPE_IPV6)
770 return true;
771
772 num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
773 last_rec_index);
774 if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
775 return true;
776 return false;
777 }
778
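/* Editor's note -- not part of the driver source. The check above reads:
 * the dump may hold more data only if the response was completely full,
 * i.e. the maximum number of records came back AND the last record could
 * not have taken another entry. E.g. a response of
 * MLXSW_REG_RAUHTD_REC_MAX_NUM records whose last IPv4 record reports
 * num_entries == ENT_PER_REC - 1 (hardware counts from 0, so it is full)
 * makes the caller below issue another RAUHTD query.
 */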
779 static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
780 {
781 char *rauhtd_pl;
782 u8 num_rec;
783 int i, err;
784
785 rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
786 if (!rauhtd_pl)
787 return -ENOMEM;
788
789 /* Make sure the neighbour's netdev isn't removed in the
790 * process.
791 */
792 rtnl_lock();
793 do {
794 mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4);
795 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
796 rauhtd_pl);
797 if (err) {
798 dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
799 break;
800 }
801 num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
802 for (i = 0; i < num_rec; i++)
803 mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
804 i);
805 } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
806 rtnl_unlock();
807
808 kfree(rauhtd_pl);
809 return err;
810 }
811
812 static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
813 {
814 struct mlxsw_sp_neigh_entry *neigh_entry;
815
816 /* Take RTNL mutex here to prevent the lists from changing */
817 rtnl_lock();
818 list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
819 nexthop_neighs_list_node)
820 /* If this neigh has nexthops, make the kernel think this neigh
821 * is active regardless of the traffic.
822 */
823 neigh_event_send(neigh_entry->key.n, NULL);
824 rtnl_unlock();
825 }
826
827 static void
828 mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
829 {
830 unsigned long interval = mlxsw_sp->router.neighs_update.interval;
831
832 mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw,
833 msecs_to_jiffies(interval));
834 }
835
836 static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
837 {
838 struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
839 router.neighs_update.dw.work);
840 int err;
841
842 err = mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp);
843 if (err)
844 dev_err(mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
845
846 mlxsw_sp_router_neighs_update_nh(mlxsw_sp);
847
848 mlxsw_sp_router_neighs_update_work_schedule(mlxsw_sp);
849 }
850
851 static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
852 {
853 struct mlxsw_sp_neigh_entry *neigh_entry;
854 struct mlxsw_sp *mlxsw_sp = container_of(work, struct mlxsw_sp,
855 router.nexthop_probe_dw.work);
856
857 /* Iterate over nexthop neighbours, find the unresolved ones and
858 * send ARP requests for them. This solves the chicken-and-egg
859 * problem where a nexthop would not get offloaded until its
860 * neighbour is resolved, but the neighbour would never get resolved
861 * as long as traffic flows in HW via a different nexthop.
862 *
863 * Take RTNL mutex here to prevent the lists from changing.
864 */
865 rtnl_lock();
866 list_for_each_entry(neigh_entry, &mlxsw_sp->router.nexthop_neighs_list,
867 nexthop_neighs_list_node)
868 if (!neigh_entry->connected)
869 neigh_event_send(neigh_entry->key.n, NULL);
870 rtnl_unlock();
871
872 mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw,
873 MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
874 }
875
876 static void
877 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
878 struct mlxsw_sp_neigh_entry *neigh_entry,
879 bool removing);
880
881 static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
882 {
883 return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
884 MLXSW_REG_RAUHT_OP_WRITE_DELETE;
885 }
886
887 static void
888 mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
889 struct mlxsw_sp_neigh_entry *neigh_entry,
890 enum mlxsw_reg_rauht_op op)
891 {
892 struct neighbour *n = neigh_entry->key.n;
893 u32 dip = ntohl(*((__be32 *) n->primary_key));
894 char rauht_pl[MLXSW_REG_RAUHT_LEN];
895
896 mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
897 dip);
898 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
899 }
900
901 static void
902 mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
903 struct mlxsw_sp_neigh_entry *neigh_entry,
904 bool adding)
905 {
906 if (!adding && !neigh_entry->connected)
907 return;
908 neigh_entry->connected = adding;
909 if (neigh_entry->key.n->tbl == &arp_tbl)
910 mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
911 mlxsw_sp_rauht_op(adding));
912 else
913 WARN_ON_ONCE(1);
914 }
915
916 struct mlxsw_sp_neigh_event_work {
917 struct work_struct work;
918 struct mlxsw_sp *mlxsw_sp;
919 struct neighbour *n;
920 };
921
922 static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
923 {
924 struct mlxsw_sp_neigh_event_work *neigh_work =
925 container_of(work, struct mlxsw_sp_neigh_event_work, work);
926 struct mlxsw_sp *mlxsw_sp = neigh_work->mlxsw_sp;
927 struct mlxsw_sp_neigh_entry *neigh_entry;
928 struct neighbour *n = neigh_work->n;
929 unsigned char ha[ETH_ALEN];
930 bool entry_connected;
931 u8 nud_state, dead;
932
933 /* If these parameters are changed after we release the lock,
934 * then we are guaranteed to receive another event letting us
935 * know about it.
936 */
937 read_lock_bh(&n->lock);
938 memcpy(ha, n->ha, ETH_ALEN);
939 nud_state = n->nud_state;
940 dead = n->dead;
941 read_unlock_bh(&n->lock);
942
943 rtnl_lock();
944 entry_connected = nud_state & NUD_VALID && !dead;
945 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
946 if (!entry_connected && !neigh_entry)
947 goto out;
948 if (!neigh_entry) {
949 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
950 if (IS_ERR(neigh_entry))
951 goto out;
952 }
953
954 memcpy(neigh_entry->ha, ha, ETH_ALEN);
955 mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
956 mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected);
957
958 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
959 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
960
961 out:
962 rtnl_unlock();
963 neigh_release(n);
964 kfree(neigh_work);
965 }
966
967 int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
968 unsigned long event, void *ptr)
969 {
970 struct mlxsw_sp_neigh_event_work *neigh_work;
971 struct mlxsw_sp_port *mlxsw_sp_port;
972 struct mlxsw_sp *mlxsw_sp;
973 unsigned long interval;
974 struct neigh_parms *p;
975 struct neighbour *n;
976
977 switch (event) {
978 case NETEVENT_DELAY_PROBE_TIME_UPDATE:
979 p = ptr;
980
981 /* We don't care about changes in the default table. */
982 if (!p->dev || p->tbl != &arp_tbl)
983 return NOTIFY_DONE;
984
985 /* We are in atomic context and can't take RTNL mutex,
986 * so use RCU variant to walk the device chain.
987 */
988 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
989 if (!mlxsw_sp_port)
990 return NOTIFY_DONE;
991
992 mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
993 interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
994 mlxsw_sp->router.neighs_update.interval = interval;
995
996 mlxsw_sp_port_dev_put(mlxsw_sp_port);
997 break;
998 case NETEVENT_NEIGH_UPDATE:
999 n = ptr;
1000
1001 if (n->tbl != &arp_tbl)
1002 return NOTIFY_DONE;
1003
1004 mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
1005 if (!mlxsw_sp_port)
1006 return NOTIFY_DONE;
1007
1008 neigh_work = kzalloc(sizeof(*neigh_work), GFP_ATOMIC);
1009 if (!neigh_work) {
1010 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1011 return NOTIFY_BAD;
1012 }
1013
1014 INIT_WORK(&neigh_work->work, mlxsw_sp_router_neigh_event_work);
1015 neigh_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
1016 neigh_work->n = n;
1017
1018 /* Take a reference to ensure the neighbour won't be
1019 * destroyed until we drop the reference in delayed
1020 * work.
1021 */
1022 neigh_clone(n);
1023 mlxsw_core_schedule_work(&neigh_work->work);
1024 mlxsw_sp_port_dev_put(mlxsw_sp_port);
1025 break;
1026 }
1027
1028 return NOTIFY_DONE;
1029 }
1030
1031 static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
1032 {
1033 int err;
1034
1035 err = rhashtable_init(&mlxsw_sp->router.neigh_ht,
1036 &mlxsw_sp_neigh_ht_params);
1037 if (err)
1038 return err;
1039
1040 /* Initialize the polling interval according to the default
1041 * table.
1042 */
1043 mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
1044
1045 /* Create the delayed works for activity update and nexthop probing */
1046 INIT_DELAYED_WORK(&mlxsw_sp->router.neighs_update.dw,
1047 mlxsw_sp_router_neighs_update_work);
1048 INIT_DELAYED_WORK(&mlxsw_sp->router.nexthop_probe_dw,
1049 mlxsw_sp_router_probe_unresolved_nexthops);
1050 mlxsw_core_schedule_dw(&mlxsw_sp->router.neighs_update.dw, 0);
1051 mlxsw_core_schedule_dw(&mlxsw_sp->router.nexthop_probe_dw, 0);
1052 return 0;
1053 }
1054
1055 static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
1056 {
1057 cancel_delayed_work_sync(&mlxsw_sp->router.neighs_update.dw);
1058 cancel_delayed_work_sync(&mlxsw_sp->router.nexthop_probe_dw);
1059 rhashtable_destroy(&mlxsw_sp->router.neigh_ht);
1060 }
1061
1062 static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp,
1063 const struct mlxsw_sp_rif *r)
1064 {
1065 char rauht_pl[MLXSW_REG_RAUHT_LEN];
1066
1067 mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL,
1068 r->rif, r->addr);
1069 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
1070 }
1071
1072 static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1073 struct mlxsw_sp_rif *r)
1074 {
1075 struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
1076
1077 mlxsw_sp_neigh_rif_flush(mlxsw_sp, r);
1078 list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list,
1079 rif_list_node)
1080 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1081 }
1082
1083 struct mlxsw_sp_nexthop_key {
1084 struct fib_nh *fib_nh;
1085 };
1086
1087 struct mlxsw_sp_nexthop {
1088 struct list_head neigh_list_node; /* member of neigh entry list */
1089 struct list_head rif_list_node;
1090 struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
1091 * this belongs to
1092 */
1093 struct rhash_head ht_node;
1094 struct mlxsw_sp_nexthop_key key;
1095 struct mlxsw_sp_rif *r;
1096 u8 should_offload:1, /* set indicates this neigh is connected and
1097 * should be put into the KVD linear area of this group.
1098 */
1099 offloaded:1, /* set in case the neigh is actually put into
1100 * KVD linear area of this group.
1101 */
1102 update:1; /* set indicates that the MAC of this neigh should be
1103 * updated in HW
1104 */
1105 struct mlxsw_sp_neigh_entry *neigh_entry;
1106 };
1107
1108 struct mlxsw_sp_nexthop_group_key {
1109 struct fib_info *fi;
1110 };
1111
1112 struct mlxsw_sp_nexthop_group {
1113 struct rhash_head ht_node;
1114 struct list_head fib_list; /* list of fib entries that use this group */
1115 struct mlxsw_sp_nexthop_group_key key;
1116 u8 adj_index_valid:1,
1117 gateway:1; /* routes using the group use a gateway */
1118 u32 adj_index;
1119 u16 ecmp_size;
1120 u16 count;
1121 struct mlxsw_sp_nexthop nexthops[0];
1122 #define nh_rif nexthops[0].r
1123 };
1124
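/* Editor's note -- not part of the driver source. The nh_rif alias above
 * exposes the router interface of the group's first nexthop. It is what
 * MLXSW_SP_FIB_ENTRY_TYPE_LOCAL entries use (see
 * mlxsw_sp_fib_entry_should_offload() and mlxsw_sp_fib_entry_op4_local()
 * below), since non-gateway groups forward via a RIF rather than via a
 * KVD linear adjacency block.
 */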
1125 static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
1126 .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key),
1127 .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
1128 .key_len = sizeof(struct mlxsw_sp_nexthop_group_key),
1129 };
1130
1131 static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
1132 struct mlxsw_sp_nexthop_group *nh_grp)
1133 {
1134 return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_group_ht,
1135 &nh_grp->ht_node,
1136 mlxsw_sp_nexthop_group_ht_params);
1137 }
1138
1139 static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
1140 struct mlxsw_sp_nexthop_group *nh_grp)
1141 {
1142 rhashtable_remove_fast(&mlxsw_sp->router.nexthop_group_ht,
1143 &nh_grp->ht_node,
1144 mlxsw_sp_nexthop_group_ht_params);
1145 }
1146
1147 static struct mlxsw_sp_nexthop_group *
1148 mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp,
1149 struct mlxsw_sp_nexthop_group_key key)
1150 {
1151 return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_group_ht, &key,
1152 mlxsw_sp_nexthop_group_ht_params);
1153 }
1154
1155 static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
1156 .key_offset = offsetof(struct mlxsw_sp_nexthop, key),
1157 .head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
1158 .key_len = sizeof(struct mlxsw_sp_nexthop_key),
1159 };
1160
1161 static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
1162 struct mlxsw_sp_nexthop *nh)
1163 {
1164 return rhashtable_insert_fast(&mlxsw_sp->router.nexthop_ht,
1165 &nh->ht_node, mlxsw_sp_nexthop_ht_params);
1166 }
1167
1168 static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
1169 struct mlxsw_sp_nexthop *nh)
1170 {
1171 rhashtable_remove_fast(&mlxsw_sp->router.nexthop_ht, &nh->ht_node,
1172 mlxsw_sp_nexthop_ht_params);
1173 }
1174
1175 static struct mlxsw_sp_nexthop *
1176 mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
1177 struct mlxsw_sp_nexthop_key key)
1178 {
1179 return rhashtable_lookup_fast(&mlxsw_sp->router.nexthop_ht, &key,
1180 mlxsw_sp_nexthop_ht_params);
1181 }
1182
1183 static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
1184 struct mlxsw_sp_vr *vr,
1185 u32 adj_index, u16 ecmp_size,
1186 u32 new_adj_index,
1187 u16 new_ecmp_size)
1188 {
1189 char raleu_pl[MLXSW_REG_RALEU_LEN];
1190
1191 mlxsw_reg_raleu_pack(raleu_pl,
1192 (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id,
1193 adj_index, ecmp_size, new_adj_index,
1194 new_ecmp_size);
1195 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
1196 }
1197
1198 static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
1199 struct mlxsw_sp_nexthop_group *nh_grp,
1200 u32 old_adj_index, u16 old_ecmp_size)
1201 {
1202 struct mlxsw_sp_fib_entry *fib_entry;
1203 struct mlxsw_sp_vr *vr = NULL;
1204 int err;
1205
1206 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1207 if (vr == fib_entry->fib_node->vr)
1208 continue;
1209 vr = fib_entry->fib_node->vr;
1210 err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr,
1211 old_adj_index,
1212 old_ecmp_size,
1213 nh_grp->adj_index,
1214 nh_grp->ecmp_size);
1215 if (err)
1216 return err;
1217 }
1218 return 0;
1219 }
1220
1221 static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
1222 struct mlxsw_sp_nexthop *nh)
1223 {
1224 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1225 char ratr_pl[MLXSW_REG_RATR_LEN];
1226
1227 mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
1228 true, adj_index, neigh_entry->rif);
1229 mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
1230 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
1231 }
1232
1233 static int
1234 mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp,
1235 struct mlxsw_sp_nexthop_group *nh_grp,
1236 bool reallocate)
1237 {
1238 u32 adj_index = nh_grp->adj_index; /* base */
1239 struct mlxsw_sp_nexthop *nh;
1240 int i;
1241 int err;
1242
1243 for (i = 0; i < nh_grp->count; i++) {
1244 nh = &nh_grp->nexthops[i];
1245
1246 if (!nh->should_offload) {
1247 nh->offloaded = 0;
1248 continue;
1249 }
1250
1251 if (nh->update || reallocate) {
1252 err = mlxsw_sp_nexthop_mac_update(mlxsw_sp,
1253 adj_index, nh);
1254 if (err)
1255 return err;
1256 nh->update = 0;
1257 nh->offloaded = 1;
1258 }
1259 adj_index++;
1260 }
1261 return 0;
1262 }
1263
1264 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1265 struct mlxsw_sp_fib_entry *fib_entry);
1266
1267 static int
1268 mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
1269 struct mlxsw_sp_nexthop_group *nh_grp)
1270 {
1271 struct mlxsw_sp_fib_entry *fib_entry;
1272 int err;
1273
1274 list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
1275 err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1276 if (err)
1277 return err;
1278 }
1279 return 0;
1280 }
1281
1282 static void
1283 mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
1284 struct mlxsw_sp_nexthop_group *nh_grp)
1285 {
1286 struct mlxsw_sp_nexthop *nh;
1287 bool offload_change = false;
1288 u32 adj_index;
1289 u16 ecmp_size = 0;
1290 bool old_adj_index_valid;
1291 u32 old_adj_index;
1292 u16 old_ecmp_size;
1293 int ret;
1294 int i;
1295 int err;
1296
1297 if (!nh_grp->gateway) {
1298 mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1299 return;
1300 }
1301
1302 for (i = 0; i < nh_grp->count; i++) {
1303 nh = &nh_grp->nexthops[i];
1304
1305 if (nh->should_offload ^ nh->offloaded) {
1306 offload_change = true;
1307 if (nh->should_offload)
1308 nh->update = 1;
1309 }
1310 if (nh->should_offload)
1311 ecmp_size++;
1312 }
1313 if (!offload_change) {
1314 /* Nothing was added or removed, so no need to reallocate. Just
1315 * update MAC on existing adjacency indexes.
1316 */
1317 err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp,
1318 false);
1319 if (err) {
1320 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1321 goto set_trap;
1322 }
1323 return;
1324 }
1325 if (!ecmp_size)
1326 /* No neigh of this group is connected, so we just set
1327 * the trap and let everything flow through the kernel.
1328 */
1329 goto set_trap;
1330
1331 ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size);
1332 if (ret < 0) {
1333 /* We ran out of KVD linear space, just set the
1334 * trap and let everything flow through the kernel.
1335 */
1336 dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
1337 goto set_trap;
1338 }
1339 adj_index = ret;
1340 old_adj_index_valid = nh_grp->adj_index_valid;
1341 old_adj_index = nh_grp->adj_index;
1342 old_ecmp_size = nh_grp->ecmp_size;
1343 nh_grp->adj_index_valid = 1;
1344 nh_grp->adj_index = adj_index;
1345 nh_grp->ecmp_size = ecmp_size;
1346 err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true);
1347 if (err) {
1348 dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
1349 goto set_trap;
1350 }
1351
1352 if (!old_adj_index_valid) {
1353 /* The trap was set for fib entries, so we have to call
1354 * fib entry update to unset it and use the adjacency index.
1355 */
1356 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1357 if (err) {
1358 dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
1359 goto set_trap;
1360 }
1361 return;
1362 }
1363
1364 err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
1365 old_adj_index, old_ecmp_size);
1366 mlxsw_sp_kvdl_free(mlxsw_sp, old_adj_index);
1367 if (err) {
1368 dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
1369 goto set_trap;
1370 }
1371 return;
1372
1373 set_trap:
1374 old_adj_index_valid = nh_grp->adj_index_valid;
1375 nh_grp->adj_index_valid = 0;
1376 for (i = 0; i < nh_grp->count; i++) {
1377 nh = &nh_grp->nexthops[i];
1378 nh->offloaded = 0;
1379 }
1380 err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
1381 if (err)
1382 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
1383 if (old_adj_index_valid)
1384 mlxsw_sp_kvdl_free(mlxsw_sp, nh_grp->adj_index);
1385 }
1386
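/* Editor's note -- summary sketch of the refresh above, not part of the
 * driver source. For a gateway group the function effectively does:
 *
 *	ecmp_size = count of nexthops with should_offload set;
 *	if (no offload state changed)
 *		rewrite MACs in place (RATR) and return;
 *	if (ecmp_size == 0)
 *		goto set_trap;			-- everything via kernel
 *	allocate a new KVD linear block of ecmp_size entries;
 *	write the adjacency entries into it;
 *	if (no old block)
 *		update fib entries to start using the adjacency index;
 *	else
 *		RALEU mass-update routes to the new block, free the old;
 *
 * Every failure path falls through to set_trap, so routes degrade to
 * kernel forwarding instead of black-holing traffic.
 */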
1387 static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
1388 bool removing)
1389 {
1390 if (!removing && !nh->should_offload)
1391 nh->should_offload = 1;
1392 else if (removing && nh->offloaded)
1393 nh->should_offload = 0;
1394 nh->update = 1;
1395 }
1396
1397 static void
1398 mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
1399 struct mlxsw_sp_neigh_entry *neigh_entry,
1400 bool removing)
1401 {
1402 struct mlxsw_sp_nexthop *nh;
1403
1404 list_for_each_entry(nh, &neigh_entry->nexthop_list,
1405 neigh_list_node) {
1406 __mlxsw_sp_nexthop_neigh_update(nh, removing);
1407 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1408 }
1409 }
1410
1411 static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
1412 struct mlxsw_sp_rif *r)
1413 {
1414 if (nh->r)
1415 return;
1416
1417 nh->r = r;
1418 list_add(&nh->rif_list_node, &r->nexthop_list);
1419 }
1420
1421 static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
1422 {
1423 if (!nh->r)
1424 return;
1425
1426 list_del(&nh->rif_list_node);
1427 nh->r = NULL;
1428 }
1429
1430 static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
1431 struct mlxsw_sp_nexthop *nh)
1432 {
1433 struct mlxsw_sp_neigh_entry *neigh_entry;
1434 struct fib_nh *fib_nh = nh->key.fib_nh;
1435 struct neighbour *n;
1436 u8 nud_state, dead;
1437 int err;
1438
1439 if (!nh->nh_grp->gateway || nh->neigh_entry)
1440 return 0;
1441
1442 /* Take a reference on the neigh here, ensuring that it would
1443 * not be destroyed before the nexthop entry is finished.
1444 * The reference is taken either in neigh_lookup() or
1445 * in neigh_create() in case n is not found.
1446 */
1447 n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1448 if (!n) {
1449 n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev);
1450 if (IS_ERR(n))
1451 return PTR_ERR(n);
1452 neigh_event_send(n, NULL);
1453 }
1454 neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
1455 if (!neigh_entry) {
1456 neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
1457 if (IS_ERR(neigh_entry)) {
1458 err = PTR_ERR(neigh_entry);
1459 goto err_neigh_entry_create;
1460 }
1461 }
1462
1463 /* If that is the first nexthop connected to that neigh, add to
1464 * nexthop_neighs_list
1465 */
1466 if (list_empty(&neigh_entry->nexthop_list))
1467 list_add_tail(&neigh_entry->nexthop_neighs_list_node,
1468 &mlxsw_sp->router.nexthop_neighs_list);
1469
1470 nh->neigh_entry = neigh_entry;
1471 list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
1472 read_lock_bh(&n->lock);
1473 nud_state = n->nud_state;
1474 dead = n->dead;
1475 read_unlock_bh(&n->lock);
1476 __mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
1477
1478 return 0;
1479
1480 err_neigh_entry_create:
1481 neigh_release(n);
1482 return err;
1483 }
1484
1485 static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
1486 struct mlxsw_sp_nexthop *nh)
1487 {
1488 struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
1489 struct neighbour *n;
1490
1491 if (!neigh_entry)
1492 return;
1493 n = neigh_entry->key.n;
1494
1495 __mlxsw_sp_nexthop_neigh_update(nh, true);
1496 list_del(&nh->neigh_list_node);
1497 nh->neigh_entry = NULL;
1498
1499 /* If that is the last nexthop connected to that neigh, remove from
1500 * nexthop_neighs_list
1501 */
1502 if (list_empty(&neigh_entry->nexthop_list))
1503 list_del(&neigh_entry->nexthop_neighs_list_node);
1504
1505 if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
1506 mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
1507
1508 neigh_release(n);
1509 }
1510
1511 static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp,
1512 struct mlxsw_sp_nexthop_group *nh_grp,
1513 struct mlxsw_sp_nexthop *nh,
1514 struct fib_nh *fib_nh)
1515 {
1516 struct net_device *dev = fib_nh->nh_dev;
1517 struct in_device *in_dev;
1518 struct mlxsw_sp_rif *r;
1519 int err;
1520
1521 nh->nh_grp = nh_grp;
1522 nh->key.fib_nh = fib_nh;
1523 err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
1524 if (err)
1525 return err;
1526
1527 if (!dev)
1528 return 0;
1529
1530 in_dev = __in_dev_get_rtnl(dev);
1531 if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
1532 fib_nh->nh_flags & RTNH_F_LINKDOWN)
1533 return 0;
1534
1535 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
1536 if (!r)
1537 return 0;
1538 mlxsw_sp_nexthop_rif_init(nh, r);
1539
1540 err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1541 if (err)
1542 goto err_nexthop_neigh_init;
1543
1544 return 0;
1545
1546 err_nexthop_neigh_init:
1547 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1548 return err;
1549 }
1550
1551 static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp,
1552 struct mlxsw_sp_nexthop *nh)
1553 {
1554 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1555 mlxsw_sp_nexthop_rif_fini(nh);
1556 mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
1557 }
1558
1559 static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp,
1560 unsigned long event, struct fib_nh *fib_nh)
1561 {
1562 struct mlxsw_sp_nexthop_key key;
1563 struct mlxsw_sp_nexthop *nh;
1564 struct mlxsw_sp_rif *r;
1565
1566 if (mlxsw_sp->router.aborted)
1567 return;
1568
1569 key.fib_nh = fib_nh;
1570 nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
1571 if (WARN_ON_ONCE(!nh))
1572 return;
1573
1574 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev);
1575 if (!r)
1576 return;
1577
1578 switch (event) {
1579 case FIB_EVENT_NH_ADD:
1580 mlxsw_sp_nexthop_rif_init(nh, r);
1581 mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
1582 break;
1583 case FIB_EVENT_NH_DEL:
1584 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1585 mlxsw_sp_nexthop_rif_fini(nh);
1586 break;
1587 }
1588
1589 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1590 }
1591
1592 static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
1593 struct mlxsw_sp_rif *r)
1594 {
1595 struct mlxsw_sp_nexthop *nh, *tmp;
1596
1597 list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) {
1598 mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
1599 mlxsw_sp_nexthop_rif_fini(nh);
1600 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
1601 }
1602 }
1603
1604 static struct mlxsw_sp_nexthop_group *
1605 mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
1606 {
1607 struct mlxsw_sp_nexthop_group *nh_grp;
1608 struct mlxsw_sp_nexthop *nh;
1609 struct fib_nh *fib_nh;
1610 size_t alloc_size;
1611 int i;
1612 int err;
1613
1614 alloc_size = sizeof(*nh_grp) +
1615 fi->fib_nhs * sizeof(struct mlxsw_sp_nexthop);
1616 nh_grp = kzalloc(alloc_size, GFP_KERNEL);
1617 if (!nh_grp)
1618 return ERR_PTR(-ENOMEM);
1619 INIT_LIST_HEAD(&nh_grp->fib_list);
1620 nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK;
1621 nh_grp->count = fi->fib_nhs;
1622 nh_grp->key.fi = fi;
1623 for (i = 0; i < nh_grp->count; i++) {
1624 nh = &nh_grp->nexthops[i];
1625 fib_nh = &fi->fib_nh[i];
1626 err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh);
1627 if (err)
1628 goto err_nexthop_init;
1629 }
1630 err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
1631 if (err)
1632 goto err_nexthop_group_insert;
1633 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1634 return nh_grp;
1635
1636 err_nexthop_group_insert:
1637 err_nexthop_init:
1638 for (i--; i >= 0; i--) {
1639 nh = &nh_grp->nexthops[i];
1640 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1641 }
1642 kfree(nh_grp);
1643 return ERR_PTR(err);
1644 }
1645
1646 static void
1647 mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp,
1648 struct mlxsw_sp_nexthop_group *nh_grp)
1649 {
1650 struct mlxsw_sp_nexthop *nh;
1651 int i;
1652
1653 mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
1654 for (i = 0; i < nh_grp->count; i++) {
1655 nh = &nh_grp->nexthops[i];
1656 mlxsw_sp_nexthop_fini(mlxsw_sp, nh);
1657 }
1658 mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
1659 WARN_ON_ONCE(nh_grp->adj_index_valid);
1660 kfree(nh_grp);
1661 }
1662
1663 static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp,
1664 struct mlxsw_sp_fib_entry *fib_entry,
1665 struct fib_info *fi)
1666 {
1667 struct mlxsw_sp_nexthop_group_key key;
1668 struct mlxsw_sp_nexthop_group *nh_grp;
1669
1670 key.fi = fi;
1671 nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key);
1672 if (!nh_grp) {
1673 nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi);
1674 if (IS_ERR(nh_grp))
1675 return PTR_ERR(nh_grp);
1676 }
1677 list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
1678 fib_entry->nh_group = nh_grp;
1679 return 0;
1680 }
1681
1682 static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
1683 struct mlxsw_sp_fib_entry *fib_entry)
1684 {
1685 struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
1686
1687 list_del(&fib_entry->nexthop_group_node);
1688 if (!list_empty(&nh_grp->fib_list))
1689 return;
1690 mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp);
1691 }
1692
1693 static bool
1694 mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
1695 {
1696 struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
1697
1698 if (fib_entry->params.tos)
1699 return false;
1700
1701 switch (fib_entry->type) {
1702 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1703 return !!nh_group->adj_index_valid;
1704 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1705 return !!nh_group->nh_rif;
1706 default:
1707 return false;
1708 }
1709 }
1710
1711 static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
1712 {
1713 fib_entry->offloaded = true;
1714
1715 switch (fib_entry->fib_node->vr->proto) {
1716 case MLXSW_SP_L3_PROTO_IPV4:
1717 fib_info_offload_inc(fib_entry->nh_group->key.fi);
1718 break;
1719 case MLXSW_SP_L3_PROTO_IPV6:
1720 WARN_ON_ONCE(1);
1721 }
1722 }
1723
1724 static void
1725 mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
1726 {
1727 switch (fib_entry->fib_node->vr->proto) {
1728 case MLXSW_SP_L3_PROTO_IPV4:
1729 fib_info_offload_dec(fib_entry->nh_group->key.fi);
1730 break;
1731 case MLXSW_SP_L3_PROTO_IPV6:
1732 WARN_ON_ONCE(1);
1733 }
1734
1735 fib_entry->offloaded = false;
1736 }
1737
1738 static void
1739 mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
1740 enum mlxsw_reg_ralue_op op, int err)
1741 {
1742 switch (op) {
1743 case MLXSW_REG_RALUE_OP_WRITE_DELETE:
1744 if (!fib_entry->offloaded)
1745 return;
1746 return mlxsw_sp_fib_entry_offload_unset(fib_entry);
1747 case MLXSW_REG_RALUE_OP_WRITE_WRITE:
1748 if (err)
1749 return;
1750 if (mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1751 !fib_entry->offloaded)
1752 mlxsw_sp_fib_entry_offload_set(fib_entry);
1753 else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) &&
1754 fib_entry->offloaded)
1755 mlxsw_sp_fib_entry_offload_unset(fib_entry);
1756 return;
1757 default:
1758 return;
1759 }
1760 }
1761
1762 static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp,
1763 struct mlxsw_sp_fib_entry *fib_entry,
1764 enum mlxsw_reg_ralue_op op)
1765 {
1766 char ralue_pl[MLXSW_REG_RALUE_LEN];
1767 u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1768 struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
1769 enum mlxsw_reg_ralue_trap_action trap_action;
1770 u16 trap_id = 0;
1771 u32 adjacency_index = 0;
1772 u16 ecmp_size = 0;
1773
1774 /* In case the nexthop group adjacency index is valid, use it
1775 * with provided ECMP size. Otherwise, setup trap and pass
1776 * traffic to kernel.
1777 */
1778 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1779 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1780 adjacency_index = fib_entry->nh_group->adj_index;
1781 ecmp_size = fib_entry->nh_group->ecmp_size;
1782 } else {
1783 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1784 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1785 }
1786
1787 mlxsw_reg_ralue_pack4(ralue_pl,
1788 (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1789 vr->id, fib_entry->fib_node->key.prefix_len,
1790 *p_dip);
1791 mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
1792 adjacency_index, ecmp_size);
1793 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1794 }
1795
1796 static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp,
1797 struct mlxsw_sp_fib_entry *fib_entry,
1798 enum mlxsw_reg_ralue_op op)
1799 {
1800 struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif;
1801 enum mlxsw_reg_ralue_trap_action trap_action;
1802 char ralue_pl[MLXSW_REG_RALUE_LEN];
1803 u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1804 struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
1805 u16 trap_id = 0;
1806 u16 rif = 0;
1807
1808 if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
1809 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
1810 rif = r->rif;
1811 } else {
1812 trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
1813 trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
1814 }
1815
1816 mlxsw_reg_ralue_pack4(ralue_pl,
1817 (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1818 vr->id, fib_entry->fib_node->key.prefix_len,
1819 *p_dip);
1820 mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif);
1821 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1822 }
1823
1824 static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp,
1825 struct mlxsw_sp_fib_entry *fib_entry,
1826 enum mlxsw_reg_ralue_op op)
1827 {
1828 char ralue_pl[MLXSW_REG_RALUE_LEN];
1829 u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr;
1830 struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr;
1831
1832 mlxsw_reg_ralue_pack4(ralue_pl,
1833 (enum mlxsw_reg_ralxx_protocol) vr->proto, op,
1834 vr->id, fib_entry->fib_node->key.prefix_len,
1835 *p_dip);
1836 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
1837 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
1838 }
1839
1840 static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp,
1841 struct mlxsw_sp_fib_entry *fib_entry,
1842 enum mlxsw_reg_ralue_op op)
1843 {
1844 switch (fib_entry->type) {
1845 case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
1846 return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op);
1847 case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
1848 return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op);
1849 case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
1850 return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op);
1851 }
1852 return -EINVAL;
1853 }
1854
1855 static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
1856 struct mlxsw_sp_fib_entry *fib_entry,
1857 enum mlxsw_reg_ralue_op op)
1858 {
1859 int err = -EINVAL;
1860
1861 switch (fib_entry->fib_node->vr->proto) {
1862 case MLXSW_SP_L3_PROTO_IPV4:
1863 err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op);
1864 break;
1865 case MLXSW_SP_L3_PROTO_IPV6:
1866 return err;
1867 }
1868 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err);
1869 return err;
1870 }
1871
1872 static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1873 struct mlxsw_sp_fib_entry *fib_entry)
1874 {
1875 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1876 MLXSW_REG_RALUE_OP_WRITE_WRITE);
1877 }
1878
1879 static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
1880 struct mlxsw_sp_fib_entry *fib_entry)
1881 {
1882 return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
1883 MLXSW_REG_RALUE_OP_WRITE_DELETE);
1884 }
1885
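/* Derive the device entry type from the kernel route type: broadcast
 * and local routes are trapped to the CPU, unreachable / blackhole /
 * prohibit routes use a lower priority local action, and unicast
 * routes are remote when reached via a gateway (link scope nexthop)
 * or local when directly connected.
 */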
1886 static int
1887 mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
1888 const struct fib_entry_notifier_info *fen_info,
1889 struct mlxsw_sp_fib_entry *fib_entry)
1890 {
1891 struct fib_info *fi = fen_info->fi;
1892
1893 switch (fen_info->type) {
1894 case RTN_BROADCAST: /* fall through */
1895 case RTN_LOCAL:
1896 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1897 return 0;
1898 case RTN_UNREACHABLE: /* fall through */
1899 case RTN_BLACKHOLE: /* fall through */
1900 case RTN_PROHIBIT:
1901 /* Packets hitting these routes need to be trapped, but
1902 * at a lower priority than packets directed at the
1903 * host, so use action type local instead of trap.
1904 */
1905 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
1906 return 0;
1907 case RTN_UNICAST:
1908 if (fi->fib_nh->nh_scope != RT_SCOPE_LINK)
1909 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
1910 else
1911 fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
1912 return 0;
1913 default:
1914 return -EINVAL;
1915 }
1916 }
1917
1918 static struct mlxsw_sp_fib_entry *
1919 mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
1920 struct mlxsw_sp_fib_node *fib_node,
1921 const struct fib_entry_notifier_info *fen_info)
1922 {
1923 struct mlxsw_sp_fib_entry *fib_entry;
1924 int err;
1925
1926 fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL);
1927 if (!fib_entry) {
1928 err = -ENOMEM;
1929 goto err_fib_entry_alloc;
1930 }
1931
1932 err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
1933 if (err)
1934 goto err_fib4_entry_type_set;
1935
1936 err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi);
1937 if (err)
1938 goto err_nexthop_group_get;
1939
1940 fib_entry->params.prio = fen_info->fi->fib_priority;
1941 fib_entry->params.tb_id = fen_info->tb_id;
1942 fib_entry->params.type = fen_info->type;
1943 fib_entry->params.tos = fen_info->tos;
1944
1945 fib_entry->fib_node = fib_node;
1946
1947 return fib_entry;
1948
1949 err_nexthop_group_get:
1950 err_fib4_entry_type_set:
1951 kfree(fib_entry);
1952 err_fib_entry_alloc:
1953 return ERR_PTR(err);
1954 }
1955
1956 static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1957 struct mlxsw_sp_fib_entry *fib_entry)
1958 {
1959 mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry);
1960 kfree(fib_entry);
1961 }
1962
1963 static struct mlxsw_sp_fib_node *
1964 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
1965 const struct fib_entry_notifier_info *fen_info);
1966
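/* Find the FIB entry matching the notifier info by walking the
 * prefix's entry list and comparing table ID, TOS, type and fib_info.
 */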
1967 static struct mlxsw_sp_fib_entry *
1968 mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
1969 const struct fib_entry_notifier_info *fen_info)
1970 {
1971 struct mlxsw_sp_fib_entry *fib_entry;
1972 struct mlxsw_sp_fib_node *fib_node;
1973
1974 fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
1975 if (IS_ERR(fib_node))
1976 return NULL;
1977
1978 list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
1979 if (fib_entry->params.tb_id == fen_info->tb_id &&
1980 fib_entry->params.tos == fen_info->tos &&
1981 fib_entry->params.type == fen_info->type &&
1982 fib_entry->nh_group->key.fi == fen_info->fi) {
1983 return fib_entry;
1984 }
1985 }
1986
1987 return NULL;
1988 }
1989
1990 static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
1991 .key_offset = offsetof(struct mlxsw_sp_fib_node, key),
1992 .head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
1993 .key_len = sizeof(struct mlxsw_sp_fib_key),
1994 .automatic_shrinking = true,
1995 };
1996
1997 static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
1998 struct mlxsw_sp_fib_node *fib_node)
1999 {
2000 return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
2001 mlxsw_sp_fib_ht_params);
2002 }
2003
2004 static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
2005 struct mlxsw_sp_fib_node *fib_node)
2006 {
2007 rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
2008 mlxsw_sp_fib_ht_params);
2009 }
2010
2011 static struct mlxsw_sp_fib_node *
2012 mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
2013 size_t addr_len, unsigned char prefix_len)
2014 {
2015 struct mlxsw_sp_fib_key key;
2016
2017 memset(&key, 0, sizeof(key));
2018 memcpy(key.addr, addr, addr_len);
2019 key.prefix_len = prefix_len;
2020 return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
2021 }
2022
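/* Allocate a FIB node for the given prefix, link it into the virtual
 * router's node list and insert it into the FIB hash table.
 */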
2023 static struct mlxsw_sp_fib_node *
2024 mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr,
2025 size_t addr_len, unsigned char prefix_len)
2026 {
2027 struct mlxsw_sp_fib_node *fib_node;
2028
2029 fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
2030 if (!fib_node)
2031 return NULL;
2032
2033 INIT_LIST_HEAD(&fib_node->entry_list);
2034 list_add(&fib_node->list, &vr->fib->node_list);
2035 memcpy(fib_node->key.addr, addr, addr_len);
2036 fib_node->key.prefix_len = prefix_len;
2037 mlxsw_sp_fib_node_insert(vr->fib, fib_node);
2038 fib_node->vr = vr;
2039
2040 return fib_node;
2041 }
2042
2043 static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
2044 {
2045 mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node);
2046 list_del(&fib_node->list);
2047 WARN_ON(!list_empty(&fib_node->entry_list));
2048 kfree(fib_node);
2049 }
2050
2051 static bool
2052 mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
2053 const struct mlxsw_sp_fib_entry *fib_entry)
2054 {
2055 return list_first_entry(&fib_node->entry_list,
2056 struct mlxsw_sp_fib_entry, list) == fib_entry;
2057 }
2058
2059 static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node)
2060 {
2061 unsigned char prefix_len = fib_node->key.prefix_len;
2062 struct mlxsw_sp_fib *fib = fib_node->vr->fib;
2063
2064 if (fib->prefix_ref_count[prefix_len]++ == 0)
2065 mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len);
2066 }
2067
2068 static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node)
2069 {
2070 unsigned char prefix_len = fib_node->key.prefix_len;
2071 struct mlxsw_sp_fib *fib = fib_node->vr->fib;
2072
2073 if (--fib->prefix_ref_count[prefix_len] == 0)
2074 mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len);
2075 }
2076
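/* Return the FIB node for the route's prefix, taking a reference on
 * the matching virtual router and creating the node on first use.
 */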
2077 static struct mlxsw_sp_fib_node *
2078 mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp,
2079 const struct fib_entry_notifier_info *fen_info)
2080 {
2081 struct mlxsw_sp_fib_node *fib_node;
2082 struct mlxsw_sp_vr *vr;
2083 int err;
2084
2085 vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id,
2086 MLXSW_SP_L3_PROTO_IPV4);
2087 if (IS_ERR(vr))
2088 return ERR_CAST(vr);
2089
2090 fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst,
2091 sizeof(fen_info->dst),
2092 fen_info->dst_len);
2093 if (fib_node)
2094 return fib_node;
2095
2096 fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst,
2097 sizeof(fen_info->dst),
2098 fen_info->dst_len);
2099 if (!fib_node) {
2100 err = -ENOMEM;
2101 goto err_fib_node_create;
2102 }
2103
2104 return fib_node;
2105
2106 err_fib_node_create:
2107 mlxsw_sp_vr_put(mlxsw_sp, vr);
2108 return ERR_PTR(err);
2109 }
2110
2111 static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp,
2112 struct mlxsw_sp_fib_node *fib_node)
2113 {
2114 struct mlxsw_sp_vr *vr = fib_node->vr;
2115
2116 if (!list_empty(&fib_node->entry_list))
2117 return;
2118 mlxsw_sp_fib_node_destroy(fib_node);
2119 mlxsw_sp_vr_put(mlxsw_sp, vr);
2120 }
2121
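/* The node's entry list is sorted by decreasing table ID, then
 * decreasing TOS, then increasing priority. Return the entry before
 * which a new entry with the given parameters should be inserted,
 * or NULL if no entry with a matching position was found.
 */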
2122 static struct mlxsw_sp_fib_entry *
2123 mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
2124 const struct mlxsw_sp_fib_entry_params *params)
2125 {
2126 struct mlxsw_sp_fib_entry *fib_entry;
2127
2128 list_for_each_entry(fib_entry, &fib_node->entry_list, list) {
2129 if (fib_entry->params.tb_id > params->tb_id)
2130 continue;
2131 if (fib_entry->params.tb_id != params->tb_id)
2132 break;
2133 if (fib_entry->params.tos > params->tos)
2134 continue;
2135 if (fib_entry->params.prio >= params->prio ||
2136 fib_entry->params.tos < params->tos)
2137 return fib_entry;
2138 }
2139
2140 return NULL;
2141 }
2142
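/* Append the new entry after the last existing entry with the same
 * table ID, TOS and priority. The caller passes the first such entry,
 * as found by mlxsw_sp_fib4_node_entry_find().
 */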
2143 static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry,
2144 struct mlxsw_sp_fib_entry *new_entry)
2145 {
2146 struct mlxsw_sp_fib_node *fib_node;
2147
2148 if (WARN_ON(!fib_entry))
2149 return -EINVAL;
2150
2151 fib_node = fib_entry->fib_node;
2152 list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) {
2153 if (fib_entry->params.tb_id != new_entry->params.tb_id ||
2154 fib_entry->params.tos != new_entry->params.tos ||
2155 fib_entry->params.prio != new_entry->params.prio)
2156 break;
2157 }
2158
2159 list_add_tail(&new_entry->list, &fib_entry->list);
2160 return 0;
2161 }
2162
2163 static int
2164 mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node,
2165 struct mlxsw_sp_fib_entry *new_entry,
2166 bool replace, bool append)
2167 {
2168 struct mlxsw_sp_fib_entry *fib_entry;
2169
2170 fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params);
2171
2172 if (append)
2173 return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry);
2174 if (replace && WARN_ON(!fib_entry))
2175 return -EINVAL;
2176
2177 /* Insert the new entry before the replaced one, so that we
2178 * can later remove the replaced entry.
2179 */
2180 if (fib_entry) {
2181 list_add_tail(&new_entry->list, &fib_entry->list);
2182 } else {
2183 struct mlxsw_sp_fib_entry *last;
2184
2185 list_for_each_entry(last, &fib_node->entry_list, list) {
2186 if (new_entry->params.tb_id > last->params.tb_id)
2187 break;
2188 fib_entry = last;
2189 }
2190
2191 if (fib_entry)
2192 list_add(&new_entry->list, &fib_entry->list);
2193 else
2194 list_add(&new_entry->list, &fib_node->entry_list);
2195 }
2196
2197 return 0;
2198 }
2199
2200 static void
2201 mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry)
2202 {
2203 list_del(&fib_entry->list);
2204 }
2205
2206 static int
2207 mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp,
2208 const struct mlxsw_sp_fib_node *fib_node,
2209 struct mlxsw_sp_fib_entry *fib_entry)
2210 {
2211 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2212 return 0;
2213
2214 /* To prevent packet loss, overwrite the previously offloaded
2215 * entry.
2216 */
2217 if (!list_is_singular(&fib_node->entry_list)) {
2218 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2219 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2220
2221 mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
2222 }
2223
2224 return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
2225 }
2226
2227 static void
2228 mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp,
2229 const struct mlxsw_sp_fib_node *fib_node,
2230 struct mlxsw_sp_fib_entry *fib_entry)
2231 {
2232 if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
2233 return;
2234
2235 /* Promote the next entry by overwriting the deleted entry */
2236 if (!list_is_singular(&fib_node->entry_list)) {
2237 struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
2238 enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
2239
2240 mlxsw_sp_fib_entry_update(mlxsw_sp, n);
2241 mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
2242 return;
2243 }
2244
2245 mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
2246 }
2247
2248 static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
2249 struct mlxsw_sp_fib_entry *fib_entry,
2250 bool replace, bool append)
2251 {
2252 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2253 int err;
2254
2255 err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace,
2256 append);
2257 if (err)
2258 return err;
2259
2260 err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry);
2261 if (err)
2262 goto err_fib4_node_entry_add;
2263
2264 mlxsw_sp_fib_node_prefix_inc(fib_node);
2265
2266 return 0;
2267
2268 err_fib4_node_entry_add:
2269 mlxsw_sp_fib4_node_list_remove(fib_entry);
2270 return err;
2271 }
2272
2273 static void
2274 mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
2275 struct mlxsw_sp_fib_entry *fib_entry)
2276 {
2277 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2278
2279 mlxsw_sp_fib_node_prefix_dec(fib_node);
2280 mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry);
2281 mlxsw_sp_fib4_node_list_remove(fib_entry);
2282 }
2283
2284 static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
2285 struct mlxsw_sp_fib_entry *fib_entry,
2286 bool replace)
2287 {
2288 struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
2289 struct mlxsw_sp_fib_entry *replaced;
2290
2291 if (!replace)
2292 return;
2293
2294 /* We inserted the new entry before the replaced one */
2295 replaced = list_next_entry(fib_entry, list);
2296
2297 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
2298 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
2299 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2300 }
2301
2302 static int
2303 mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
2304 const struct fib_entry_notifier_info *fen_info,
2305 bool replace, bool append)
2306 {
2307 struct mlxsw_sp_fib_entry *fib_entry;
2308 struct mlxsw_sp_fib_node *fib_node;
2309 int err;
2310
2311 if (mlxsw_sp->router.aborted)
2312 return 0;
2313
2314 fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info);
2315 if (IS_ERR(fib_node)) {
2316 dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
2317 return PTR_ERR(fib_node);
2318 }
2319
2320 fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
2321 if (IS_ERR(fib_entry)) {
2322 dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
2323 err = PTR_ERR(fib_entry);
2324 goto err_fib4_entry_create;
2325 }
2326
2327 err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace,
2328 append);
2329 if (err) {
2330 dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
2331 goto err_fib4_node_entry_link;
2332 }
2333
2334 mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace);
2335
2336 return 0;
2337
2338 err_fib4_node_entry_link:
2339 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2340 err_fib4_entry_create:
2341 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2342 return err;
2343 }
2344
2345 static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
2346 struct fib_entry_notifier_info *fen_info)
2347 {
2348 struct mlxsw_sp_fib_entry *fib_entry;
2349 struct mlxsw_sp_fib_node *fib_node;
2350
2351 if (mlxsw_sp->router.aborted)
2352 return;
2353
2354 fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
2355 if (WARN_ON(!fib_entry))
2356 return;
2357 fib_node = fib_entry->fib_node;
2358
2359 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2360 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2361 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2362 }
2363
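/* Program the minimal LPM tree, bind virtual router 0 to it and
 * install a default route whose action is ip2me, so that after an
 * abort all packets are sent to the CPU and routed by the kernel.
 */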
2364 static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
2365 {
2366 char ralta_pl[MLXSW_REG_RALTA_LEN];
2367 char ralst_pl[MLXSW_REG_RALST_LEN];
2368 char raltb_pl[MLXSW_REG_RALTB_LEN];
2369 char ralue_pl[MLXSW_REG_RALUE_LEN];
2370 int err;
2371
2372 mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2373 MLXSW_SP_LPM_TREE_MIN);
2374 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
2375 if (err)
2376 return err;
2377
2378 mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN);
2379 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
2380 if (err)
2381 return err;
2382
2383 mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4,
2384 MLXSW_SP_LPM_TREE_MIN);
2385 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
2386 if (err)
2387 return err;
2388
2389 mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
2390 MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);
2391 mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
2392 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
2393 }
2394
2395 static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
2396 struct mlxsw_sp_fib_node *fib_node)
2397 {
2398 struct mlxsw_sp_fib_entry *fib_entry, *tmp;
2399
2400 list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) {
2401 bool do_break = &tmp->list == &fib_node->entry_list;
2402
2403 mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry);
2404 mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry);
2405 mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node);
2406 /* Break when the entry list is empty and the node was
2407 * freed. Otherwise, we'll access freed memory in the
2408 * next iteration.
2409 */
2410 if (do_break)
2411 break;
2412 }
2413 }
2414
2415 static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
2416 struct mlxsw_sp_fib_node *fib_node)
2417 {
2418 switch (fib_node->vr->proto) {
2419 case MLXSW_SP_L3_PROTO_IPV4:
2420 mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
2421 break;
2422 case MLXSW_SP_L3_PROTO_IPV6:
2423 WARN_ON_ONCE(1);
2424 break;
2425 }
2426 }
2427
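/* Flush all FIB nodes from all active virtual routers. Flushing a
 * node may free it, so iteration must check for the list end before
 * advancing to the next (potentially freed) node.
 */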
2428 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
2429 {
2430 struct mlxsw_sp_fib_node *fib_node, *tmp;
2431 struct mlxsw_sp_vr *vr;
2432 int i;
2433
2434 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
2435 vr = &mlxsw_sp->router.vrs[i];
2436
2437 if (!vr->used)
2438 continue;
2439
2440 list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list,
2441 list) {
2442 bool do_break = &tmp->list == &vr->fib->node_list;
2443
2444 mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
2445 if (do_break)
2446 break;
2447 }
2448 }
2449 }
2450
2451 static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
2452 {
2453 int err;
2454
2455 if (mlxsw_sp->router.aborted)
2456 return;
2457 dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
2458 mlxsw_sp_router_fib_flush(mlxsw_sp);
2459 mlxsw_sp->router.aborted = true;
2460 err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
2461 if (err)
2462 dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
2463 }
2464
2465 struct mlxsw_sp_fib_event_work {
2466 struct work_struct work;
2467 union {
2468 struct fib_entry_notifier_info fen_info;
2469 struct fib_nh_notifier_info fnh_info;
2470 };
2471 struct mlxsw_sp *mlxsw_sp;
2472 unsigned long event;
2473 };
2474
2475 static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
2476 {
2477 struct mlxsw_sp_fib_event_work *fib_work =
2478 container_of(work, struct mlxsw_sp_fib_event_work, work);
2479 struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
2480 bool replace, append;
2481 int err;
2482
2483 /* Protect internal structures from changes */
2484 rtnl_lock();
2485 switch (fib_work->event) {
2486 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2487 case FIB_EVENT_ENTRY_APPEND: /* fall through */
2488 case FIB_EVENT_ENTRY_ADD:
2489 replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
2490 append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
2491 err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
2492 replace, append);
2493 if (err)
2494 mlxsw_sp_router_fib4_abort(mlxsw_sp);
2495 fib_info_put(fib_work->fen_info.fi);
2496 break;
2497 case FIB_EVENT_ENTRY_DEL:
2498 mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
2499 fib_info_put(fib_work->fen_info.fi);
2500 break;
2501 case FIB_EVENT_RULE_ADD: /* fall through */
2502 case FIB_EVENT_RULE_DEL:
2503 mlxsw_sp_router_fib4_abort(mlxsw_sp);
2504 break;
2505 case FIB_EVENT_NH_ADD: /* fall through */
2506 case FIB_EVENT_NH_DEL:
2507 mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event,
2508 fib_work->fnh_info.fib_nh);
2509 fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
2510 break;
2511 }
2512 rtnl_unlock();
2513 kfree(fib_work);
2514 }
2515
2516 /* Called with rcu_read_lock() */
2517 static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
2518 unsigned long event, void *ptr)
2519 {
2520 struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
2521 struct mlxsw_sp_fib_event_work *fib_work;
2522 struct fib_notifier_info *info = ptr;
2523
2524 if (!net_eq(info->net, &init_net))
2525 return NOTIFY_DONE;
2526
2527 fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
2528 if (WARN_ON(!fib_work))
2529 return NOTIFY_BAD;
2530
2531 INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work);
2532 fib_work->mlxsw_sp = mlxsw_sp;
2533 fib_work->event = event;
2534
2535 switch (event) {
2536 case FIB_EVENT_ENTRY_REPLACE: /* fall through */
2537 case FIB_EVENT_ENTRY_APPEND: /* fall through */
2538 case FIB_EVENT_ENTRY_ADD: /* fall through */
2539 case FIB_EVENT_ENTRY_DEL:
2540 memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
2541 /* Take a reference on the fib_info to prevent it from being
2542 * freed while the work is queued. Release it afterwards.
2543 */
2544 fib_info_hold(fib_work->fen_info.fi);
2545 break;
2546 case FIB_EVENT_NH_ADD: /* fall through */
2547 case FIB_EVENT_NH_DEL:
2548 memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info));
2549 fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
2550 break;
2551 }
2552
2553 mlxsw_core_schedule_work(&fib_work->work);
2554
2555 return NOTIFY_DONE;
2556 }
2557
2558 static struct mlxsw_sp_rif *
2559 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
2560 const struct net_device *dev)
2561 {
2562 int i;
2563
2564 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2565 if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
2566 return mlxsw_sp->rifs[i];
2567
2568 return NULL;
2569 }
2570
2571 static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
2572 {
2573 char ritr_pl[MLXSW_REG_RITR_LEN];
2574 int err;
2575
2576 mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
2577 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2578 if (WARN_ON_ONCE(err))
2579 return err;
2580
2581 mlxsw_reg_ritr_enable_set(ritr_pl, false);
2582 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2583 }
2584
2585 static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2586 struct mlxsw_sp_rif *r)
2587 {
2588 mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif);
2589 mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r);
2590 mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r);
2591 }
2592
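/* Decide whether an inetaddr event requires RIF (de)configuration:
 * create a RIF on NETDEV_UP if none exists and destroy it on
 * NETDEV_DOWN once the last IP address was removed.
 */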
2593 static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r,
2594 const struct in_device *in_dev,
2595 unsigned long event)
2596 {
2597 switch (event) {
2598 case NETDEV_UP:
2599 if (!r)
2600 return true;
2601 return false;
2602 case NETDEV_DOWN:
2603 if (r && !in_dev->ifa_list)
2604 return true;
2605 /* It is possible we already removed the RIF ourselves
2606 * if it was assigned to a netdev that is now a bridge
2607 * or LAG slave.
2608 */
2609 return false;
2610 }
2611
2612 return false;
2613 }
2614
2615 #define MLXSW_SP_INVALID_RIF 0xffff
2616 static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
2617 {
2618 int i;
2619
2620 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
2621 if (!mlxsw_sp->rifs[i])
2622 return i;
2623
2624 return MLXSW_SP_INVALID_RIF;
2625 }
2626
2627 static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport,
2628 bool *p_lagged, u16 *p_system_port)
2629 {
2630 u8 local_port = mlxsw_sp_vport->local_port;
2631
2632 *p_lagged = mlxsw_sp_vport->lagged;
2633 *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port;
2634 }
2635
2636 static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport,
2637 struct net_device *l3_dev, u16 rif,
2638 bool create)
2639 {
2640 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
2641 bool lagged = mlxsw_sp_vport->lagged;
2642 char ritr_pl[MLXSW_REG_RITR_LEN];
2643 u16 system_port;
2644
2645 mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif,
2646 l3_dev->mtu, l3_dev->dev_addr);
2647
2648 mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port);
2649 mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port,
2650 mlxsw_sp_vport_vid_get(mlxsw_sp_vport));
2651
2652 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2653 }
2654
2655 static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport);
2656
2657 static u16 mlxsw_sp_rif_sp_to_fid(u16 rif)
2658 {
2659 return MLXSW_SP_RFID_BASE + rif;
2660 }
2661
2662 static struct mlxsw_sp_fid *
2663 mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev)
2664 {
2665 struct mlxsw_sp_fid *f;
2666
2667 f = kzalloc(sizeof(*f), GFP_KERNEL);
2668 if (!f)
2669 return NULL;
2670
2671 f->leave = mlxsw_sp_vport_rif_sp_leave;
2672 f->ref_count = 0;
2673 f->dev = l3_dev;
2674 f->fid = fid;
2675
2676 return f;
2677 }
2678
2679 static struct mlxsw_sp_rif *
2680 mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f)
2681 {
2682 struct mlxsw_sp_rif *r;
2683
2684 r = kzalloc(sizeof(*r), GFP_KERNEL);
2685 if (!r)
2686 return NULL;
2687
2688 INIT_LIST_HEAD(&r->nexthop_list);
2689 INIT_LIST_HEAD(&r->neigh_list);
2690 ether_addr_copy(r->addr, l3_dev->dev_addr);
2691 r->mtu = l3_dev->mtu;
2692 r->dev = l3_dev;
2693 r->rif = rif;
2694 r->f = f;
2695
2696 return r;
2697 }
2698
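/* Create a Sub-port RIF for the vPort: reserve a free RIF index,
 * configure the router interface in hardware, install an FDB entry
 * directing the RIF's MAC to the router and allocate the backing
 * rFID and RIF structures.
 */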
2699 static struct mlxsw_sp_rif *
2700 mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
2701 struct net_device *l3_dev)
2702 {
2703 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
2704 struct mlxsw_sp_fid *f;
2705 struct mlxsw_sp_rif *r;
2706 u16 fid, rif;
2707 int err;
2708
2709 rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
2710 if (rif == MLXSW_SP_INVALID_RIF)
2711 return ERR_PTR(-ERANGE);
2712
2713 err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true);
2714 if (err)
2715 return ERR_PTR(err);
2716
2717 fid = mlxsw_sp_rif_sp_to_fid(rif);
2718 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true);
2719 if (err)
2720 goto err_rif_fdb_op;
2721
2722 f = mlxsw_sp_rfid_alloc(fid, l3_dev);
2723 if (!f) {
2724 err = -ENOMEM;
2725 goto err_rfid_alloc;
2726 }
2727
2728 r = mlxsw_sp_rif_alloc(rif, l3_dev, f);
2729 if (!r) {
2730 err = -ENOMEM;
2731 goto err_rif_alloc;
2732 }
2733
2734 f->r = r;
2735 mlxsw_sp->rifs[rif] = r;
2736
2737 return r;
2738
2739 err_rif_alloc:
2740 kfree(f);
2741 err_rfid_alloc:
2742 mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
2743 err_rif_fdb_op:
2744 mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false);
2745 return ERR_PTR(err);
2746 }
2747
2748 static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport,
2749 struct mlxsw_sp_rif *r)
2750 {
2751 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
2752 struct net_device *l3_dev = r->dev;
2753 struct mlxsw_sp_fid *f = r->f;
2754 u16 fid = f->fid;
2755 u16 rif = r->rif;
2756
2757 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r);
2758
2759 mlxsw_sp->rifs[rif] = NULL;
2760 f->r = NULL;
2761
2762 kfree(r);
2763
2764 kfree(f);
2765
2766 mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false);
2767
2768 mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false);
2769 }
2770
2771 static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport,
2772 struct net_device *l3_dev)
2773 {
2774 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp;
2775 struct mlxsw_sp_rif *r;
2776
2777 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
2778 if (!r) {
2779 r = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev);
2780 if (IS_ERR(r))
2781 return PTR_ERR(r);
2782 }
2783
2784 mlxsw_sp_vport_fid_set(mlxsw_sp_vport, r->f);
2785 r->f->ref_count++;
2786
2787 netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", r->f->fid);
2788
2789 return 0;
2790 }
2791
2792 static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport)
2793 {
2794 struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport);
2795
2796 netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid);
2797
2798 mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL);
2799 if (--f->ref_count == 0)
2800 mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->r);
2801 }
2802
2803 static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev,
2804 struct net_device *port_dev,
2805 unsigned long event, u16 vid)
2806 {
2807 struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
2808 struct mlxsw_sp_port *mlxsw_sp_vport;
2809
2810 mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid);
2811 if (WARN_ON(!mlxsw_sp_vport))
2812 return -EINVAL;
2813
2814 switch (event) {
2815 case NETDEV_UP:
2816 return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev);
2817 case NETDEV_DOWN:
2818 mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport);
2819 break;
2820 }
2821
2822 return 0;
2823 }
2824
2825 static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
2826 unsigned long event)
2827 {
2828 if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev))
2829 return 0;
2830
2831 return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1);
2832 }
2833
2834 static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
2835 struct net_device *lag_dev,
2836 unsigned long event, u16 vid)
2837 {
2838 struct net_device *port_dev;
2839 struct list_head *iter;
2840 int err;
2841
2842 netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
2843 if (mlxsw_sp_port_dev_check(port_dev)) {
2844 err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev,
2845 event, vid);
2846 if (err)
2847 return err;
2848 }
2849 }
2850
2851 return 0;
2852 }
2853
2854 static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
2855 unsigned long event)
2856 {
2857 if (netif_is_bridge_port(lag_dev))
2858 return 0;
2859
2860 return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
2861 }
2862
2863 static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp,
2864 struct net_device *l3_dev)
2865 {
2866 u16 fid;
2867
2868 if (is_vlan_dev(l3_dev))
2869 fid = vlan_dev_vlan_id(l3_dev);
2870 else if (mlxsw_sp->master_bridge.dev == l3_dev)
2871 fid = 1;
2872 else
2873 return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev);
2874
2875 return mlxsw_sp_fid_find(mlxsw_sp, fid);
2876 }
2877
2878 static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid)
2879 {
2880 return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID :
2881 MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST;
2882 }
2883
2884 static u16 mlxsw_sp_flood_table_index_get(u16 fid)
2885 {
2886 return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid;
2887 }
2888
2889 static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid,
2890 bool set)
2891 {
2892 enum mlxsw_flood_table_type table_type;
2893 char *sftr_pl;
2894 u16 index;
2895 int err;
2896
2897 sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL);
2898 if (!sftr_pl)
2899 return -ENOMEM;
2900
2901 table_type = mlxsw_sp_flood_table_type_get(fid);
2902 index = mlxsw_sp_flood_table_index_get(fid);
2903 mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type,
2904 1, MLXSW_PORT_ROUTER_PORT, set);
2905 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl);
2906
2907 kfree(sftr_pl);
2908 return err;
2909 }
2910
2911 static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid)
2912 {
2913 if (mlxsw_sp_fid_is_vfid(fid))
2914 return MLXSW_REG_RITR_FID_IF;
2915 else
2916 return MLXSW_REG_RITR_VLAN_IF;
2917 }
2918
2919 static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp,
2920 struct net_device *l3_dev,
2921 u16 fid, u16 rif,
2922 bool create)
2923 {
2924 enum mlxsw_reg_ritr_if_type rif_type;
2925 char ritr_pl[MLXSW_REG_RITR_LEN];
2926
2927 rif_type = mlxsw_sp_rif_type_get(fid);
2928 mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, l3_dev->mtu,
2929 l3_dev->dev_addr);
2930 mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid);
2931
2932 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
2933 }
2934
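/* Create a RIF for a bridge or VLAN device: enable flooding towards
 * the router port for the FID, configure the router interface in
 * hardware and install an FDB entry for the RIF's MAC.
 */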
2935 static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
2936 struct net_device *l3_dev,
2937 struct mlxsw_sp_fid *f)
2938 {
2939 struct mlxsw_sp_rif *r;
2940 u16 rif;
2941 int err;
2942
2943 rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
2944 if (rif == MLXSW_SP_INVALID_RIF)
2945 return -ERANGE;
2946
2947 err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
2948 if (err)
2949 return err;
2950
2951 err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true);
2952 if (err)
2953 goto err_rif_bridge_op;
2954
2955 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true);
2956 if (err)
2957 goto err_rif_fdb_op;
2958
2959 r = mlxsw_sp_rif_alloc(rif, l3_dev, f);
2960 if (!r) {
2961 err = -ENOMEM;
2962 goto err_rif_alloc;
2963 }
2964
2965 f->r = r;
2966 mlxsw_sp->rifs[rif] = r;
2967
2968 netdev_dbg(l3_dev, "RIF=%d created\n", rif);
2969
2970 return 0;
2971
2972 err_rif_alloc:
2973 mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
2974 err_rif_fdb_op:
2975 mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
2976 err_rif_bridge_op:
2977 mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
2978 return err;
2979 }
2980
2981 void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp,
2982 struct mlxsw_sp_rif *r)
2983 {
2984 struct net_device *l3_dev = r->dev;
2985 struct mlxsw_sp_fid *f = r->f;
2986 u16 rif = r->rif;
2987
2988 mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r);
2989
2990 mlxsw_sp->rifs[rif] = NULL;
2991 f->r = NULL;
2992
2993 kfree(r);
2994
2995 mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false);
2996
2997 mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false);
2998
2999 mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false);
3000
3001 netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif);
3002 }
3003
3004 static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
3005 struct net_device *br_dev,
3006 unsigned long event)
3007 {
3008 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
3009 struct mlxsw_sp_fid *f;
3010
3011 /* FID can either be an actual FID if the L3 device is the
3012 * VLAN-aware bridge or a VLAN device on top. Otherwise, the
3013 * L3 device is a VLAN-unaware bridge and we get a vFID.
3014 */
3015 f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev);
3016 if (WARN_ON(!f))
3017 return -EINVAL;
3018
3019 switch (event) {
3020 case NETDEV_UP:
3021 return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f);
3022 case NETDEV_DOWN:
3023 mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r);
3024 break;
3025 }
3026
3027 return 0;
3028 }
3029
3030 static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
3031 unsigned long event)
3032 {
3033 struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
3034 struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev);
3035 u16 vid = vlan_dev_vlan_id(vlan_dev);
3036
3037 if (mlxsw_sp_port_dev_check(real_dev))
3038 return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event,
3039 vid);
3040 else if (netif_is_lag_master(real_dev))
3041 return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
3042 vid);
3043 else if (netif_is_bridge_master(real_dev) &&
3044 mlxsw_sp->master_bridge.dev == real_dev)
3045 return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev,
3046 event);
3047
3048 return 0;
3049 }
3050
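/* Notifier for IPv4 address addition and removal. Resolve the mlxsw
 * device beneath the netdev and configure or destroy the matching
 * RIF according to the device type: port, LAG, bridge or VLAN.
 */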
3051 int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
3052 unsigned long event, void *ptr)
3053 {
3054 struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
3055 struct net_device *dev = ifa->ifa_dev->dev;
3056 struct mlxsw_sp *mlxsw_sp;
3057 struct mlxsw_sp_rif *r;
3058 int err = 0;
3059
3060 mlxsw_sp = mlxsw_sp_lower_get(dev);
3061 if (!mlxsw_sp)
3062 goto out;
3063
3064 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3065 if (!mlxsw_sp_rif_should_config(r, ifa->ifa_dev, event))
3066 goto out;
3067
3068 if (mlxsw_sp_port_dev_check(dev))
3069 err = mlxsw_sp_inetaddr_port_event(dev, event);
3070 else if (netif_is_lag_master(dev))
3071 err = mlxsw_sp_inetaddr_lag_event(dev, event);
3072 else if (netif_is_bridge_master(dev))
3073 err = mlxsw_sp_inetaddr_bridge_event(dev, dev, event);
3074 else if (is_vlan_dev(dev))
3075 err = mlxsw_sp_inetaddr_vlan_event(dev, event);
3076
3077 out:
3078 return notifier_from_errno(err);
3079 }
3080
3081 static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif,
3082 const char *mac, int mtu)
3083 {
3084 char ritr_pl[MLXSW_REG_RITR_LEN];
3085 int err;
3086
3087 mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
3088 err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3089 if (err)
3090 return err;
3091
3092 mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
3093 mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
3094 mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
3095 return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
3096 }
3097
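/* Called when the MAC address or MTU of a netdev backing a RIF
 * changed. Update the router interface and migrate the FDB entry to
 * the new address, rolling back on failure.
 */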
3098 int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
3099 {
3100 struct mlxsw_sp *mlxsw_sp;
3101 struct mlxsw_sp_rif *r;
3102 int err;
3103
3104 mlxsw_sp = mlxsw_sp_lower_get(dev);
3105 if (!mlxsw_sp)
3106 return 0;
3107
3108 r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3109 if (!r)
3110 return 0;
3111
3112 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, false);
3113 if (err)
3114 return err;
3115
3116 err = mlxsw_sp_rif_edit(mlxsw_sp, r->rif, dev->dev_addr, dev->mtu);
3117 if (err)
3118 goto err_rif_edit;
3119
3120 err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, r->f->fid, true);
3121 if (err)
3122 goto err_rif_fdb_op;
3123
3124 ether_addr_copy(r->addr, dev->dev_addr);
3125 r->mtu = dev->mtu;
3126
3127 netdev_dbg(dev, "Updated RIF=%d\n", r->rif);
3128
3129 return 0;
3130
3131 err_rif_fdb_op:
3132 mlxsw_sp_rif_edit(mlxsw_sp, r->rif, r->addr, r->mtu);
3133 err_rif_edit:
3134 mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, true);
3135 return err;
3136 }
3137
3138 static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
3139 {
3140 struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
3141
3142 /* Flush pending FIB notifications and then flush the device's
3143 * table before requesting another dump. The FIB notification
3144 * block is unregistered, so no need to take RTNL.
3145 */
3146 mlxsw_core_flush_owq();
3147 mlxsw_sp_router_fib_flush(mlxsw_sp);
3148 }
3149
3150 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
3151 {
3152 char rgcr_pl[MLXSW_REG_RGCR_LEN];
3153 u64 max_rifs;
3154 int err;
3155
3156 if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
3157 return -EIO;
3158
3159 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
3160 mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
3161 GFP_KERNEL);
3162 if (!mlxsw_sp->rifs)
3163 return -ENOMEM;
3164
3165 mlxsw_reg_rgcr_pack(rgcr_pl, true);
3166 mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
3167 err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
3168 if (err)
3169 goto err_rgcr_fail;
3170
3171 return 0;
3172
3173 err_rgcr_fail:
3174 kfree(mlxsw_sp->rifs);
3175 return err;
3176 }
3177
3178 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
3179 {
3180 char rgcr_pl[MLXSW_REG_RGCR_LEN];
3181 int i;
3182
3183 mlxsw_reg_rgcr_pack(rgcr_pl, false);
3184 mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
3185
3186 for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
3187 WARN_ON_ONCE(mlxsw_sp->rifs[i]);
3188
3189 kfree(mlxsw_sp->rifs);
3190 }
3191
3192 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
3193 {
3194 int err;
3195
3196 INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list);
3197 err = __mlxsw_sp_router_init(mlxsw_sp);
3198 if (err)
3199 return err;
3200
3201 err = rhashtable_init(&mlxsw_sp->router.nexthop_ht,
3202 &mlxsw_sp_nexthop_ht_params);
3203 if (err)
3204 goto err_nexthop_ht_init;
3205
3206 err = rhashtable_init(&mlxsw_sp->router.nexthop_group_ht,
3207 &mlxsw_sp_nexthop_group_ht_params);
3208 if (err)
3209 goto err_nexthop_group_ht_init;
3210
3211 mlxsw_sp_lpm_init(mlxsw_sp);
3212 err = mlxsw_sp_vrs_init(mlxsw_sp);
3213 if (err)
3214 goto err_vrs_init;
3215
3216 err = mlxsw_sp_neigh_init(mlxsw_sp);
3217 if (err)
3218 goto err_neigh_init;
3219
3220 mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
3221 err = register_fib_notifier(&mlxsw_sp->fib_nb,
3222 mlxsw_sp_router_fib_dump_flush);
3223 if (err)
3224 goto err_register_fib_notifier;
3225
3226 return 0;
3227
3228 err_register_fib_notifier:
3229 mlxsw_sp_neigh_fini(mlxsw_sp);
3230 err_neigh_init:
3231 mlxsw_sp_vrs_fini(mlxsw_sp);
3232 err_vrs_init:
3233 rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
3234 err_nexthop_group_ht_init:
3235 rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
3236 err_nexthop_ht_init:
3237 __mlxsw_sp_router_fini(mlxsw_sp);
3238 return err;
3239 }
3240
3241 void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
3242 {
3243 unregister_fib_notifier(&mlxsw_sp->fib_nb);
3244 mlxsw_sp_neigh_fini(mlxsw_sp);
3245 mlxsw_sp_vrs_fini(mlxsw_sp);
3246 rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht);
3247 rhashtable_destroy(&mlxsw_sp->router.nexthop_ht);
3248 __mlxsw_sp_router_fini(mlxsw_sp);
3249 }