]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blob - net/ipv4/nexthop.c
11bfb1eb7f84ad6a475792ea261fe66c52884b54
[mirror_ubuntu-hirsute-kernel.git] / net / ipv4 / nexthop.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Generic nexthop implementation
3 *
4 * Copyright (c) 2017-19 Cumulus Networks
5 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
6 */
7
8 #include <linux/nexthop.h>
9 #include <linux/rtnetlink.h>
10 #include <linux/slab.h>
11 #include <net/arp.h>
12 #include <net/ipv6_stubs.h>
13 #include <net/lwtunnel.h>
14 #include <net/ndisc.h>
15 #include <net/nexthop.h>
16 #include <net/route.h>
17 #include <net/sock.h>
18
/* Forward declaration: remove_nexthop() is called by remove_nh_grp_entry()
 * before its definition further down in this file.
 */
static void remove_nexthop(struct net *net, struct nexthop *nh,
			   struct nl_info *nlinfo);

/* Width and bucket count used by nh_dev_hashfn() when folding a value
 * into an index of the per-netns device hash.
 */
#define NH_DEV_HASHBITS  8
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
24
/* Netlink attribute policy for nexthop messages: the expected type of
 * each NHA_* attribute. NHA_GROUP is declared as an opaque binary blob
 * here; its length and contents are checked by nh_check_attr_group().
 */
static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
	[NHA_ID]		= { .type = NLA_U32 },
	[NHA_GROUP]		= { .type = NLA_BINARY },
	[NHA_GROUP_TYPE]	= { .type = NLA_U16 },
	[NHA_BLACKHOLE]		= { .type = NLA_FLAG },
	[NHA_OIF]		= { .type = NLA_U32 },
	[NHA_GATEWAY]		= { .type = NLA_BINARY },
	[NHA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[NHA_ENCAP]		= { .type = NLA_NESTED },
	[NHA_GROUPS]		= { .type = NLA_FLAG },
	[NHA_MASTER]		= { .type = NLA_U32 },
	[NHA_FDB]		= { .type = NLA_FLAG },
};
38
39 static bool nexthop_notifiers_is_empty(struct net *net)
40 {
41 return !net->nexthop.notifier_chain.head;
42 }
43
44 static void
45 __nh_notifier_single_info_init(struct nh_notifier_single_info *nh_info,
46 const struct nexthop *nh)
47 {
48 struct nh_info *nhi = rtnl_dereference(nh->nh_info);
49
50 nh_info->dev = nhi->fib_nhc.nhc_dev;
51 nh_info->gw_family = nhi->fib_nhc.nhc_gw_family;
52 if (nh_info->gw_family == AF_INET)
53 nh_info->ipv4 = nhi->fib_nhc.nhc_gw.ipv4;
54 else if (nh_info->gw_family == AF_INET6)
55 nh_info->ipv6 = nhi->fib_nhc.nhc_gw.ipv6;
56
57 nh_info->is_reject = nhi->reject_nh;
58 nh_info->is_fdb = nhi->fdb_nh;
59 nh_info->has_encap = !!nhi->fib_nhc.nhc_lwtstate;
60 }
61
62 static int nh_notifier_single_info_init(struct nh_notifier_info *info,
63 const struct nexthop *nh)
64 {
65 info->nh = kzalloc(sizeof(*info->nh), GFP_KERNEL);
66 if (!info->nh)
67 return -ENOMEM;
68
69 __nh_notifier_single_info_init(info->nh, nh);
70
71 return 0;
72 }
73
74 static void nh_notifier_single_info_fini(struct nh_notifier_info *info)
75 {
76 kfree(info->nh);
77 }
78
79 static int nh_notifier_grp_info_init(struct nh_notifier_info *info,
80 const struct nexthop *nh)
81 {
82 struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
83 u16 num_nh = nhg->num_nh;
84 int i;
85
86 info->nh_grp = kzalloc(struct_size(info->nh_grp, nh_entries, num_nh),
87 GFP_KERNEL);
88 if (!info->nh_grp)
89 return -ENOMEM;
90
91 info->nh_grp->num_nh = num_nh;
92 info->nh_grp->is_fdb = nhg->fdb_nh;
93
94 for (i = 0; i < num_nh; i++) {
95 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
96
97 info->nh_grp->nh_entries[i].id = nhge->nh->id;
98 info->nh_grp->nh_entries[i].weight = nhge->weight;
99 __nh_notifier_single_info_init(&info->nh_grp->nh_entries[i].nh,
100 nhge->nh);
101 }
102
103 return 0;
104 }
105
106 static void nh_notifier_grp_info_fini(struct nh_notifier_info *info)
107 {
108 kfree(info->nh_grp);
109 }
110
111 static int nh_notifier_info_init(struct nh_notifier_info *info,
112 const struct nexthop *nh)
113 {
114 info->id = nh->id;
115 info->is_grp = nh->is_group;
116
117 if (info->is_grp)
118 return nh_notifier_grp_info_init(info, nh);
119 else
120 return nh_notifier_single_info_init(info, nh);
121 }
122
123 static void nh_notifier_info_fini(struct nh_notifier_info *info)
124 {
125 if (info->is_grp)
126 nh_notifier_grp_info_fini(info);
127 else
128 nh_notifier_single_info_fini(info);
129 }
130
131 static int call_nexthop_notifiers(struct net *net,
132 enum nexthop_event_type event_type,
133 struct nexthop *nh,
134 struct netlink_ext_ack *extack)
135 {
136 struct nh_notifier_info info = {
137 .net = net,
138 .extack = extack,
139 };
140 int err;
141
142 ASSERT_RTNL();
143
144 if (nexthop_notifiers_is_empty(net))
145 return 0;
146
147 err = nh_notifier_info_init(&info, nh);
148 if (err) {
149 NL_SET_ERR_MSG(extack, "Failed to initialize nexthop notifier info");
150 return err;
151 }
152
153 err = blocking_notifier_call_chain(&net->nexthop.notifier_chain,
154 event_type, &info);
155 nh_notifier_info_fini(&info);
156
157 return notifier_to_errno(err);
158 }
159
160 static unsigned int nh_dev_hashfn(unsigned int val)
161 {
162 unsigned int mask = NH_DEV_HASHSIZE - 1;
163
164 return (val ^
165 (val >> NH_DEV_HASHBITS) ^
166 (val >> (NH_DEV_HASHBITS * 2))) & mask;
167 }
168
169 static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
170 {
171 struct net_device *dev = nhi->fib_nhc.nhc_dev;
172 struct hlist_head *head;
173 unsigned int hash;
174
175 WARN_ON(!dev);
176
177 hash = nh_dev_hashfn(dev->ifindex);
178 head = &net->nexthop.devhash[hash];
179 hlist_add_head(&nhi->dev_hash, head);
180 }
181
182 static void nexthop_free_mpath(struct nexthop *nh)
183 {
184 struct nh_group *nhg;
185 int i;
186
187 nhg = rcu_dereference_raw(nh->nh_grp);
188 for (i = 0; i < nhg->num_nh; ++i) {
189 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
190
191 WARN_ON(!list_empty(&nhge->nh_list));
192 nexthop_put(nhge->nh);
193 }
194
195 WARN_ON(nhg->spare == nhg);
196
197 kfree(nhg->spare);
198 kfree(nhg);
199 }
200
201 static void nexthop_free_single(struct nexthop *nh)
202 {
203 struct nh_info *nhi;
204
205 nhi = rcu_dereference_raw(nh->nh_info);
206 switch (nhi->family) {
207 case AF_INET:
208 fib_nh_release(nh->net, &nhi->fib_nh);
209 break;
210 case AF_INET6:
211 ipv6_stub->fib6_nh_release(&nhi->fib6_nh);
212 break;
213 }
214 kfree(nhi);
215 }
216
217 void nexthop_free_rcu(struct rcu_head *head)
218 {
219 struct nexthop *nh = container_of(head, struct nexthop, rcu);
220
221 if (nh->is_group)
222 nexthop_free_mpath(nh);
223 else
224 nexthop_free_single(nh);
225
226 kfree(nh);
227 }
228 EXPORT_SYMBOL_GPL(nexthop_free_rcu);
229
230 static struct nexthop *nexthop_alloc(void)
231 {
232 struct nexthop *nh;
233
234 nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
235 if (nh) {
236 INIT_LIST_HEAD(&nh->fi_list);
237 INIT_LIST_HEAD(&nh->f6i_list);
238 INIT_LIST_HEAD(&nh->grp_list);
239 INIT_LIST_HEAD(&nh->fdb_list);
240 }
241 return nh;
242 }
243
244 static struct nh_group *nexthop_grp_alloc(u16 num_nh)
245 {
246 struct nh_group *nhg;
247
248 nhg = kzalloc(struct_size(nhg, nh_entries, num_nh), GFP_KERNEL);
249 if (nhg)
250 nhg->num_nh = num_nh;
251
252 return nhg;
253 }
254
255 static void nh_base_seq_inc(struct net *net)
256 {
257 while (++net->nexthop.seq == 0)
258 ;
259 }
260
261 /* no reference taken; rcu lock or rtnl must be held */
262 struct nexthop *nexthop_find_by_id(struct net *net, u32 id)
263 {
264 struct rb_node **pp, *parent = NULL, *next;
265
266 pp = &net->nexthop.rb_root.rb_node;
267 while (1) {
268 struct nexthop *nh;
269
270 next = rcu_dereference_raw(*pp);
271 if (!next)
272 break;
273 parent = next;
274
275 nh = rb_entry(parent, struct nexthop, rb_node);
276 if (id < nh->id)
277 pp = &next->rb_left;
278 else if (id > nh->id)
279 pp = &next->rb_right;
280 else
281 return nh;
282 }
283 return NULL;
284 }
285 EXPORT_SYMBOL_GPL(nexthop_find_by_id);
286
287 /* used for auto id allocation; called with rtnl held */
288 static u32 nh_find_unused_id(struct net *net)
289 {
290 u32 id_start = net->nexthop.last_id_allocated;
291
292 while (1) {
293 net->nexthop.last_id_allocated++;
294 if (net->nexthop.last_id_allocated == id_start)
295 break;
296
297 if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated))
298 return net->nexthop.last_id_allocated;
299 }
300 return 0;
301 }
302
303 static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
304 {
305 struct nexthop_grp *p;
306 size_t len = nhg->num_nh * sizeof(*p);
307 struct nlattr *nla;
308 u16 group_type = 0;
309 int i;
310
311 if (nhg->mpath)
312 group_type = NEXTHOP_GRP_TYPE_MPATH;
313
314 if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
315 goto nla_put_failure;
316
317 nla = nla_reserve(skb, NHA_GROUP, len);
318 if (!nla)
319 goto nla_put_failure;
320
321 p = nla_data(nla);
322 for (i = 0; i < nhg->num_nh; ++i) {
323 p->id = nhg->nh_entries[i].nh->id;
324 p->weight = nhg->nh_entries[i].weight - 1;
325 p += 1;
326 }
327
328 return 0;
329
330 nla_put_failure:
331 return -EMSGSIZE;
332 }
333
/* Build one netlink message of type @event describing @nh in @skb.
 *
 * Groups carry NHA_ID, an optional NHA_FDB flag and the group attributes.
 * Single nexthops carry NHA_ID plus either NHA_BLACKHOLE, or NHA_FDB /
 * NHA_OIF followed by the gateway and encap attributes when present.
 *
 * Returns 0 on success. Returns -EMSGSIZE when the header or an attribute
 * does not fit; a message that was already started is cancelled.
 */
static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
			int event, u32 portid, u32 seq, unsigned int nlflags)
{
	struct fib6_nh *fib6_nh;
	struct fib_nh *fib_nh;
	struct nlmsghdr *nlh;
	struct nh_info *nhi;
	struct nhmsg *nhm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
	if (!nlh)
		return -EMSGSIZE;

	nhm = nlmsg_data(nlh);
	/* family and scope are overwritten below for single nexthops */
	nhm->nh_family = AF_UNSPEC;
	nhm->nh_flags = nh->nh_flags;
	nhm->nh_protocol = nh->protocol;
	nhm->nh_scope = 0;
	nhm->resvd = 0;

	if (nla_put_u32(skb, NHA_ID, nh->id))
		goto nla_put_failure;

	if (nh->is_group) {
		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);

		if (nhg->fdb_nh && nla_put_flag(skb, NHA_FDB))
			goto nla_put_failure;
		if (nla_put_nh_group(skb, nhg))
			goto nla_put_failure;
		/* nothing else is reported for a group */
		goto out;
	}

	nhi = rtnl_dereference(nh->nh_info);
	nhm->nh_family = nhi->family;
	if (nhi->reject_nh) {
		/* blackhole: the flag is the only attribute besides the id */
		if (nla_put_flag(skb, NHA_BLACKHOLE))
			goto nla_put_failure;
		goto out;
	} else if (nhi->fdb_nh) {
		/* fdb nexthops report the flag instead of NHA_OIF */
		if (nla_put_flag(skb, NHA_FDB))
			goto nla_put_failure;
	} else {
		const struct net_device *dev;

		dev = nhi->fib_nhc.nhc_dev;
		if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
			goto nla_put_failure;
	}

	nhm->nh_scope = nhi->fib_nhc.nhc_scope;
	/* the gateway is only emitted when a gateway family is set */
	switch (nhi->family) {
	case AF_INET:
		fib_nh = &nhi->fib_nh;
		if (fib_nh->fib_nh_gw_family &&
		    nla_put_be32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
			goto nla_put_failure;
		break;

	case AF_INET6:
		fib6_nh = &nhi->fib6_nh;
		if (fib6_nh->fib_nh_gw_family &&
		    nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6))
			goto nla_put_failure;
		break;
	}

	if (nhi->fib_nhc.nhc_lwtstate &&
	    lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
				NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
		goto nla_put_failure;

out:
	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
414
415 static size_t nh_nlmsg_size_grp(struct nexthop *nh)
416 {
417 struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
418 size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
419
420 return nla_total_size(sz) +
421 nla_total_size(2); /* NHA_GROUP_TYPE */
422 }
423
424 static size_t nh_nlmsg_size_single(struct nexthop *nh)
425 {
426 struct nh_info *nhi = rtnl_dereference(nh->nh_info);
427 size_t sz;
428
429 /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
430 * are mutually exclusive
431 */
432 sz = nla_total_size(4); /* NHA_OIF */
433
434 switch (nhi->family) {
435 case AF_INET:
436 if (nhi->fib_nh.fib_nh_gw_family)
437 sz += nla_total_size(4); /* NHA_GATEWAY */
438 break;
439
440 case AF_INET6:
441 /* NHA_GATEWAY */
442 if (nhi->fib6_nh.fib_nh_gw_family)
443 sz += nla_total_size(sizeof(const struct in6_addr));
444 break;
445 }
446
447 if (nhi->fib_nhc.nhc_lwtstate) {
448 sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate);
449 sz += nla_total_size(2); /* NHA_ENCAP_TYPE */
450 }
451
452 return sz;
453 }
454
455 static size_t nh_nlmsg_size(struct nexthop *nh)
456 {
457 size_t sz = NLMSG_ALIGN(sizeof(struct nhmsg));
458
459 sz += nla_total_size(4); /* NHA_ID */
460
461 if (nh->is_group)
462 sz += nh_nlmsg_size_grp(nh);
463 else
464 sz += nh_nlmsg_size_single(nh);
465
466 return sz;
467 }
468
469 static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
470 {
471 unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
472 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
473 struct sk_buff *skb;
474 int err = -ENOBUFS;
475
476 skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
477 if (!skb)
478 goto errout;
479
480 err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
481 if (err < 0) {
482 /* -EMSGSIZE implies BUG in nh_nlmsg_size() */
483 WARN_ON(err == -EMSGSIZE);
484 kfree_skb(skb);
485 goto errout;
486 }
487
488 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP,
489 info->nlh, gfp_any());
490 return;
491 errout:
492 if (err < 0)
493 rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
494 }
495
496 static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
497 bool *is_fdb, struct netlink_ext_ack *extack)
498 {
499 if (nh->is_group) {
500 struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
501
502 /* nested multipath (group within a group) is not
503 * supported
504 */
505 if (nhg->mpath) {
506 NL_SET_ERR_MSG(extack,
507 "Multipath group can not be a nexthop within a group");
508 return false;
509 }
510 *is_fdb = nhg->fdb_nh;
511 } else {
512 struct nh_info *nhi = rtnl_dereference(nh->nh_info);
513
514 if (nhi->reject_nh && npaths > 1) {
515 NL_SET_ERR_MSG(extack,
516 "Blackhole nexthop can not be used in a group with more than 1 path");
517 return false;
518 }
519 *is_fdb = nhi->fdb_nh;
520 }
521
522 return true;
523 }
524
525 static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
526 struct netlink_ext_ack *extack)
527 {
528 struct nh_info *nhi;
529
530 nhi = rtnl_dereference(nh->nh_info);
531
532 if (!nhi->fdb_nh) {
533 NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
534 return -EINVAL;
535 }
536
537 if (*nh_family == AF_UNSPEC) {
538 *nh_family = nhi->family;
539 } else if (*nh_family != nhi->family) {
540 NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
541 return -EINVAL;
542 }
543
544 return 0;
545 }
546
547 static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
548 struct netlink_ext_ack *extack)
549 {
550 unsigned int len = nla_len(tb[NHA_GROUP]);
551 u8 nh_family = AF_UNSPEC;
552 struct nexthop_grp *nhg;
553 unsigned int i, j;
554 u8 nhg_fdb = 0;
555
556 if (!len || len & (sizeof(struct nexthop_grp) - 1)) {
557 NL_SET_ERR_MSG(extack,
558 "Invalid length for nexthop group attribute");
559 return -EINVAL;
560 }
561
562 /* convert len to number of nexthop ids */
563 len /= sizeof(*nhg);
564
565 nhg = nla_data(tb[NHA_GROUP]);
566 for (i = 0; i < len; ++i) {
567 if (nhg[i].resvd1 || nhg[i].resvd2) {
568 NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
569 return -EINVAL;
570 }
571 if (nhg[i].weight > 254) {
572 NL_SET_ERR_MSG(extack, "Invalid value for weight");
573 return -EINVAL;
574 }
575 for (j = i + 1; j < len; ++j) {
576 if (nhg[i].id == nhg[j].id) {
577 NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
578 return -EINVAL;
579 }
580 }
581 }
582
583 if (tb[NHA_FDB])
584 nhg_fdb = 1;
585 nhg = nla_data(tb[NHA_GROUP]);
586 for (i = 0; i < len; ++i) {
587 struct nexthop *nh;
588 bool is_fdb_nh;
589
590 nh = nexthop_find_by_id(net, nhg[i].id);
591 if (!nh) {
592 NL_SET_ERR_MSG(extack, "Invalid nexthop id");
593 return -EINVAL;
594 }
595 if (!valid_group_nh(nh, len, &is_fdb_nh, extack))
596 return -EINVAL;
597
598 if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
599 return -EINVAL;
600
601 if (!nhg_fdb && is_fdb_nh) {
602 NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
603 return -EINVAL;
604 }
605 }
606 for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) {
607 if (!tb[i])
608 continue;
609 if (tb[NHA_FDB])
610 continue;
611 NL_SET_ERR_MSG(extack,
612 "No other attributes can be set in nexthop groups");
613 return -EINVAL;
614 }
615
616 return 0;
617 }
618
619 static bool ipv6_good_nh(const struct fib6_nh *nh)
620 {
621 int state = NUD_REACHABLE;
622 struct neighbour *n;
623
624 rcu_read_lock_bh();
625
626 n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
627 if (n)
628 state = n->nud_state;
629
630 rcu_read_unlock_bh();
631
632 return !!(state & NUD_VALID);
633 }
634
635 static bool ipv4_good_nh(const struct fib_nh *nh)
636 {
637 int state = NUD_REACHABLE;
638 struct neighbour *n;
639
640 rcu_read_lock_bh();
641
642 n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
643 (__force u32)nh->fib_nh_gw4);
644 if (n)
645 state = n->nud_state;
646
647 rcu_read_unlock_bh();
648
649 return !!(state & NUD_VALID);
650 }
651
652 struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
653 {
654 struct nexthop *rc = NULL;
655 struct nh_group *nhg;
656 int i;
657
658 if (!nh->is_group)
659 return nh;
660
661 nhg = rcu_dereference(nh->nh_grp);
662 for (i = 0; i < nhg->num_nh; ++i) {
663 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
664 struct nh_info *nhi;
665
666 if (hash > atomic_read(&nhge->upper_bound))
667 continue;
668
669 nhi = rcu_dereference(nhge->nh->nh_info);
670 if (nhi->fdb_nh)
671 return nhge->nh;
672
673 /* nexthops always check if it is good and does
674 * not rely on a sysctl for this behavior
675 */
676 switch (nhi->family) {
677 case AF_INET:
678 if (ipv4_good_nh(&nhi->fib_nh))
679 return nhge->nh;
680 break;
681 case AF_INET6:
682 if (ipv6_good_nh(&nhi->fib6_nh))
683 return nhge->nh;
684 break;
685 }
686
687 if (!rc)
688 rc = nhge->nh;
689 }
690
691 return rc;
692 }
693 EXPORT_SYMBOL_GPL(nexthop_select_path);
694
695 int nexthop_for_each_fib6_nh(struct nexthop *nh,
696 int (*cb)(struct fib6_nh *nh, void *arg),
697 void *arg)
698 {
699 struct nh_info *nhi;
700 int err;
701
702 if (nh->is_group) {
703 struct nh_group *nhg;
704 int i;
705
706 nhg = rcu_dereference_rtnl(nh->nh_grp);
707 for (i = 0; i < nhg->num_nh; i++) {
708 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
709
710 nhi = rcu_dereference_rtnl(nhge->nh->nh_info);
711 err = cb(&nhi->fib6_nh, arg);
712 if (err)
713 return err;
714 }
715 } else {
716 nhi = rcu_dereference_rtnl(nh->nh_info);
717 err = cb(&nhi->fib6_nh, arg);
718 if (err)
719 return err;
720 }
721
722 return 0;
723 }
724 EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh);
725
726 static int check_src_addr(const struct in6_addr *saddr,
727 struct netlink_ext_ack *extack)
728 {
729 if (!ipv6_addr_any(saddr)) {
730 NL_SET_ERR_MSG(extack, "IPv6 routes using source address can not use nexthop objects");
731 return -EINVAL;
732 }
733 return 0;
734 }
735
736 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
737 struct netlink_ext_ack *extack)
738 {
739 struct nh_info *nhi;
740 bool is_fdb_nh;
741
742 /* fib6_src is unique to a fib6_info and limits the ability to cache
743 * routes in fib6_nh within a nexthop that is potentially shared
744 * across multiple fib entries. If the config wants to use source
745 * routing it can not use nexthop objects. mlxsw also does not allow
746 * fib6_src on routes.
747 */
748 if (cfg && check_src_addr(&cfg->fc_src, extack) < 0)
749 return -EINVAL;
750
751 if (nh->is_group) {
752 struct nh_group *nhg;
753
754 nhg = rtnl_dereference(nh->nh_grp);
755 if (nhg->has_v4)
756 goto no_v4_nh;
757 is_fdb_nh = nhg->fdb_nh;
758 } else {
759 nhi = rtnl_dereference(nh->nh_info);
760 if (nhi->family == AF_INET)
761 goto no_v4_nh;
762 is_fdb_nh = nhi->fdb_nh;
763 }
764
765 if (is_fdb_nh) {
766 NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
767 return -EINVAL;
768 }
769
770 return 0;
771 no_v4_nh:
772 NL_SET_ERR_MSG(extack, "IPv6 routes can not use an IPv4 nexthop");
773 return -EINVAL;
774 }
775 EXPORT_SYMBOL_GPL(fib6_check_nexthop);
776
777 /* if existing nexthop has ipv6 routes linked to it, need
778 * to verify this new spec works with ipv6
779 */
780 static int fib6_check_nh_list(struct nexthop *old, struct nexthop *new,
781 struct netlink_ext_ack *extack)
782 {
783 struct fib6_info *f6i;
784
785 if (list_empty(&old->f6i_list))
786 return 0;
787
788 list_for_each_entry(f6i, &old->f6i_list, nh_list) {
789 if (check_src_addr(&f6i->fib6_src.addr, extack) < 0)
790 return -EINVAL;
791 }
792
793 return fib6_check_nexthop(new, NULL, extack);
794 }
795
796 static int nexthop_check_scope(struct nh_info *nhi, u8 scope,
797 struct netlink_ext_ack *extack)
798 {
799 if (scope == RT_SCOPE_HOST && nhi->fib_nhc.nhc_gw_family) {
800 NL_SET_ERR_MSG(extack,
801 "Route with host scope can not have a gateway");
802 return -EINVAL;
803 }
804
805 if (nhi->fib_nhc.nhc_flags & RTNH_F_ONLINK && scope >= RT_SCOPE_LINK) {
806 NL_SET_ERR_MSG(extack, "Scope mismatch with nexthop");
807 return -EINVAL;
808 }
809
810 return 0;
811 }
812
813 /* Invoked by fib add code to verify nexthop by id is ok with
814 * config for prefix; parts of fib_check_nh not done when nexthop
815 * object is used.
816 */
817 int fib_check_nexthop(struct nexthop *nh, u8 scope,
818 struct netlink_ext_ack *extack)
819 {
820 struct nh_info *nhi;
821 int err = 0;
822
823 if (nh->is_group) {
824 struct nh_group *nhg;
825
826 nhg = rtnl_dereference(nh->nh_grp);
827 if (nhg->fdb_nh) {
828 NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
829 err = -EINVAL;
830 goto out;
831 }
832
833 if (scope == RT_SCOPE_HOST) {
834 NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops");
835 err = -EINVAL;
836 goto out;
837 }
838
839 /* all nexthops in a group have the same scope */
840 nhi = rtnl_dereference(nhg->nh_entries[0].nh->nh_info);
841 err = nexthop_check_scope(nhi, scope, extack);
842 } else {
843 nhi = rtnl_dereference(nh->nh_info);
844 if (nhi->fdb_nh) {
845 NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
846 err = -EINVAL;
847 goto out;
848 }
849 err = nexthop_check_scope(nhi, scope, extack);
850 }
851
852 out:
853 return err;
854 }
855
856 static int fib_check_nh_list(struct nexthop *old, struct nexthop *new,
857 struct netlink_ext_ack *extack)
858 {
859 struct fib_info *fi;
860
861 list_for_each_entry(fi, &old->fi_list, nh_list) {
862 int err;
863
864 err = fib_check_nexthop(new, fi->fib_scope, extack);
865 if (err)
866 return err;
867 }
868 return 0;
869 }
870
871 static void nh_group_rebalance(struct nh_group *nhg)
872 {
873 int total = 0;
874 int w = 0;
875 int i;
876
877 for (i = 0; i < nhg->num_nh; ++i)
878 total += nhg->nh_entries[i].weight;
879
880 for (i = 0; i < nhg->num_nh; ++i) {
881 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
882 int upper_bound;
883
884 w += nhge->weight;
885 upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
886 atomic_set(&nhge->upper_bound, upper_bound);
887 }
888 }
889
/* Remove the group entry @nhge from its parent group.
 *
 * If @nhge is the only entry, the parent nexthop itself is removed via
 * remove_nexthop(). Otherwise the remaining entries are copied into the
 * group's spare nh_group, which is rebalanced and published in place of
 * the current one; the removed entry is then unlinked and its nexthop
 * reference dropped. When @nlinfo is set, RTM_NEWNEXTHOP is sent for the
 * parent.
 */
static void remove_nh_grp_entry(struct net *net, struct nh_grp_entry *nhge,
				struct nl_info *nlinfo)
{
	struct nh_grp_entry *nhges, *new_nhges;
	struct nexthop *nhp = nhge->nh_parent;
	struct nexthop *nh = nhge->nh;
	struct nh_group *nhg, *newg;
	int i, j;

	WARN_ON(!nh);

	nhg = rtnl_dereference(nhp->nh_grp);
	newg = nhg->spare;

	/* last entry, keep it visible and remove the parent */
	if (nhg->num_nh == 1) {
		remove_nexthop(net, nhp, nlinfo);
		return;
	}

	/* has_v4 is recomputed from the surviving entries below; num_nh
	 * starts at the old count and is decremented when the removed
	 * entry is met
	 */
	newg->has_v4 = false;
	newg->mpath = nhg->mpath;
	newg->fdb_nh = nhg->fdb_nh;
	newg->num_nh = nhg->num_nh;

	/* copy old entries to new except the one getting removed */
	nhges = nhg->nh_entries;
	new_nhges = newg->nh_entries;
	for (i = 0, j = 0; i < nhg->num_nh; ++i) {
		struct nh_info *nhi;

		/* current nexthop getting removed */
		if (nhg->nh_entries[i].nh == nh) {
			newg->num_nh--;
			continue;
		}

		nhi = rtnl_dereference(nhges[i].nh->nh_info);
		if (nhi->family == AF_INET)
			newg->has_v4 = true;

		/* move the grp_list linkage from the old slot to the new one */
		list_del(&nhges[i].nh_list);
		new_nhges[j].nh_parent = nhges[i].nh_parent;
		new_nhges[j].nh = nhges[i].nh;
		new_nhges[j].weight = nhges[i].weight;
		list_add(&new_nhges[j].nh_list, &new_nhges[j].nh->grp_list);
		j++;
	}

	nh_group_rebalance(newg);
	rcu_assign_pointer(nhp->nh_grp, newg);

	/* unlink the removed entry and drop its nexthop reference */
	list_del(&nhge->nh_list);
	nexthop_put(nhge->nh);

	if (nlinfo)
		nexthop_notify(RTM_NEWNEXTHOP, nhp, nlinfo);
}
948
949 static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
950 struct nl_info *nlinfo)
951 {
952 struct nh_grp_entry *nhge, *tmp;
953
954 list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list)
955 remove_nh_grp_entry(net, nhge, nlinfo);
956
957 /* make sure all see the newly published array before releasing rtnl */
958 synchronize_net();
959 }
960
961 static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
962 {
963 struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
964 int i, num_nh = nhg->num_nh;
965
966 for (i = 0; i < num_nh; ++i) {
967 struct nh_grp_entry *nhge = &nhg->nh_entries[i];
968
969 if (WARN_ON(!nhge->nh))
970 continue;
971
972 list_del_init(&nhge->nh_list);
973 }
974 }
975
976 /* not called for nexthop replace */
977 static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
978 {
979 struct fib6_info *f6i, *tmp;
980 bool do_flush = false;
981 struct fib_info *fi;
982
983 list_for_each_entry(fi, &nh->fi_list, nh_list) {
984 fi->fib_flags |= RTNH_F_DEAD;
985 do_flush = true;
986 }
987 if (do_flush)
988 fib_flush(net);
989
990 /* ip6_del_rt removes the entry from this list hence the _safe */
991 list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
992 /* __ip6_del_rt does a release, so do a hold here */
993 fib6_info_hold(f6i);
994 ipv6_stub->ip6_del_rt(net, f6i,
995 !net->ipv4.sysctl_nexthop_compat_mode);
996 }
997 }
998
999 static void __remove_nexthop(struct net *net, struct nexthop *nh,
1000 struct nl_info *nlinfo)
1001 {
1002 __remove_nexthop_fib(net, nh);
1003
1004 if (nh->is_group) {
1005 remove_nexthop_group(nh, nlinfo);
1006 } else {
1007 struct nh_info *nhi;
1008
1009 nhi = rtnl_dereference(nh->nh_info);
1010 if (nhi->fib_nhc.nhc_dev)
1011 hlist_del(&nhi->dev_hash);
1012
1013 remove_nexthop_from_groups(net, nh, nlinfo);
1014 }
1015 }
1016
/* Remove @nh from the per-netns nexthop tree and release it.
 *
 * The in-kernel notifier chain is called with NEXTHOP_EVENT_DEL first
 * (its return value is ignored), and RTM_DELNEXTHOP is sent to userspace
 * when @nlinfo is set. The reference dropped at the end is presumably
 * the one held on behalf of the tree - TODO confirm against the insert
 * path.
 */
static void remove_nexthop(struct net *net, struct nexthop *nh,
			   struct nl_info *nlinfo)
{
	call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh, NULL);

	/* remove from the tree */
	rb_erase(&nh->rb_node, &net->nexthop.rb_root);

	if (nlinfo)
		nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);

	/* detach from FIB entries, the device hash and groups */
	__remove_nexthop(net, nh, nlinfo);
	nh_base_seq_inc(net);

	nexthop_put(nh);
}
1033
1034 /* if any FIB entries reference this nexthop, any dst entries
1035 * need to be regenerated
1036 */
1037 static void nh_rt_cache_flush(struct net *net, struct nexthop *nh)
1038 {
1039 struct fib6_info *f6i;
1040
1041 if (!list_empty(&nh->fi_list))
1042 rt_cache_flush(net);
1043
1044 list_for_each_entry(f6i, &nh->f6i_list, nh_list)
1045 ipv6_stub->fib6_update_sernum(net, f6i);
1046 }
1047
/* Replace the contents of the group @old with those of the group @new.
 *
 * After the notifier chain accepts the replacement, the nh_group
 * pointers of @old and @new are swapped and the nh_parent of every entry
 * is updated to match, so @new ends up owning the old group.
 *
 * Returns 0 on success, -EINVAL if @new is not a group, or the error
 * returned by the notifier chain.
 */
static int replace_nexthop_grp(struct net *net, struct nexthop *old,
			       struct nexthop *new,
			       struct netlink_ext_ack *extack)
{
	struct nh_group *oldg, *newg;
	int i, err;

	if (!new->is_group) {
		NL_SET_ERR_MSG(extack, "Can not replace a nexthop group with a nexthop.");
		return -EINVAL;
	}

	err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
	if (err)
		return err;

	oldg = rtnl_dereference(old->nh_grp);
	newg = rtnl_dereference(new->nh_grp);

	/* update parents - used by nexthop code for cleanup */
	for (i = 0; i < newg->num_nh; i++)
		newg->nh_entries[i].nh_parent = old;

	rcu_assign_pointer(old->nh_grp, newg);

	/* hand the old group to 'new' */
	for (i = 0; i < oldg->num_nh; i++)
		oldg->nh_entries[i].nh_parent = new;

	rcu_assign_pointer(new->nh_grp, oldg);

	return 0;
}
1080
1081 static void nh_group_v4_update(struct nh_group *nhg)
1082 {
1083 struct nh_grp_entry *nhges;
1084 bool has_v4 = false;
1085 int i;
1086
1087 nhges = nhg->nh_entries;
1088 for (i = 0; i < nhg->num_nh; i++) {
1089 struct nh_info *nhi;
1090
1091 nhi = rtnl_dereference(nhges[i].nh->nh_info);
1092 if (nhi->family == AF_INET)
1093 has_v4 = true;
1094 }
1095 nhg->has_v4 = has_v4;
1096 }
1097
/* Replace the contents of the single nexthop @old with those of @new.
 *
 * After the notifier chain accepts the replacement, the nh_info pointers
 * of @old and @new are swapped (with nh_parent updated to match) and
 * @old takes over the protocol and flags of @new.
 *
 * Returns 0 on success, -EINVAL if @new is a group, or the error
 * returned by the notifier chain.
 */
static int replace_nexthop_single(struct net *net, struct nexthop *old,
				  struct nexthop *new,
				  struct netlink_ext_ack *extack)
{
	struct nh_info *oldi, *newi;
	int err;

	if (new->is_group) {
		NL_SET_ERR_MSG(extack, "Can not replace a nexthop with a nexthop group.");
		return -EINVAL;
	}

	err = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new, extack);
	if (err)
		return err;

	/* Hardware flags were set on 'old' as 'new' is not in the red-black
	 * tree. Therefore, inherit the flags from 'old' to 'new'.
	 */
	new->nh_flags |= old->nh_flags & (RTNH_F_OFFLOAD | RTNH_F_TRAP);

	oldi = rtnl_dereference(old->nh_info);
	newi = rtnl_dereference(new->nh_info);

	newi->nh_parent = old;
	oldi->nh_parent = new;

	old->protocol = new->protocol;
	old->nh_flags = new->nh_flags;

	/* swap the payloads; 'new' now holds the old nh_info */
	rcu_assign_pointer(old->nh_info, newi);
	rcu_assign_pointer(new->nh_info, oldi);

	/* When replacing an IPv4 nexthop with an IPv6 nexthop, potentially
	 * update IPv4 indication in all the groups using the nexthop.
	 */
	if (oldi->family == AF_INET && newi->family == AF_INET6) {
		struct nh_grp_entry *nhge;

		list_for_each_entry(nhge, &old->grp_list, nh_list) {
			struct nexthop *nhp = nhge->nh_parent;
			struct nh_group *nhg;

			nhg = rtnl_dereference(nhp->nh_grp);
			nh_group_v4_update(nhg);
		}
	}

	return 0;
}
1148
1149 static void __nexthop_replace_notify(struct net *net, struct nexthop *nh,
1150 struct nl_info *info)
1151 {
1152 struct fib6_info *f6i;
1153
1154 if (!list_empty(&nh->fi_list)) {
1155 struct fib_info *fi;
1156
1157 /* expectation is a few fib_info per nexthop and then
1158 * a lot of routes per fib_info. So mark the fib_info
1159 * and then walk the fib tables once
1160 */
1161 list_for_each_entry(fi, &nh->fi_list, nh_list)
1162 fi->nh_updated = true;
1163
1164 fib_info_notify_update(net, info);
1165
1166 list_for_each_entry(fi, &nh->fi_list, nh_list)
1167 fi->nh_updated = false;
1168 }
1169
1170 list_for_each_entry(f6i, &nh->f6i_list, nh_list)
1171 ipv6_stub->fib6_rt_update(net, f6i, info);
1172 }
1173
1174 /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
1175 * linked to this nexthop and for all groups that the nexthop
1176 * is a member of
1177 */
1178 static void nexthop_replace_notify(struct net *net, struct nexthop *nh,
1179 struct nl_info *info)
1180 {
1181 struct nh_grp_entry *nhge;
1182
1183 __nexthop_replace_notify(net, nh, info);
1184
1185 list_for_each_entry(nhge, &nh->grp_list, nh_list)
1186 __nexthop_replace_notify(net, nhge->nh_parent, info);
1187 }
1188
/* Replace the definition of the existing nexthop @old with @new.
 *
 * The FIB entries that use @old, directly or through a group containing
 * it, are first checked against @new. The payload is then swapped by
 * replace_nexthop_grp() or replace_nexthop_single(). On success the
 * route caches are flushed for @old, and @new (which now carries the
 * previous payload) is torn down and released.
 *
 * Returns 0 on success or a negative errno, with @extack set by the
 * failing check.
 */
static int replace_nexthop(struct net *net, struct nexthop *old,
			   struct nexthop *new, struct netlink_ext_ack *extack)
{
	bool new_is_reject = false;
	struct nh_grp_entry *nhge;
	int err;

	/* check that existing FIB entries are ok with the
	 * new nexthop definition
	 */
	err = fib_check_nh_list(old, new, extack);
	if (err)
		return err;

	err = fib6_check_nh_list(old, new, extack);
	if (err)
		return err;

	if (!new->is_group) {
		struct nh_info *nhi = rtnl_dereference(new->nh_info);

		new_is_reject = nhi->reject_nh;
	}

	/* repeat the checks for every group that 'old' is a member of */
	list_for_each_entry(nhge, &old->grp_list, nh_list) {
		/* if new nexthop is a blackhole, any groups using this
		 * nexthop cannot have more than 1 path
		 */
		if (new_is_reject &&
		    nexthop_num_path(nhge->nh_parent) > 1) {
			NL_SET_ERR_MSG(extack, "Blackhole nexthop can not be a member of a group with more than one path");
			return -EINVAL;
		}

		err = fib_check_nh_list(nhge->nh_parent, new, extack);
		if (err)
			return err;

		err = fib6_check_nh_list(nhge->nh_parent, new, extack);
		if (err)
			return err;
	}

	if (old->is_group)
		err = replace_nexthop_grp(net, old, new, extack);
	else
		err = replace_nexthop_single(net, old, new, extack);

	if (!err) {
		nh_rt_cache_flush(net, old);

		/* 'new' holds the previous payload after the swap */
		__remove_nexthop(net, new, NULL);
		nexthop_put(new);
	}

	return err;
}
1246
1247 /* called with rtnl_lock held */
1248 static int insert_nexthop(struct net *net, struct nexthop *new_nh,
1249 struct nh_config *cfg, struct netlink_ext_ack *extack)
1250 {
1251 struct rb_node **pp, *parent = NULL, *next;
1252 struct rb_root *root = &net->nexthop.rb_root;
1253 bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
1254 bool create = !!(cfg->nlflags & NLM_F_CREATE);
1255 u32 new_id = new_nh->id;
1256 int replace_notify = 0;
1257 int rc = -EEXIST;
1258
1259 pp = &root->rb_node;
1260 while (1) {
1261 struct nexthop *nh;
1262
1263 next = *pp;
1264 if (!next)
1265 break;
1266
1267 parent = next;
1268
1269 nh = rb_entry(parent, struct nexthop, rb_node);
1270 if (new_id < nh->id) {
1271 pp = &next->rb_left;
1272 } else if (new_id > nh->id) {
1273 pp = &next->rb_right;
1274 } else if (replace) {
1275 rc = replace_nexthop(net, nh, new_nh, extack);
1276 if (!rc) {
1277 new_nh = nh; /* send notification with old nh */
1278 replace_notify = 1;
1279 }
1280 goto out;
1281 } else {
1282 /* id already exists and not a replace */
1283 goto out;
1284 }
1285 }
1286
1287 if (replace && !create) {
1288 NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
1289 rc = -ENOENT;
1290 goto out;
1291 }
1292
1293 rb_link_node_rcu(&new_nh->rb_node, parent, pp);
1294 rb_insert_color(&new_nh->rb_node, root);
1295
1296 rc = call_nexthop_notifiers(net, NEXTHOP_EVENT_REPLACE, new_nh, extack);
1297 if (rc)
1298 rb_erase(&new_nh->rb_node, &net->nexthop.rb_root);
1299
1300 out:
1301 if (!rc) {
1302 nh_base_seq_inc(net);
1303 nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
1304 if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
1305 nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
1306 }
1307
1308 return rc;
1309 }
1310
1311 /* rtnl */
1312 /* remove all nexthops tied to a device being deleted */
1313 static void nexthop_flush_dev(struct net_device *dev)
1314 {
1315 unsigned int hash = nh_dev_hashfn(dev->ifindex);
1316 struct net *net = dev_net(dev);
1317 struct hlist_head *head = &net->nexthop.devhash[hash];
1318 struct hlist_node *n;
1319 struct nh_info *nhi;
1320
1321 hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
1322 if (nhi->fib_nhc.nhc_dev != dev)
1323 continue;
1324
1325 remove_nexthop(net, nhi->nh_parent, NULL);
1326 }
1327 }
1328
1329 /* rtnl; called when net namespace is deleted */
1330 static void flush_all_nexthops(struct net *net)
1331 {
1332 struct rb_root *root = &net->nexthop.rb_root;
1333 struct rb_node *node;
1334 struct nexthop *nh;
1335
1336 while ((node = rb_first(root))) {
1337 nh = rb_entry(node, struct nexthop, rb_node);
1338 remove_nexthop(net, nh, NULL);
1339 cond_resched();
1340 }
1341 }
1342
1343 static struct nexthop *nexthop_create_group(struct net *net,
1344 struct nh_config *cfg)
1345 {
1346 struct nlattr *grps_attr = cfg->nh_grp;
1347 struct nexthop_grp *entry = nla_data(grps_attr);
1348 u16 num_nh = nla_len(grps_attr) / sizeof(*entry);
1349 struct nh_group *nhg;
1350 struct nexthop *nh;
1351 int i;
1352
1353 if (WARN_ON(!num_nh))
1354 return ERR_PTR(-EINVAL);
1355
1356 nh = nexthop_alloc();
1357 if (!nh)
1358 return ERR_PTR(-ENOMEM);
1359
1360 nh->is_group = 1;
1361
1362 nhg = nexthop_grp_alloc(num_nh);
1363 if (!nhg) {
1364 kfree(nh);
1365 return ERR_PTR(-ENOMEM);
1366 }
1367
1368 /* spare group used for removals */
1369 nhg->spare = nexthop_grp_alloc(num_nh);
1370 if (!nhg->spare) {
1371 kfree(nhg);
1372 kfree(nh);
1373 return ERR_PTR(-ENOMEM);
1374 }
1375 nhg->spare->spare = nhg;
1376
1377 for (i = 0; i < nhg->num_nh; ++i) {
1378 struct nexthop *nhe;
1379 struct nh_info *nhi;
1380
1381 nhe = nexthop_find_by_id(net, entry[i].id);
1382 if (!nexthop_get(nhe))
1383 goto out_no_nh;
1384
1385 nhi = rtnl_dereference(nhe->nh_info);
1386 if (nhi->family == AF_INET)
1387 nhg->has_v4 = true;
1388
1389 nhg->nh_entries[i].nh = nhe;
1390 nhg->nh_entries[i].weight = entry[i].weight + 1;
1391 list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
1392 nhg->nh_entries[i].nh_parent = nh;
1393 }
1394
1395 if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
1396 nhg->mpath = 1;
1397 nh_group_rebalance(nhg);
1398 }
1399
1400 if (cfg->nh_fdb)
1401 nhg->fdb_nh = 1;
1402
1403 rcu_assign_pointer(nh->nh_grp, nhg);
1404
1405 return nh;
1406
1407 out_no_nh:
1408 for (; i >= 0; --i)
1409 nexthop_put(nhg->nh_entries[i].nh);
1410
1411 kfree(nhg->spare);
1412 kfree(nhg);
1413 kfree(nh);
1414
1415 return ERR_PTR(-ENOENT);
1416 }
1417
1418 static int nh_create_ipv4(struct net *net, struct nexthop *nh,
1419 struct nh_info *nhi, struct nh_config *cfg,
1420 struct netlink_ext_ack *extack)
1421 {
1422 struct fib_nh *fib_nh = &nhi->fib_nh;
1423 struct fib_config fib_cfg = {
1424 .fc_oif = cfg->nh_ifindex,
1425 .fc_gw4 = cfg->gw.ipv4,
1426 .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
1427 .fc_flags = cfg->nh_flags,
1428 .fc_encap = cfg->nh_encap,
1429 .fc_encap_type = cfg->nh_encap_type,
1430 };
1431 u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
1432 int err;
1433
1434 err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
1435 if (err) {
1436 fib_nh_release(net, fib_nh);
1437 goto out;
1438 }
1439
1440 if (nhi->fdb_nh)
1441 goto out;
1442
1443 /* sets nh_dev if successful */
1444 err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
1445 if (!err) {
1446 nh->nh_flags = fib_nh->fib_nh_flags;
1447 fib_info_update_nhc_saddr(net, &fib_nh->nh_common,
1448 fib_nh->fib_nh_scope);
1449 } else {
1450 fib_nh_release(net, fib_nh);
1451 }
1452 out:
1453 return err;
1454 }
1455
1456 static int nh_create_ipv6(struct net *net, struct nexthop *nh,
1457 struct nh_info *nhi, struct nh_config *cfg,
1458 struct netlink_ext_ack *extack)
1459 {
1460 struct fib6_nh *fib6_nh = &nhi->fib6_nh;
1461 struct fib6_config fib6_cfg = {
1462 .fc_table = l3mdev_fib_table(cfg->dev),
1463 .fc_ifindex = cfg->nh_ifindex,
1464 .fc_gateway = cfg->gw.ipv6,
1465 .fc_flags = cfg->nh_flags,
1466 .fc_encap = cfg->nh_encap,
1467 .fc_encap_type = cfg->nh_encap_type,
1468 .fc_is_fdb = cfg->nh_fdb,
1469 };
1470 int err;
1471
1472 if (!ipv6_addr_any(&cfg->gw.ipv6))
1473 fib6_cfg.fc_flags |= RTF_GATEWAY;
1474
1475 /* sets nh_dev if successful */
1476 err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
1477 extack);
1478 if (err)
1479 ipv6_stub->fib6_nh_release(fib6_nh);
1480 else
1481 nh->nh_flags = fib6_nh->fib_nh_flags;
1482
1483 return err;
1484 }
1485
1486 static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
1487 struct netlink_ext_ack *extack)
1488 {
1489 struct nh_info *nhi;
1490 struct nexthop *nh;
1491 int err = 0;
1492
1493 nh = nexthop_alloc();
1494 if (!nh)
1495 return ERR_PTR(-ENOMEM);
1496
1497 nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
1498 if (!nhi) {
1499 kfree(nh);
1500 return ERR_PTR(-ENOMEM);
1501 }
1502
1503 nh->nh_flags = cfg->nh_flags;
1504 nh->net = net;
1505
1506 nhi->nh_parent = nh;
1507 nhi->family = cfg->nh_family;
1508 nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
1509
1510 if (cfg->nh_fdb)
1511 nhi->fdb_nh = 1;
1512
1513 if (cfg->nh_blackhole) {
1514 nhi->reject_nh = 1;
1515 cfg->nh_ifindex = net->loopback_dev->ifindex;
1516 }
1517
1518 switch (cfg->nh_family) {
1519 case AF_INET:
1520 err = nh_create_ipv4(net, nh, nhi, cfg, extack);
1521 break;
1522 case AF_INET6:
1523 err = nh_create_ipv6(net, nh, nhi, cfg, extack);
1524 break;
1525 }
1526
1527 if (err) {
1528 kfree(nhi);
1529 kfree(nh);
1530 return ERR_PTR(err);
1531 }
1532
1533 /* add the entry to the device based hash */
1534 if (!nhi->fdb_nh)
1535 nexthop_devhash_add(net, nhi);
1536
1537 rcu_assign_pointer(nh->nh_info, nhi);
1538
1539 return nh;
1540 }
1541
1542 /* called with rtnl lock held */
1543 static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
1544 struct netlink_ext_ack *extack)
1545 {
1546 struct nexthop *nh;
1547 int err;
1548
1549 if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
1550 NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
1551 return ERR_PTR(-EINVAL);
1552 }
1553
1554 if (!cfg->nh_id) {
1555 cfg->nh_id = nh_find_unused_id(net);
1556 if (!cfg->nh_id) {
1557 NL_SET_ERR_MSG(extack, "No unused id");
1558 return ERR_PTR(-EINVAL);
1559 }
1560 }
1561
1562 if (cfg->nh_grp)
1563 nh = nexthop_create_group(net, cfg);
1564 else
1565 nh = nexthop_create(net, cfg, extack);
1566
1567 if (IS_ERR(nh))
1568 return nh;
1569
1570 refcount_set(&nh->refcnt, 1);
1571 nh->id = cfg->nh_id;
1572 nh->protocol = cfg->nh_protocol;
1573 nh->net = net;
1574
1575 err = insert_nexthop(net, nh, cfg, extack);
1576 if (err) {
1577 __remove_nexthop(net, nh, NULL);
1578 nexthop_put(nh);
1579 nh = ERR_PTR(err);
1580 }
1581
1582 return nh;
1583 }
1584
1585 static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
1586 struct nlmsghdr *nlh, struct nh_config *cfg,
1587 struct netlink_ext_ack *extack)
1588 {
1589 struct nhmsg *nhm = nlmsg_data(nlh);
1590 struct nlattr *tb[NHA_MAX + 1];
1591 int err;
1592
1593 err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
1594 extack);
1595 if (err < 0)
1596 return err;
1597
1598 err = -EINVAL;
1599 if (nhm->resvd || nhm->nh_scope) {
1600 NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
1601 goto out;
1602 }
1603 if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
1604 NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
1605 goto out;
1606 }
1607
1608 switch (nhm->nh_family) {
1609 case AF_INET:
1610 case AF_INET6:
1611 break;
1612 case AF_UNSPEC:
1613 if (tb[NHA_GROUP])
1614 break;
1615 fallthrough;
1616 default:
1617 NL_SET_ERR_MSG(extack, "Invalid address family");
1618 goto out;
1619 }
1620
1621 if (tb[NHA_GROUPS] || tb[NHA_MASTER]) {
1622 NL_SET_ERR_MSG(extack, "Invalid attributes in request");
1623 goto out;
1624 }
1625
1626 memset(cfg, 0, sizeof(*cfg));
1627 cfg->nlflags = nlh->nlmsg_flags;
1628 cfg->nlinfo.portid = NETLINK_CB(skb).portid;
1629 cfg->nlinfo.nlh = nlh;
1630 cfg->nlinfo.nl_net = net;
1631
1632 cfg->nh_family = nhm->nh_family;
1633 cfg->nh_protocol = nhm->nh_protocol;
1634 cfg->nh_flags = nhm->nh_flags;
1635
1636 if (tb[NHA_ID])
1637 cfg->nh_id = nla_get_u32(tb[NHA_ID]);
1638
1639 if (tb[NHA_FDB]) {
1640 if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
1641 tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
1642 NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
1643 goto out;
1644 }
1645 if (nhm->nh_flags) {
1646 NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
1647 goto out;
1648 }
1649 cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
1650 }
1651
1652 if (tb[NHA_GROUP]) {
1653 if (nhm->nh_family != AF_UNSPEC) {
1654 NL_SET_ERR_MSG(extack, "Invalid family for group");
1655 goto out;
1656 }
1657 cfg->nh_grp = tb[NHA_GROUP];
1658
1659 cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
1660 if (tb[NHA_GROUP_TYPE])
1661 cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
1662
1663 if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
1664 NL_SET_ERR_MSG(extack, "Invalid group type");
1665 goto out;
1666 }
1667 err = nh_check_attr_group(net, tb, extack);
1668
1669 /* no other attributes should be set */
1670 goto out;
1671 }
1672
1673 if (tb[NHA_BLACKHOLE]) {
1674 if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
1675 tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
1676 NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
1677 goto out;
1678 }
1679
1680 cfg->nh_blackhole = 1;
1681 err = 0;
1682 goto out;
1683 }
1684
1685 if (!cfg->nh_fdb && !tb[NHA_OIF]) {
1686 NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
1687 goto out;
1688 }
1689
1690 if (!cfg->nh_fdb && tb[NHA_OIF]) {
1691 cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
1692 if (cfg->nh_ifindex)
1693 cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
1694
1695 if (!cfg->dev) {
1696 NL_SET_ERR_MSG(extack, "Invalid device index");
1697 goto out;
1698 } else if (!(cfg->dev->flags & IFF_UP)) {
1699 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
1700 err = -ENETDOWN;
1701 goto out;
1702 } else if (!netif_carrier_ok(cfg->dev)) {
1703 NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
1704 err = -ENETDOWN;
1705 goto out;
1706 }
1707 }
1708
1709 err = -EINVAL;
1710 if (tb[NHA_GATEWAY]) {
1711 struct nlattr *gwa = tb[NHA_GATEWAY];
1712
1713 switch (cfg->nh_family) {
1714 case AF_INET:
1715 if (nla_len(gwa) != sizeof(u32)) {
1716 NL_SET_ERR_MSG(extack, "Invalid gateway");
1717 goto out;
1718 }
1719 cfg->gw.ipv4 = nla_get_be32(gwa);
1720 break;
1721 case AF_INET6:
1722 if (nla_len(gwa) != sizeof(struct in6_addr)) {
1723 NL_SET_ERR_MSG(extack, "Invalid gateway");
1724 goto out;
1725 }
1726 cfg->gw.ipv6 = nla_get_in6_addr(gwa);
1727 break;
1728 default:
1729 NL_SET_ERR_MSG(extack,
1730 "Unknown address family for gateway");
1731 goto out;
1732 }
1733 } else {
1734 /* device only nexthop (no gateway) */
1735 if (cfg->nh_flags & RTNH_F_ONLINK) {
1736 NL_SET_ERR_MSG(extack,
1737 "ONLINK flag can not be set for nexthop without a gateway");
1738 goto out;
1739 }
1740 }
1741
1742 if (tb[NHA_ENCAP]) {
1743 cfg->nh_encap = tb[NHA_ENCAP];
1744
1745 if (!tb[NHA_ENCAP_TYPE]) {
1746 NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
1747 goto out;
1748 }
1749
1750 cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
1751 err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
1752 if (err < 0)
1753 goto out;
1754
1755 } else if (tb[NHA_ENCAP_TYPE]) {
1756 NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
1757 goto out;
1758 }
1759
1760
1761 err = 0;
1762 out:
1763 return err;
1764 }
1765
1766 /* rtnl */
1767 static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
1768 struct netlink_ext_ack *extack)
1769 {
1770 struct net *net = sock_net(skb->sk);
1771 struct nh_config cfg;
1772 struct nexthop *nh;
1773 int err;
1774
1775 err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
1776 if (!err) {
1777 nh = nexthop_add(net, &cfg, extack);
1778 if (IS_ERR(nh))
1779 err = PTR_ERR(nh);
1780 }
1781
1782 return err;
1783 }
1784
1785 static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id,
1786 struct netlink_ext_ack *extack)
1787 {
1788 struct nhmsg *nhm = nlmsg_data(nlh);
1789 struct nlattr *tb[NHA_MAX + 1];
1790 int err, i;
1791
1792 err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
1793 extack);
1794 if (err < 0)
1795 return err;
1796
1797 err = -EINVAL;
1798 for (i = 0; i < __NHA_MAX; ++i) {
1799 if (!tb[i])
1800 continue;
1801
1802 switch (i) {
1803 case NHA_ID:
1804 break;
1805 default:
1806 NL_SET_ERR_MSG_ATTR(extack, tb[i],
1807 "Unexpected attribute in request");
1808 goto out;
1809 }
1810 }
1811 if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
1812 NL_SET_ERR_MSG(extack, "Invalid values in header");
1813 goto out;
1814 }
1815
1816 if (!tb[NHA_ID]) {
1817 NL_SET_ERR_MSG(extack, "Nexthop id is missing");
1818 goto out;
1819 }
1820
1821 *id = nla_get_u32(tb[NHA_ID]);
1822 if (!(*id))
1823 NL_SET_ERR_MSG(extack, "Invalid nexthop id");
1824 else
1825 err = 0;
1826 out:
1827 return err;
1828 }
1829
1830 /* rtnl */
1831 static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
1832 struct netlink_ext_ack *extack)
1833 {
1834 struct net *net = sock_net(skb->sk);
1835 struct nl_info nlinfo = {
1836 .nlh = nlh,
1837 .nl_net = net,
1838 .portid = NETLINK_CB(skb).portid,
1839 };
1840 struct nexthop *nh;
1841 int err;
1842 u32 id;
1843
1844 err = nh_valid_get_del_req(nlh, &id, extack);
1845 if (err)
1846 return err;
1847
1848 nh = nexthop_find_by_id(net, id);
1849 if (!nh)
1850 return -ENOENT;
1851
1852 remove_nexthop(net, nh, &nlinfo);
1853
1854 return 0;
1855 }
1856
1857 /* rtnl */
1858 static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
1859 struct netlink_ext_ack *extack)
1860 {
1861 struct net *net = sock_net(in_skb->sk);
1862 struct sk_buff *skb = NULL;
1863 struct nexthop *nh;
1864 int err;
1865 u32 id;
1866
1867 err = nh_valid_get_del_req(nlh, &id, extack);
1868 if (err)
1869 return err;
1870
1871 err = -ENOBUFS;
1872 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1873 if (!skb)
1874 goto out;
1875
1876 err = -ENOENT;
1877 nh = nexthop_find_by_id(net, id);
1878 if (!nh)
1879 goto errout_free;
1880
1881 err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
1882 nlh->nlmsg_seq, 0);
1883 if (err < 0) {
1884 WARN_ON(err == -EMSGSIZE);
1885 goto errout_free;
1886 }
1887
1888 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
1889 out:
1890 return err;
1891 errout_free:
1892 kfree_skb(skb);
1893 goto out;
1894 }
1895
1896 static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
1897 bool group_filter, u8 family)
1898 {
1899 const struct net_device *dev;
1900 const struct nh_info *nhi;
1901
1902 if (group_filter && !nh->is_group)
1903 return true;
1904
1905 if (!dev_idx && !master_idx && !family)
1906 return false;
1907
1908 if (nh->is_group)
1909 return true;
1910
1911 nhi = rtnl_dereference(nh->nh_info);
1912 if (family && nhi->family != family)
1913 return true;
1914
1915 dev = nhi->fib_nhc.nhc_dev;
1916 if (dev_idx && (!dev || dev->ifindex != dev_idx))
1917 return true;
1918
1919 if (master_idx) {
1920 struct net_device *master;
1921
1922 if (!dev)
1923 return true;
1924
1925 master = netdev_master_upper_dev_get((struct net_device *)dev);
1926 if (!master || master->ifindex != master_idx)
1927 return true;
1928 }
1929
1930 return false;
1931 }
1932
1933 static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
1934 int *master_idx, bool *group_filter,
1935 bool *fdb_filter, struct netlink_callback *cb)
1936 {
1937 struct netlink_ext_ack *extack = cb->extack;
1938 struct nlattr *tb[NHA_MAX + 1];
1939 struct nhmsg *nhm;
1940 int err, i;
1941 u32 idx;
1942
1943 err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
1944 NULL);
1945 if (err < 0)
1946 return err;
1947
1948 for (i = 0; i <= NHA_MAX; ++i) {
1949 if (!tb[i])
1950 continue;
1951
1952 switch (i) {
1953 case NHA_OIF:
1954 idx = nla_get_u32(tb[i]);
1955 if (idx > INT_MAX) {
1956 NL_SET_ERR_MSG(extack, "Invalid device index");
1957 return -EINVAL;
1958 }
1959 *dev_idx = idx;
1960 break;
1961 case NHA_MASTER:
1962 idx = nla_get_u32(tb[i]);
1963 if (idx > INT_MAX) {
1964 NL_SET_ERR_MSG(extack, "Invalid master device index");
1965 return -EINVAL;
1966 }
1967 *master_idx = idx;
1968 break;
1969 case NHA_GROUPS:
1970 *group_filter = true;
1971 break;
1972 case NHA_FDB:
1973 *fdb_filter = true;
1974 break;
1975 default:
1976 NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
1977 return -EINVAL;
1978 }
1979 }
1980
1981 nhm = nlmsg_data(nlh);
1982 if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
1983 NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
1984 return -EINVAL;
1985 }
1986
1987 return 0;
1988 }
1989
1990 /* rtnl */
1991 static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
1992 {
1993 bool group_filter = false, fdb_filter = false;
1994 struct nhmsg *nhm = nlmsg_data(cb->nlh);
1995 int dev_filter_idx = 0, master_idx = 0;
1996 struct net *net = sock_net(skb->sk);
1997 struct rb_root *root = &net->nexthop.rb_root;
1998 struct rb_node *node;
1999 int idx = 0, s_idx;
2000 int err;
2001
2002 err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
2003 &group_filter, &fdb_filter, cb);
2004 if (err < 0)
2005 return err;
2006
2007 s_idx = cb->args[0];
2008 for (node = rb_first(root); node; node = rb_next(node)) {
2009 struct nexthop *nh;
2010
2011 if (idx < s_idx)
2012 goto cont;
2013
2014 nh = rb_entry(node, struct nexthop, rb_node);
2015 if (nh_dump_filtered(nh, dev_filter_idx, master_idx,
2016 group_filter, nhm->nh_family))
2017 goto cont;
2018
2019 err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
2020 NETLINK_CB(cb->skb).portid,
2021 cb->nlh->nlmsg_seq, NLM_F_MULTI);
2022 if (err < 0) {
2023 if (likely(skb->len))
2024 goto out;
2025
2026 goto out_err;
2027 }
2028 cont:
2029 idx++;
2030 }
2031
2032 out:
2033 err = skb->len;
2034 out_err:
2035 cb->args[0] = idx;
2036 cb->seq = net->nexthop.seq;
2037 nl_dump_check_consistent(cb, nlmsg_hdr(skb));
2038
2039 return err;
2040 }
2041
2042 static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
2043 {
2044 unsigned int hash = nh_dev_hashfn(dev->ifindex);
2045 struct net *net = dev_net(dev);
2046 struct hlist_head *head = &net->nexthop.devhash[hash];
2047 struct hlist_node *n;
2048 struct nh_info *nhi;
2049
2050 hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
2051 if (nhi->fib_nhc.nhc_dev == dev) {
2052 if (nhi->family == AF_INET)
2053 fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
2054 orig_mtu);
2055 }
2056 }
2057 }
2058
2059 /* rtnl */
2060 static int nh_netdev_event(struct notifier_block *this,
2061 unsigned long event, void *ptr)
2062 {
2063 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2064 struct netdev_notifier_info_ext *info_ext;
2065
2066 switch (event) {
2067 case NETDEV_DOWN:
2068 case NETDEV_UNREGISTER:
2069 nexthop_flush_dev(dev);
2070 break;
2071 case NETDEV_CHANGE:
2072 if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
2073 nexthop_flush_dev(dev);
2074 break;
2075 case NETDEV_CHANGEMTU:
2076 info_ext = ptr;
2077 nexthop_sync_mtu(dev, info_ext->ext.mtu);
2078 rt_cache_flush(dev_net(dev));
2079 break;
2080 }
2081 return NOTIFY_DONE;
2082 }
2083
2084 static struct notifier_block nh_netdev_notifier = {
2085 .notifier_call = nh_netdev_event,
2086 };
2087
2088 int register_nexthop_notifier(struct net *net, struct notifier_block *nb)
2089 {
2090 return blocking_notifier_chain_register(&net->nexthop.notifier_chain,
2091 nb);
2092 }
2093 EXPORT_SYMBOL(register_nexthop_notifier);
2094
2095 int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
2096 {
2097 return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
2098 nb);
2099 }
2100 EXPORT_SYMBOL(unregister_nexthop_notifier);
2101
2102 void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap)
2103 {
2104 struct nexthop *nexthop;
2105
2106 rcu_read_lock();
2107
2108 nexthop = nexthop_find_by_id(net, id);
2109 if (!nexthop)
2110 goto out;
2111
2112 nexthop->nh_flags &= ~(RTNH_F_OFFLOAD | RTNH_F_TRAP);
2113 if (offload)
2114 nexthop->nh_flags |= RTNH_F_OFFLOAD;
2115 if (trap)
2116 nexthop->nh_flags |= RTNH_F_TRAP;
2117
2118 out:
2119 rcu_read_unlock();
2120 }
2121 EXPORT_SYMBOL(nexthop_set_hw_flags);
2122
2123 static void __net_exit nexthop_net_exit(struct net *net)
2124 {
2125 rtnl_lock();
2126 flush_all_nexthops(net);
2127 rtnl_unlock();
2128 kfree(net->nexthop.devhash);
2129 }
2130
2131 static int __net_init nexthop_net_init(struct net *net)
2132 {
2133 size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;
2134
2135 net->nexthop.rb_root = RB_ROOT;
2136 net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
2137 if (!net->nexthop.devhash)
2138 return -ENOMEM;
2139 BLOCKING_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain);
2140
2141 return 0;
2142 }
2143
2144 static struct pernet_operations nexthop_net_ops = {
2145 .init = nexthop_net_init,
2146 .exit = nexthop_net_exit,
2147 };
2148
2149 static int __init nexthop_init(void)
2150 {
2151 register_pernet_subsys(&nexthop_net_ops);
2152
2153 register_netdevice_notifier(&nh_netdev_notifier);
2154
2155 rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2156 rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
2157 rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
2158 rtm_dump_nexthop, 0);
2159
2160 rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2161 rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
2162
2163 rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
2164 rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
2165
2166 return 0;
2167 }
2168 subsys_initcall(nexthop_init);