1 // SPDX-License-Identifier: GPL-2.0
2 /* Generic nexthop implementation
4 * Copyright (c) 2017-19 Cumulus Networks
5 * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
8 #include <linux/nexthop.h>
9 #include <linux/rtnetlink.h>
10 #include <linux/slab.h>
12 #include <net/ipv6_stubs.h>
13 #include <net/lwtunnel.h>
14 #include <net/ndisc.h>
15 #include <net/nexthop.h>
16 #include <net/route.h>
/* forward declaration: remove_nexthop() is needed by the group-cleanup
 * helpers defined above its body later in this file.
 */
static void remove_nexthop(struct net *net, struct nexthop *nh,
			   struct nl_info *nlinfo);
/* per-netns device hash table used to find nexthops bound to a device:
 * 2^NH_DEV_HASHBITS buckets, keyed by ifindex (see nh_dev_hashfn()).
 */
#define NH_DEV_HASHBITS  8
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
25 static const struct nla_policy rtm_nh_policy
[NHA_MAX
+ 1] = {
26 [NHA_UNSPEC
] = { .strict_start_type
= NHA_UNSPEC
+ 1 },
27 [NHA_ID
] = { .type
= NLA_U32
},
28 [NHA_GROUP
] = { .type
= NLA_BINARY
},
29 [NHA_GROUP_TYPE
] = { .type
= NLA_U16
},
30 [NHA_BLACKHOLE
] = { .type
= NLA_FLAG
},
31 [NHA_OIF
] = { .type
= NLA_U32
},
32 [NHA_GATEWAY
] = { .type
= NLA_BINARY
},
33 [NHA_ENCAP_TYPE
] = { .type
= NLA_U16
},
34 [NHA_ENCAP
] = { .type
= NLA_NESTED
},
35 [NHA_GROUPS
] = { .type
= NLA_FLAG
},
36 [NHA_MASTER
] = { .type
= NLA_U32
},
39 static unsigned int nh_dev_hashfn(unsigned int val
)
41 unsigned int mask
= NH_DEV_HASHSIZE
- 1;
44 (val
>> NH_DEV_HASHBITS
) ^
45 (val
>> (NH_DEV_HASHBITS
* 2))) & mask
;
48 static void nexthop_devhash_add(struct net
*net
, struct nh_info
*nhi
)
50 struct net_device
*dev
= nhi
->fib_nhc
.nhc_dev
;
51 struct hlist_head
*head
;
56 hash
= nh_dev_hashfn(dev
->ifindex
);
57 head
= &net
->nexthop
.devhash
[hash
];
58 hlist_add_head(&nhi
->dev_hash
, head
);
61 static void nexthop_free_mpath(struct nexthop
*nh
)
66 nhg
= rcu_dereference_raw(nh
->nh_grp
);
67 for (i
= 0; i
< nhg
->num_nh
; ++i
)
68 WARN_ON(nhg
->nh_entries
[i
].nh
);
73 static void nexthop_free_single(struct nexthop
*nh
)
77 nhi
= rcu_dereference_raw(nh
->nh_info
);
78 switch (nhi
->family
) {
80 fib_nh_release(nh
->net
, &nhi
->fib_nh
);
83 ipv6_stub
->fib6_nh_release(&nhi
->fib6_nh
);
89 void nexthop_free_rcu(struct rcu_head
*head
)
91 struct nexthop
*nh
= container_of(head
, struct nexthop
, rcu
);
94 nexthop_free_mpath(nh
);
96 nexthop_free_single(nh
);
100 EXPORT_SYMBOL_GPL(nexthop_free_rcu
);
102 static struct nexthop
*nexthop_alloc(void)
106 nh
= kzalloc(sizeof(struct nexthop
), GFP_KERNEL
);
108 INIT_LIST_HEAD(&nh
->fi_list
);
109 INIT_LIST_HEAD(&nh
->f6i_list
);
110 INIT_LIST_HEAD(&nh
->grp_list
);
115 static struct nh_group
*nexthop_grp_alloc(u16 num_nh
)
117 size_t sz
= offsetof(struct nexthop
, nh_grp
)
118 + sizeof(struct nh_group
)
119 + sizeof(struct nh_grp_entry
) * num_nh
;
120 struct nh_group
*nhg
;
122 nhg
= kzalloc(sz
, GFP_KERNEL
);
124 nhg
->num_nh
= num_nh
;
129 static void nh_base_seq_inc(struct net
*net
)
131 while (++net
->nexthop
.seq
== 0)
135 /* no reference taken; rcu lock or rtnl must be held */
136 struct nexthop
*nexthop_find_by_id(struct net
*net
, u32 id
)
138 struct rb_node
**pp
, *parent
= NULL
, *next
;
140 pp
= &net
->nexthop
.rb_root
.rb_node
;
144 next
= rcu_dereference_raw(*pp
);
149 nh
= rb_entry(parent
, struct nexthop
, rb_node
);
152 else if (id
> nh
->id
)
153 pp
= &next
->rb_right
;
159 EXPORT_SYMBOL_GPL(nexthop_find_by_id
);
161 /* used for auto id allocation; called with rtnl held */
162 static u32
nh_find_unused_id(struct net
*net
)
164 u32 id_start
= net
->nexthop
.last_id_allocated
;
167 net
->nexthop
.last_id_allocated
++;
168 if (net
->nexthop
.last_id_allocated
== id_start
)
171 if (!nexthop_find_by_id(net
, net
->nexthop
.last_id_allocated
))
172 return net
->nexthop
.last_id_allocated
;
177 static int nla_put_nh_group(struct sk_buff
*skb
, struct nh_group
*nhg
)
179 struct nexthop_grp
*p
;
180 size_t len
= nhg
->num_nh
* sizeof(*p
);
186 group_type
= NEXTHOP_GRP_TYPE_MPATH
;
188 if (nla_put_u16(skb
, NHA_GROUP_TYPE
, group_type
))
189 goto nla_put_failure
;
191 nla
= nla_reserve(skb
, NHA_GROUP
, len
);
193 goto nla_put_failure
;
196 for (i
= 0; i
< nhg
->num_nh
; ++i
) {
197 p
->id
= nhg
->nh_entries
[i
].nh
->id
;
198 p
->weight
= nhg
->nh_entries
[i
].weight
- 1;
208 static int nh_fill_node(struct sk_buff
*skb
, struct nexthop
*nh
,
209 int event
, u32 portid
, u32 seq
, unsigned int nlflags
)
211 struct fib6_nh
*fib6_nh
;
212 struct fib_nh
*fib_nh
;
213 struct nlmsghdr
*nlh
;
217 nlh
= nlmsg_put(skb
, portid
, seq
, event
, sizeof(*nhm
), nlflags
);
221 nhm
= nlmsg_data(nlh
);
222 nhm
->nh_family
= AF_UNSPEC
;
223 nhm
->nh_flags
= nh
->nh_flags
;
224 nhm
->nh_protocol
= nh
->protocol
;
228 if (nla_put_u32(skb
, NHA_ID
, nh
->id
))
229 goto nla_put_failure
;
232 struct nh_group
*nhg
= rtnl_dereference(nh
->nh_grp
);
234 if (nla_put_nh_group(skb
, nhg
))
235 goto nla_put_failure
;
239 nhi
= rtnl_dereference(nh
->nh_info
);
240 nhm
->nh_family
= nhi
->family
;
241 if (nhi
->reject_nh
) {
242 if (nla_put_flag(skb
, NHA_BLACKHOLE
))
243 goto nla_put_failure
;
246 const struct net_device
*dev
;
248 dev
= nhi
->fib_nhc
.nhc_dev
;
249 if (dev
&& nla_put_u32(skb
, NHA_OIF
, dev
->ifindex
))
250 goto nla_put_failure
;
253 nhm
->nh_scope
= nhi
->fib_nhc
.nhc_scope
;
254 switch (nhi
->family
) {
256 fib_nh
= &nhi
->fib_nh
;
257 if (fib_nh
->fib_nh_gw_family
&&
258 nla_put_u32(skb
, NHA_GATEWAY
, fib_nh
->fib_nh_gw4
))
259 goto nla_put_failure
;
263 fib6_nh
= &nhi
->fib6_nh
;
264 if (fib6_nh
->fib_nh_gw_family
&&
265 nla_put_in6_addr(skb
, NHA_GATEWAY
, &fib6_nh
->fib_nh_gw6
))
266 goto nla_put_failure
;
270 if (nhi
->fib_nhc
.nhc_lwtstate
&&
271 lwtunnel_fill_encap(skb
, nhi
->fib_nhc
.nhc_lwtstate
,
272 NHA_ENCAP
, NHA_ENCAP_TYPE
) < 0)
273 goto nla_put_failure
;
283 static size_t nh_nlmsg_size_grp(struct nexthop
*nh
)
285 struct nh_group
*nhg
= rtnl_dereference(nh
->nh_grp
);
286 size_t sz
= sizeof(struct nexthop_grp
) * nhg
->num_nh
;
288 return nla_total_size(sz
) +
289 nla_total_size(2); /* NHA_GROUP_TYPE */
292 static size_t nh_nlmsg_size_single(struct nexthop
*nh
)
294 struct nh_info
*nhi
= rtnl_dereference(nh
->nh_info
);
297 /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
298 * are mutually exclusive
300 sz
= nla_total_size(4); /* NHA_OIF */
302 switch (nhi
->family
) {
304 if (nhi
->fib_nh
.fib_nh_gw_family
)
305 sz
+= nla_total_size(4); /* NHA_GATEWAY */
310 if (nhi
->fib6_nh
.fib_nh_gw_family
)
311 sz
+= nla_total_size(sizeof(const struct in6_addr
));
315 if (nhi
->fib_nhc
.nhc_lwtstate
) {
316 sz
+= lwtunnel_get_encap_size(nhi
->fib_nhc
.nhc_lwtstate
);
317 sz
+= nla_total_size(2); /* NHA_ENCAP_TYPE */
323 static size_t nh_nlmsg_size(struct nexthop
*nh
)
325 size_t sz
= nla_total_size(4); /* NHA_ID */
328 sz
+= nh_nlmsg_size_grp(nh
);
330 sz
+= nh_nlmsg_size_single(nh
);
335 static void nexthop_notify(int event
, struct nexthop
*nh
, struct nl_info
*info
)
337 unsigned int nlflags
= info
->nlh
? info
->nlh
->nlmsg_flags
: 0;
338 u32 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
342 skb
= nlmsg_new(nh_nlmsg_size(nh
), gfp_any());
346 err
= nh_fill_node(skb
, nh
, event
, info
->portid
, seq
, nlflags
);
348 /* -EMSGSIZE implies BUG in nh_nlmsg_size() */
349 WARN_ON(err
== -EMSGSIZE
);
354 rtnl_notify(skb
, info
->nl_net
, info
->portid
, RTNLGRP_NEXTHOP
,
355 info
->nlh
, gfp_any());
359 rtnl_set_sk_err(info
->nl_net
, RTNLGRP_NEXTHOP
, err
);
362 static bool valid_group_nh(struct nexthop
*nh
, unsigned int npaths
,
363 struct netlink_ext_ack
*extack
)
366 struct nh_group
*nhg
= rtnl_dereference(nh
->nh_grp
);
368 /* nested multipath (group within a group) is not
372 NL_SET_ERR_MSG(extack
,
373 "Multipath group can not be a nexthop within a group");
377 struct nh_info
*nhi
= rtnl_dereference(nh
->nh_info
);
379 if (nhi
->reject_nh
&& npaths
> 1) {
380 NL_SET_ERR_MSG(extack
,
381 "Blackhole nexthop can not be used in a group with more than 1 path");
389 static int nh_check_attr_group(struct net
*net
, struct nlattr
*tb
[],
390 struct netlink_ext_ack
*extack
)
392 unsigned int len
= nla_len(tb
[NHA_GROUP
]);
393 struct nexthop_grp
*nhg
;
396 if (len
& (sizeof(struct nexthop_grp
) - 1)) {
397 NL_SET_ERR_MSG(extack
,
398 "Invalid length for nexthop group attribute");
402 /* convert len to number of nexthop ids */
405 nhg
= nla_data(tb
[NHA_GROUP
]);
406 for (i
= 0; i
< len
; ++i
) {
407 if (nhg
[i
].resvd1
|| nhg
[i
].resvd2
) {
408 NL_SET_ERR_MSG(extack
, "Reserved fields in nexthop_grp must be 0");
411 if (nhg
[i
].weight
> 254) {
412 NL_SET_ERR_MSG(extack
, "Invalid value for weight");
415 for (j
= i
+ 1; j
< len
; ++j
) {
416 if (nhg
[i
].id
== nhg
[j
].id
) {
417 NL_SET_ERR_MSG(extack
, "Nexthop id can not be used twice in a group");
423 nhg
= nla_data(tb
[NHA_GROUP
]);
424 for (i
= 0; i
< len
; ++i
) {
427 nh
= nexthop_find_by_id(net
, nhg
[i
].id
);
429 NL_SET_ERR_MSG(extack
, "Invalid nexthop id");
432 if (!valid_group_nh(nh
, len
, extack
))
435 for (i
= NHA_GROUP
+ 1; i
< __NHA_MAX
; ++i
) {
439 NL_SET_ERR_MSG(extack
,
440 "No other attributes can be set in nexthop groups");
447 static bool ipv6_good_nh(const struct fib6_nh
*nh
)
449 int state
= NUD_REACHABLE
;
454 n
= __ipv6_neigh_lookup_noref_stub(nh
->fib_nh_dev
, &nh
->fib_nh_gw6
);
456 state
= n
->nud_state
;
458 rcu_read_unlock_bh();
460 return !!(state
& NUD_VALID
);
463 static bool ipv4_good_nh(const struct fib_nh
*nh
)
465 int state
= NUD_REACHABLE
;
470 n
= __ipv4_neigh_lookup_noref(nh
->fib_nh_dev
,
471 (__force u32
)nh
->fib_nh_gw4
);
473 state
= n
->nud_state
;
475 rcu_read_unlock_bh();
477 return !!(state
& NUD_VALID
);
480 struct nexthop
*nexthop_select_path(struct nexthop
*nh
, int hash
)
482 struct nexthop
*rc
= NULL
;
483 struct nh_group
*nhg
;
489 nhg
= rcu_dereference(nh
->nh_grp
);
490 for (i
= 0; i
< nhg
->num_nh
; ++i
) {
491 struct nh_grp_entry
*nhge
= &nhg
->nh_entries
[i
];
494 if (hash
> atomic_read(&nhge
->upper_bound
))
497 /* nexthops always check if it is good and does
498 * not rely on a sysctl for this behavior
500 nhi
= rcu_dereference(nhge
->nh
->nh_info
);
501 switch (nhi
->family
) {
503 if (ipv4_good_nh(&nhi
->fib_nh
))
507 if (ipv6_good_nh(&nhi
->fib6_nh
))
518 EXPORT_SYMBOL_GPL(nexthop_select_path
);
520 int nexthop_for_each_fib6_nh(struct nexthop
*nh
,
521 int (*cb
)(struct fib6_nh
*nh
, void *arg
),
528 struct nh_group
*nhg
;
531 nhg
= rcu_dereference_rtnl(nh
->nh_grp
);
532 for (i
= 0; i
< nhg
->num_nh
; i
++) {
533 struct nh_grp_entry
*nhge
= &nhg
->nh_entries
[i
];
535 nhi
= rcu_dereference_rtnl(nhge
->nh
->nh_info
);
536 err
= cb(&nhi
->fib6_nh
, arg
);
541 nhi
= rcu_dereference_rtnl(nh
->nh_info
);
542 err
= cb(&nhi
->fib6_nh
, arg
);
549 EXPORT_SYMBOL_GPL(nexthop_for_each_fib6_nh
);
551 static int check_src_addr(const struct in6_addr
*saddr
,
552 struct netlink_ext_ack
*extack
)
554 if (!ipv6_addr_any(saddr
)) {
555 NL_SET_ERR_MSG(extack
, "IPv6 routes using source address can not use nexthop objects");
561 int fib6_check_nexthop(struct nexthop
*nh
, struct fib6_config
*cfg
,
562 struct netlink_ext_ack
*extack
)
566 /* fib6_src is unique to a fib6_info and limits the ability to cache
567 * routes in fib6_nh within a nexthop that is potentially shared
568 * across multiple fib entries. If the config wants to use source
569 * routing it can not use nexthop objects. mlxsw also does not allow
570 * fib6_src on routes.
572 if (cfg
&& check_src_addr(&cfg
->fc_src
, extack
) < 0)
576 struct nh_group
*nhg
;
578 nhg
= rtnl_dereference(nh
->nh_grp
);
582 nhi
= rtnl_dereference(nh
->nh_info
);
583 if (nhi
->family
== AF_INET
)
589 NL_SET_ERR_MSG(extack
, "IPv6 routes can not use an IPv4 nexthop");
592 EXPORT_SYMBOL_GPL(fib6_check_nexthop
);
594 /* if existing nexthop has ipv6 routes linked to it, need
595 * to verify this new spec works with ipv6
597 static int fib6_check_nh_list(struct nexthop
*old
, struct nexthop
*new,
598 struct netlink_ext_ack
*extack
)
600 struct fib6_info
*f6i
;
602 if (list_empty(&old
->f6i_list
))
605 list_for_each_entry(f6i
, &old
->f6i_list
, nh_list
) {
606 if (check_src_addr(&f6i
->fib6_src
.addr
, extack
) < 0)
610 return fib6_check_nexthop(new, NULL
, extack
);
613 static int nexthop_check_scope(struct nexthop
*nh
, u8 scope
,
614 struct netlink_ext_ack
*extack
)
618 nhi
= rtnl_dereference(nh
->nh_info
);
619 if (scope
== RT_SCOPE_HOST
&& nhi
->fib_nhc
.nhc_gw_family
) {
620 NL_SET_ERR_MSG(extack
,
621 "Route with host scope can not have a gateway");
625 if (nhi
->fib_nhc
.nhc_flags
& RTNH_F_ONLINK
&& scope
>= RT_SCOPE_LINK
) {
626 NL_SET_ERR_MSG(extack
, "Scope mismatch with nexthop");
633 /* Invoked by fib add code to verify nexthop by id is ok with
634 * config for prefix; parts of fib_check_nh not done when nexthop
637 int fib_check_nexthop(struct nexthop
*nh
, u8 scope
,
638 struct netlink_ext_ack
*extack
)
643 struct nh_group
*nhg
;
645 if (scope
== RT_SCOPE_HOST
) {
646 NL_SET_ERR_MSG(extack
, "Route with host scope can not have multiple nexthops");
651 nhg
= rtnl_dereference(nh
->nh_grp
);
652 /* all nexthops in a group have the same scope */
653 err
= nexthop_check_scope(nhg
->nh_entries
[0].nh
, scope
, extack
);
655 err
= nexthop_check_scope(nh
, scope
, extack
);
661 static int fib_check_nh_list(struct nexthop
*old
, struct nexthop
*new,
662 struct netlink_ext_ack
*extack
)
666 list_for_each_entry(fi
, &old
->fi_list
, nh_list
) {
669 err
= fib_check_nexthop(new, fi
->fib_scope
, extack
);
676 static void nh_group_rebalance(struct nh_group
*nhg
)
682 for (i
= 0; i
< nhg
->num_nh
; ++i
)
683 total
+= nhg
->nh_entries
[i
].weight
;
685 for (i
= 0; i
< nhg
->num_nh
; ++i
) {
686 struct nh_grp_entry
*nhge
= &nhg
->nh_entries
[i
];
690 upper_bound
= DIV_ROUND_CLOSEST_ULL((u64
)w
<< 31, total
) - 1;
691 atomic_set(&nhge
->upper_bound
, upper_bound
);
695 static void remove_nh_grp_entry(struct nh_grp_entry
*nhge
,
696 struct nh_group
*nhg
,
697 struct nl_info
*nlinfo
)
699 struct nexthop
*nh
= nhge
->nh
;
700 struct nh_grp_entry
*nhges
;
706 nhges
= nhg
->nh_entries
;
707 for (i
= 0; i
< nhg
->num_nh
; ++i
) {
709 nhges
[i
-1].nh
= nhges
[i
].nh
;
710 nhges
[i
-1].weight
= nhges
[i
].weight
;
711 list_del(&nhges
[i
].nh_list
);
712 list_add(&nhges
[i
-1].nh_list
, &nhges
[i
-1].nh
->grp_list
);
713 } else if (nhg
->nh_entries
[i
].nh
== nh
) {
722 nhg
->nh_entries
[nhg
->num_nh
].nh
= NULL
;
724 nh_group_rebalance(nhg
);
729 nexthop_notify(RTM_NEWNEXTHOP
, nhge
->nh_parent
, nlinfo
);
732 static void remove_nexthop_from_groups(struct net
*net
, struct nexthop
*nh
,
733 struct nl_info
*nlinfo
)
735 struct nh_grp_entry
*nhge
, *tmp
;
737 list_for_each_entry_safe(nhge
, tmp
, &nh
->grp_list
, nh_list
) {
738 struct nh_group
*nhg
;
740 list_del(&nhge
->nh_list
);
741 nhg
= rtnl_dereference(nhge
->nh_parent
->nh_grp
);
742 remove_nh_grp_entry(nhge
, nhg
, nlinfo
);
744 /* if this group has no more entries then remove it */
746 remove_nexthop(net
, nhge
->nh_parent
, nlinfo
);
750 static void remove_nexthop_group(struct nexthop
*nh
, struct nl_info
*nlinfo
)
752 struct nh_group
*nhg
= rcu_dereference_rtnl(nh
->nh_grp
);
753 int i
, num_nh
= nhg
->num_nh
;
755 for (i
= 0; i
< num_nh
; ++i
) {
756 struct nh_grp_entry
*nhge
= &nhg
->nh_entries
[i
];
758 if (WARN_ON(!nhge
->nh
))
761 list_del(&nhge
->nh_list
);
762 nexthop_put(nhge
->nh
);
768 /* not called for nexthop replace */
769 static void __remove_nexthop_fib(struct net
*net
, struct nexthop
*nh
)
771 struct fib6_info
*f6i
, *tmp
;
772 bool do_flush
= false;
775 list_for_each_entry(fi
, &nh
->fi_list
, nh_list
) {
776 fi
->fib_flags
|= RTNH_F_DEAD
;
782 /* ip6_del_rt removes the entry from this list hence the _safe */
783 list_for_each_entry_safe(f6i
, tmp
, &nh
->f6i_list
, nh_list
) {
784 /* __ip6_del_rt does a release, so do a hold here */
786 ipv6_stub
->ip6_del_rt(net
, f6i
);
790 static void __remove_nexthop(struct net
*net
, struct nexthop
*nh
,
791 struct nl_info
*nlinfo
)
793 __remove_nexthop_fib(net
, nh
);
796 remove_nexthop_group(nh
, nlinfo
);
800 nhi
= rtnl_dereference(nh
->nh_info
);
801 if (nhi
->fib_nhc
.nhc_dev
)
802 hlist_del(&nhi
->dev_hash
);
804 remove_nexthop_from_groups(net
, nh
, nlinfo
);
808 static void remove_nexthop(struct net
*net
, struct nexthop
*nh
,
809 struct nl_info
*nlinfo
)
811 /* remove from the tree */
812 rb_erase(&nh
->rb_node
, &net
->nexthop
.rb_root
);
815 nexthop_notify(RTM_DELNEXTHOP
, nh
, nlinfo
);
817 __remove_nexthop(net
, nh
, nlinfo
);
818 nh_base_seq_inc(net
);
823 /* if any FIB entries reference this nexthop, any dst entries
824 * need to be regenerated
826 static void nh_rt_cache_flush(struct net
*net
, struct nexthop
*nh
)
828 struct fib6_info
*f6i
;
830 if (!list_empty(&nh
->fi_list
))
833 list_for_each_entry(f6i
, &nh
->f6i_list
, nh_list
)
834 ipv6_stub
->fib6_update_sernum(net
, f6i
);
837 static int replace_nexthop_grp(struct net
*net
, struct nexthop
*old
,
839 struct netlink_ext_ack
*extack
)
841 struct nh_group
*oldg
, *newg
;
844 if (!new->is_group
) {
845 NL_SET_ERR_MSG(extack
, "Can not replace a nexthop group with a nexthop.");
849 oldg
= rtnl_dereference(old
->nh_grp
);
850 newg
= rtnl_dereference(new->nh_grp
);
852 /* update parents - used by nexthop code for cleanup */
853 for (i
= 0; i
< newg
->num_nh
; i
++)
854 newg
->nh_entries
[i
].nh_parent
= old
;
856 rcu_assign_pointer(old
->nh_grp
, newg
);
858 for (i
= 0; i
< oldg
->num_nh
; i
++)
859 oldg
->nh_entries
[i
].nh_parent
= new;
861 rcu_assign_pointer(new->nh_grp
, oldg
);
866 static int replace_nexthop_single(struct net
*net
, struct nexthop
*old
,
868 struct netlink_ext_ack
*extack
)
870 struct nh_info
*oldi
, *newi
;
873 NL_SET_ERR_MSG(extack
, "Can not replace a nexthop with a nexthop group.");
877 oldi
= rtnl_dereference(old
->nh_info
);
878 newi
= rtnl_dereference(new->nh_info
);
880 newi
->nh_parent
= old
;
881 oldi
->nh_parent
= new;
883 old
->protocol
= new->protocol
;
884 old
->nh_flags
= new->nh_flags
;
886 rcu_assign_pointer(old
->nh_info
, newi
);
887 rcu_assign_pointer(new->nh_info
, oldi
);
892 static void __nexthop_replace_notify(struct net
*net
, struct nexthop
*nh
,
893 struct nl_info
*info
)
895 struct fib6_info
*f6i
;
897 if (!list_empty(&nh
->fi_list
)) {
900 /* expectation is a few fib_info per nexthop and then
901 * a lot of routes per fib_info. So mark the fib_info
902 * and then walk the fib tables once
904 list_for_each_entry(fi
, &nh
->fi_list
, nh_list
)
905 fi
->nh_updated
= true;
907 fib_info_notify_update(net
, info
);
909 list_for_each_entry(fi
, &nh
->fi_list
, nh_list
)
910 fi
->nh_updated
= false;
913 list_for_each_entry(f6i
, &nh
->f6i_list
, nh_list
)
914 ipv6_stub
->fib6_rt_update(net
, f6i
, info
);
917 /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries
918 * linked to this nexthop and for all groups that the nexthop
921 static void nexthop_replace_notify(struct net
*net
, struct nexthop
*nh
,
922 struct nl_info
*info
)
924 struct nh_grp_entry
*nhge
;
926 __nexthop_replace_notify(net
, nh
, info
);
928 list_for_each_entry(nhge
, &nh
->grp_list
, nh_list
)
929 __nexthop_replace_notify(net
, nhge
->nh_parent
, info
);
932 static int replace_nexthop(struct net
*net
, struct nexthop
*old
,
933 struct nexthop
*new, struct netlink_ext_ack
*extack
)
935 bool new_is_reject
= false;
936 struct nh_grp_entry
*nhge
;
939 /* check that existing FIB entries are ok with the
940 * new nexthop definition
942 err
= fib_check_nh_list(old
, new, extack
);
946 err
= fib6_check_nh_list(old
, new, extack
);
950 if (!new->is_group
) {
951 struct nh_info
*nhi
= rtnl_dereference(new->nh_info
);
953 new_is_reject
= nhi
->reject_nh
;
956 list_for_each_entry(nhge
, &old
->grp_list
, nh_list
) {
957 /* if new nexthop is a blackhole, any groups using this
958 * nexthop cannot have more than 1 path
961 nexthop_num_path(nhge
->nh_parent
) > 1) {
962 NL_SET_ERR_MSG(extack
, "Blackhole nexthop can not be a member of a group with more than one path");
966 err
= fib_check_nh_list(nhge
->nh_parent
, new, extack
);
970 err
= fib6_check_nh_list(nhge
->nh_parent
, new, extack
);
976 err
= replace_nexthop_grp(net
, old
, new, extack
);
978 err
= replace_nexthop_single(net
, old
, new, extack
);
981 nh_rt_cache_flush(net
, old
);
983 __remove_nexthop(net
, new, NULL
);
990 /* called with rtnl_lock held */
991 static int insert_nexthop(struct net
*net
, struct nexthop
*new_nh
,
992 struct nh_config
*cfg
, struct netlink_ext_ack
*extack
)
994 struct rb_node
**pp
, *parent
= NULL
, *next
;
995 struct rb_root
*root
= &net
->nexthop
.rb_root
;
996 bool replace
= !!(cfg
->nlflags
& NLM_F_REPLACE
);
997 bool create
= !!(cfg
->nlflags
& NLM_F_CREATE
);
998 u32 new_id
= new_nh
->id
;
999 int replace_notify
= 0;
1002 pp
= &root
->rb_node
;
1006 next
= rtnl_dereference(*pp
);
1012 nh
= rb_entry(parent
, struct nexthop
, rb_node
);
1013 if (new_id
< nh
->id
) {
1014 pp
= &next
->rb_left
;
1015 } else if (new_id
> nh
->id
) {
1016 pp
= &next
->rb_right
;
1017 } else if (replace
) {
1018 rc
= replace_nexthop(net
, nh
, new_nh
, extack
);
1020 new_nh
= nh
; /* send notification with old nh */
1025 /* id already exists and not a replace */
1030 if (replace
&& !create
) {
1031 NL_SET_ERR_MSG(extack
, "Replace specified without create and no entry exists");
1036 rb_link_node_rcu(&new_nh
->rb_node
, parent
, pp
);
1037 rb_insert_color(&new_nh
->rb_node
, root
);
1041 nh_base_seq_inc(net
);
1042 nexthop_notify(RTM_NEWNEXTHOP
, new_nh
, &cfg
->nlinfo
);
1044 nexthop_replace_notify(net
, new_nh
, &cfg
->nlinfo
);
1051 /* remove all nexthops tied to a device being deleted */
1052 static void nexthop_flush_dev(struct net_device
*dev
)
1054 unsigned int hash
= nh_dev_hashfn(dev
->ifindex
);
1055 struct net
*net
= dev_net(dev
);
1056 struct hlist_head
*head
= &net
->nexthop
.devhash
[hash
];
1057 struct hlist_node
*n
;
1058 struct nh_info
*nhi
;
1060 hlist_for_each_entry_safe(nhi
, n
, head
, dev_hash
) {
1061 if (nhi
->fib_nhc
.nhc_dev
!= dev
)
1064 remove_nexthop(net
, nhi
->nh_parent
, NULL
);
1068 /* rtnl; called when net namespace is deleted */
1069 static void flush_all_nexthops(struct net
*net
)
1071 struct rb_root
*root
= &net
->nexthop
.rb_root
;
1072 struct rb_node
*node
;
1075 while ((node
= rb_first(root
))) {
1076 nh
= rb_entry(node
, struct nexthop
, rb_node
);
1077 remove_nexthop(net
, nh
, NULL
);
1082 static struct nexthop
*nexthop_create_group(struct net
*net
,
1083 struct nh_config
*cfg
)
1085 struct nlattr
*grps_attr
= cfg
->nh_grp
;
1086 struct nexthop_grp
*entry
= nla_data(grps_attr
);
1087 struct nh_group
*nhg
;
1091 nh
= nexthop_alloc();
1093 return ERR_PTR(-ENOMEM
);
1097 nhg
= nexthop_grp_alloc(nla_len(grps_attr
) / sizeof(*entry
));
1100 return ERR_PTR(-ENOMEM
);
1103 for (i
= 0; i
< nhg
->num_nh
; ++i
) {
1104 struct nexthop
*nhe
;
1105 struct nh_info
*nhi
;
1107 nhe
= nexthop_find_by_id(net
, entry
[i
].id
);
1108 if (!nexthop_get(nhe
))
1111 nhi
= rtnl_dereference(nhe
->nh_info
);
1112 if (nhi
->family
== AF_INET
)
1115 nhg
->nh_entries
[i
].nh
= nhe
;
1116 nhg
->nh_entries
[i
].weight
= entry
[i
].weight
+ 1;
1117 list_add(&nhg
->nh_entries
[i
].nh_list
, &nhe
->grp_list
);
1118 nhg
->nh_entries
[i
].nh_parent
= nh
;
1121 if (cfg
->nh_grp_type
== NEXTHOP_GRP_TYPE_MPATH
) {
1123 nh_group_rebalance(nhg
);
1126 rcu_assign_pointer(nh
->nh_grp
, nhg
);
1132 nexthop_put(nhg
->nh_entries
[i
].nh
);
1137 return ERR_PTR(-ENOENT
);
1140 static int nh_create_ipv4(struct net
*net
, struct nexthop
*nh
,
1141 struct nh_info
*nhi
, struct nh_config
*cfg
,
1142 struct netlink_ext_ack
*extack
)
1144 struct fib_nh
*fib_nh
= &nhi
->fib_nh
;
1145 struct fib_config fib_cfg
= {
1146 .fc_oif
= cfg
->nh_ifindex
,
1147 .fc_gw4
= cfg
->gw
.ipv4
,
1148 .fc_gw_family
= cfg
->gw
.ipv4
? AF_INET
: 0,
1149 .fc_flags
= cfg
->nh_flags
,
1150 .fc_encap
= cfg
->nh_encap
,
1151 .fc_encap_type
= cfg
->nh_encap_type
,
1153 u32 tb_id
= l3mdev_fib_table(cfg
->dev
);
1156 err
= fib_nh_init(net
, fib_nh
, &fib_cfg
, 1, extack
);
1158 fib_nh_release(net
, fib_nh
);
1162 /* sets nh_dev if successful */
1163 err
= fib_check_nh(net
, fib_nh
, tb_id
, 0, extack
);
1165 nh
->nh_flags
= fib_nh
->fib_nh_flags
;
1166 fib_info_update_nhc_saddr(net
, &fib_nh
->nh_common
,
1167 fib_nh
->fib_nh_scope
);
1169 fib_nh_release(net
, fib_nh
);
1175 static int nh_create_ipv6(struct net
*net
, struct nexthop
*nh
,
1176 struct nh_info
*nhi
, struct nh_config
*cfg
,
1177 struct netlink_ext_ack
*extack
)
1179 struct fib6_nh
*fib6_nh
= &nhi
->fib6_nh
;
1180 struct fib6_config fib6_cfg
= {
1181 .fc_table
= l3mdev_fib_table(cfg
->dev
),
1182 .fc_ifindex
= cfg
->nh_ifindex
,
1183 .fc_gateway
= cfg
->gw
.ipv6
,
1184 .fc_flags
= cfg
->nh_flags
,
1185 .fc_encap
= cfg
->nh_encap
,
1186 .fc_encap_type
= cfg
->nh_encap_type
,
1190 if (!ipv6_addr_any(&cfg
->gw
.ipv6
))
1191 fib6_cfg
.fc_flags
|= RTF_GATEWAY
;
1193 /* sets nh_dev if successful */
1194 err
= ipv6_stub
->fib6_nh_init(net
, fib6_nh
, &fib6_cfg
, GFP_KERNEL
,
1197 ipv6_stub
->fib6_nh_release(fib6_nh
);
1199 nh
->nh_flags
= fib6_nh
->fib_nh_flags
;
1204 static struct nexthop
*nexthop_create(struct net
*net
, struct nh_config
*cfg
,
1205 struct netlink_ext_ack
*extack
)
1207 struct nh_info
*nhi
;
1211 nh
= nexthop_alloc();
1213 return ERR_PTR(-ENOMEM
);
1215 nhi
= kzalloc(sizeof(*nhi
), GFP_KERNEL
);
1218 return ERR_PTR(-ENOMEM
);
1221 nh
->nh_flags
= cfg
->nh_flags
;
1224 nhi
->nh_parent
= nh
;
1225 nhi
->family
= cfg
->nh_family
;
1226 nhi
->fib_nhc
.nhc_scope
= RT_SCOPE_LINK
;
1228 if (cfg
->nh_blackhole
) {
1230 cfg
->nh_ifindex
= net
->loopback_dev
->ifindex
;
1233 switch (cfg
->nh_family
) {
1235 err
= nh_create_ipv4(net
, nh
, nhi
, cfg
, extack
);
1238 err
= nh_create_ipv6(net
, nh
, nhi
, cfg
, extack
);
1245 return ERR_PTR(err
);
1248 /* add the entry to the device based hash */
1249 nexthop_devhash_add(net
, nhi
);
1251 rcu_assign_pointer(nh
->nh_info
, nhi
);
1256 /* called with rtnl lock held */
1257 static struct nexthop
*nexthop_add(struct net
*net
, struct nh_config
*cfg
,
1258 struct netlink_ext_ack
*extack
)
1263 if (cfg
->nlflags
& NLM_F_REPLACE
&& !cfg
->nh_id
) {
1264 NL_SET_ERR_MSG(extack
, "Replace requires nexthop id");
1265 return ERR_PTR(-EINVAL
);
1269 cfg
->nh_id
= nh_find_unused_id(net
);
1271 NL_SET_ERR_MSG(extack
, "No unused id");
1272 return ERR_PTR(-EINVAL
);
1277 nh
= nexthop_create_group(net
, cfg
);
1279 nh
= nexthop_create(net
, cfg
, extack
);
1284 refcount_set(&nh
->refcnt
, 1);
1285 nh
->id
= cfg
->nh_id
;
1286 nh
->protocol
= cfg
->nh_protocol
;
1289 err
= insert_nexthop(net
, nh
, cfg
, extack
);
1291 __remove_nexthop(net
, nh
, NULL
);
1299 static int rtm_to_nh_config(struct net
*net
, struct sk_buff
*skb
,
1300 struct nlmsghdr
*nlh
, struct nh_config
*cfg
,
1301 struct netlink_ext_ack
*extack
)
1303 struct nhmsg
*nhm
= nlmsg_data(nlh
);
1304 struct nlattr
*tb
[NHA_MAX
+ 1];
1307 err
= nlmsg_parse(nlh
, sizeof(*nhm
), tb
, NHA_MAX
, rtm_nh_policy
,
1313 if (nhm
->resvd
|| nhm
->nh_scope
) {
1314 NL_SET_ERR_MSG(extack
, "Invalid values in ancillary header");
1317 if (nhm
->nh_flags
& ~NEXTHOP_VALID_USER_FLAGS
) {
1318 NL_SET_ERR_MSG(extack
, "Invalid nexthop flags in ancillary header");
1322 switch (nhm
->nh_family
) {
1331 NL_SET_ERR_MSG(extack
, "Invalid address family");
1335 if (tb
[NHA_GROUPS
] || tb
[NHA_MASTER
]) {
1336 NL_SET_ERR_MSG(extack
, "Invalid attributes in request");
1340 memset(cfg
, 0, sizeof(*cfg
));
1341 cfg
->nlflags
= nlh
->nlmsg_flags
;
1342 cfg
->nlinfo
.portid
= NETLINK_CB(skb
).portid
;
1343 cfg
->nlinfo
.nlh
= nlh
;
1344 cfg
->nlinfo
.nl_net
= net
;
1346 cfg
->nh_family
= nhm
->nh_family
;
1347 cfg
->nh_protocol
= nhm
->nh_protocol
;
1348 cfg
->nh_flags
= nhm
->nh_flags
;
1351 cfg
->nh_id
= nla_get_u32(tb
[NHA_ID
]);
1353 if (tb
[NHA_GROUP
]) {
1354 if (nhm
->nh_family
!= AF_UNSPEC
) {
1355 NL_SET_ERR_MSG(extack
, "Invalid family for group");
1358 cfg
->nh_grp
= tb
[NHA_GROUP
];
1360 cfg
->nh_grp_type
= NEXTHOP_GRP_TYPE_MPATH
;
1361 if (tb
[NHA_GROUP_TYPE
])
1362 cfg
->nh_grp_type
= nla_get_u16(tb
[NHA_GROUP_TYPE
]);
1364 if (cfg
->nh_grp_type
> NEXTHOP_GRP_TYPE_MAX
) {
1365 NL_SET_ERR_MSG(extack
, "Invalid group type");
1368 err
= nh_check_attr_group(net
, tb
, extack
);
1370 /* no other attributes should be set */
1374 if (tb
[NHA_BLACKHOLE
]) {
1375 if (tb
[NHA_GATEWAY
] || tb
[NHA_OIF
] ||
1376 tb
[NHA_ENCAP
] || tb
[NHA_ENCAP_TYPE
]) {
1377 NL_SET_ERR_MSG(extack
, "Blackhole attribute can not be used with gateway or oif");
1381 cfg
->nh_blackhole
= 1;
1387 NL_SET_ERR_MSG(extack
, "Device attribute required for non-blackhole nexthops");
1391 cfg
->nh_ifindex
= nla_get_u32(tb
[NHA_OIF
]);
1392 if (cfg
->nh_ifindex
)
1393 cfg
->dev
= __dev_get_by_index(net
, cfg
->nh_ifindex
);
1396 NL_SET_ERR_MSG(extack
, "Invalid device index");
1398 } else if (!(cfg
->dev
->flags
& IFF_UP
)) {
1399 NL_SET_ERR_MSG(extack
, "Nexthop device is not up");
1402 } else if (!netif_carrier_ok(cfg
->dev
)) {
1403 NL_SET_ERR_MSG(extack
, "Carrier for nexthop device is down");
1409 if (tb
[NHA_GATEWAY
]) {
1410 struct nlattr
*gwa
= tb
[NHA_GATEWAY
];
1412 switch (cfg
->nh_family
) {
1414 if (nla_len(gwa
) != sizeof(u32
)) {
1415 NL_SET_ERR_MSG(extack
, "Invalid gateway");
1418 cfg
->gw
.ipv4
= nla_get_be32(gwa
);
1421 if (nla_len(gwa
) != sizeof(struct in6_addr
)) {
1422 NL_SET_ERR_MSG(extack
, "Invalid gateway");
1425 cfg
->gw
.ipv6
= nla_get_in6_addr(gwa
);
1428 NL_SET_ERR_MSG(extack
,
1429 "Unknown address family for gateway");
1433 /* device only nexthop (no gateway) */
1434 if (cfg
->nh_flags
& RTNH_F_ONLINK
) {
1435 NL_SET_ERR_MSG(extack
,
1436 "ONLINK flag can not be set for nexthop without a gateway");
1441 if (tb
[NHA_ENCAP
]) {
1442 cfg
->nh_encap
= tb
[NHA_ENCAP
];
1444 if (!tb
[NHA_ENCAP_TYPE
]) {
1445 NL_SET_ERR_MSG(extack
, "LWT encapsulation type is missing");
1449 cfg
->nh_encap_type
= nla_get_u16(tb
[NHA_ENCAP_TYPE
]);
1450 err
= lwtunnel_valid_encap_type(cfg
->nh_encap_type
, extack
);
1454 } else if (tb
[NHA_ENCAP_TYPE
]) {
1455 NL_SET_ERR_MSG(extack
, "LWT encapsulation attribute is missing");
1466 static int rtm_new_nexthop(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1467 struct netlink_ext_ack
*extack
)
1469 struct net
*net
= sock_net(skb
->sk
);
1470 struct nh_config cfg
;
1474 err
= rtm_to_nh_config(net
, skb
, nlh
, &cfg
, extack
);
1476 nh
= nexthop_add(net
, &cfg
, extack
);
1484 static int nh_valid_get_del_req(struct nlmsghdr
*nlh
, u32
*id
,
1485 struct netlink_ext_ack
*extack
)
1487 struct nhmsg
*nhm
= nlmsg_data(nlh
);
1488 struct nlattr
*tb
[NHA_MAX
+ 1];
1491 err
= nlmsg_parse(nlh
, sizeof(*nhm
), tb
, NHA_MAX
, rtm_nh_policy
,
1497 for (i
= 0; i
< __NHA_MAX
; ++i
) {
1505 NL_SET_ERR_MSG_ATTR(extack
, tb
[i
],
1506 "Unexpected attribute in request");
1510 if (nhm
->nh_protocol
|| nhm
->resvd
|| nhm
->nh_scope
|| nhm
->nh_flags
) {
1511 NL_SET_ERR_MSG(extack
, "Invalid values in header");
1516 NL_SET_ERR_MSG(extack
, "Nexthop id is missing");
1520 *id
= nla_get_u32(tb
[NHA_ID
]);
1522 NL_SET_ERR_MSG(extack
, "Invalid nexthop id");
1530 static int rtm_del_nexthop(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
1531 struct netlink_ext_ack
*extack
)
1533 struct net
*net
= sock_net(skb
->sk
);
1534 struct nl_info nlinfo
= {
1537 .portid
= NETLINK_CB(skb
).portid
,
1543 err
= nh_valid_get_del_req(nlh
, &id
, extack
);
1547 nh
= nexthop_find_by_id(net
, id
);
1551 remove_nexthop(net
, nh
, &nlinfo
);
1557 static int rtm_get_nexthop(struct sk_buff
*in_skb
, struct nlmsghdr
*nlh
,
1558 struct netlink_ext_ack
*extack
)
1560 struct net
*net
= sock_net(in_skb
->sk
);
1561 struct sk_buff
*skb
= NULL
;
1566 err
= nh_valid_get_del_req(nlh
, &id
, extack
);
1571 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
1576 nh
= nexthop_find_by_id(net
, id
);
1580 err
= nh_fill_node(skb
, nh
, RTM_NEWNEXTHOP
, NETLINK_CB(in_skb
).portid
,
1583 WARN_ON(err
== -EMSGSIZE
);
1587 err
= rtnl_unicast(skb
, net
, NETLINK_CB(in_skb
).portid
);
1595 static bool nh_dump_filtered(struct nexthop
*nh
, int dev_idx
, int master_idx
,
1596 bool group_filter
, u8 family
)
1598 const struct net_device
*dev
;
1599 const struct nh_info
*nhi
;
1601 if (group_filter
&& !nh
->is_group
)
1604 if (!dev_idx
&& !master_idx
&& !family
)
1610 nhi
= rtnl_dereference(nh
->nh_info
);
1611 if (family
&& nhi
->family
!= family
)
1614 dev
= nhi
->fib_nhc
.nhc_dev
;
1615 if (dev_idx
&& (!dev
|| dev
->ifindex
!= dev_idx
))
1619 struct net_device
*master
;
1624 master
= netdev_master_upper_dev_get((struct net_device
*)dev
);
1625 if (!master
|| master
->ifindex
!= master_idx
)
1632 static int nh_valid_dump_req(const struct nlmsghdr
*nlh
, int *dev_idx
,
1633 int *master_idx
, bool *group_filter
,
1634 struct netlink_callback
*cb
)
1636 struct netlink_ext_ack
*extack
= cb
->extack
;
1637 struct nlattr
*tb
[NHA_MAX
+ 1];
1642 err
= nlmsg_parse(nlh
, sizeof(*nhm
), tb
, NHA_MAX
, rtm_nh_policy
,
1647 for (i
= 0; i
<= NHA_MAX
; ++i
) {
1653 idx
= nla_get_u32(tb
[i
]);
1654 if (idx
> INT_MAX
) {
1655 NL_SET_ERR_MSG(extack
, "Invalid device index");
1661 idx
= nla_get_u32(tb
[i
]);
1662 if (idx
> INT_MAX
) {
1663 NL_SET_ERR_MSG(extack
, "Invalid master device index");
1669 *group_filter
= true;
1672 NL_SET_ERR_MSG(extack
, "Unsupported attribute in dump request");
1677 nhm
= nlmsg_data(nlh
);
1678 if (nhm
->nh_protocol
|| nhm
->resvd
|| nhm
->nh_scope
|| nhm
->nh_flags
) {
1679 NL_SET_ERR_MSG(extack
, "Invalid values in header for nexthop dump request");
1687 static int rtm_dump_nexthop(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1689 struct nhmsg
*nhm
= nlmsg_data(cb
->nlh
);
1690 int dev_filter_idx
= 0, master_idx
= 0;
1691 struct net
*net
= sock_net(skb
->sk
);
1692 struct rb_root
*root
= &net
->nexthop
.rb_root
;
1693 bool group_filter
= false;
1694 struct rb_node
*node
;
1698 err
= nh_valid_dump_req(cb
->nlh
, &dev_filter_idx
, &master_idx
,
1703 s_idx
= cb
->args
[0];
1704 for (node
= rb_first(root
); node
; node
= rb_next(node
)) {
1710 nh
= rb_entry(node
, struct nexthop
, rb_node
);
1711 if (nh_dump_filtered(nh
, dev_filter_idx
, master_idx
,
1712 group_filter
, nhm
->nh_family
))
1715 err
= nh_fill_node(skb
, nh
, RTM_NEWNEXTHOP
,
1716 NETLINK_CB(cb
->skb
).portid
,
1717 cb
->nlh
->nlmsg_seq
, NLM_F_MULTI
);
1719 if (likely(skb
->len
))
1732 cb
->seq
= net
->nexthop
.seq
;
1733 nl_dump_check_consistent(cb
, nlmsg_hdr(skb
));
1738 static void nexthop_sync_mtu(struct net_device
*dev
, u32 orig_mtu
)
1740 unsigned int hash
= nh_dev_hashfn(dev
->ifindex
);
1741 struct net
*net
= dev_net(dev
);
1742 struct hlist_head
*head
= &net
->nexthop
.devhash
[hash
];
1743 struct hlist_node
*n
;
1744 struct nh_info
*nhi
;
1746 hlist_for_each_entry_safe(nhi
, n
, head
, dev_hash
) {
1747 if (nhi
->fib_nhc
.nhc_dev
== dev
) {
1748 if (nhi
->family
== AF_INET
)
1749 fib_nhc_update_mtu(&nhi
->fib_nhc
, dev
->mtu
,
1756 static int nh_netdev_event(struct notifier_block
*this,
1757 unsigned long event
, void *ptr
)
1759 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
1760 struct netdev_notifier_info_ext
*info_ext
;
1764 case NETDEV_UNREGISTER
:
1765 nexthop_flush_dev(dev
);
1768 if (!(dev_get_flags(dev
) & (IFF_RUNNING
| IFF_LOWER_UP
)))
1769 nexthop_flush_dev(dev
);
1771 case NETDEV_CHANGEMTU
:
1773 nexthop_sync_mtu(dev
, info_ext
->ext
.mtu
);
1774 rt_cache_flush(dev_net(dev
));
1780 static struct notifier_block nh_netdev_notifier
= {
1781 .notifier_call
= nh_netdev_event
,
1784 static void __net_exit
nexthop_net_exit(struct net
*net
)
1787 flush_all_nexthops(net
);
1789 kfree(net
->nexthop
.devhash
);
1792 static int __net_init
nexthop_net_init(struct net
*net
)
1794 size_t sz
= sizeof(struct hlist_head
) * NH_DEV_HASHSIZE
;
1796 net
->nexthop
.rb_root
= RB_ROOT
;
1797 net
->nexthop
.devhash
= kzalloc(sz
, GFP_KERNEL
);
1798 if (!net
->nexthop
.devhash
)
1804 static struct pernet_operations nexthop_net_ops
= {
1805 .init
= nexthop_net_init
,
1806 .exit
= nexthop_net_exit
,
1809 static int __init
nexthop_init(void)
1811 register_pernet_subsys(&nexthop_net_ops
);
1813 register_netdevice_notifier(&nh_netdev_notifier
);
1815 rtnl_register(PF_UNSPEC
, RTM_NEWNEXTHOP
, rtm_new_nexthop
, NULL
, 0);
1816 rtnl_register(PF_UNSPEC
, RTM_DELNEXTHOP
, rtm_del_nexthop
, NULL
, 0);
1817 rtnl_register(PF_UNSPEC
, RTM_GETNEXTHOP
, rtm_get_nexthop
,
1818 rtm_dump_nexthop
, 0);
1820 rtnl_register(PF_INET
, RTM_NEWNEXTHOP
, rtm_new_nexthop
, NULL
, 0);
1821 rtnl_register(PF_INET
, RTM_GETNEXTHOP
, NULL
, rtm_dump_nexthop
, 0);
1823 rtnl_register(PF_INET6
, RTM_NEWNEXTHOP
, rtm_new_nexthop
, NULL
, 0);
1824 rtnl_register(PF_INET6
, RTM_GETNEXTHOP
, NULL
, rtm_dump_nexthop
, 0);
1828 subsys_initcall(nexthop_init
);