2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #define pr_fmt(fmt) "IPv6: " fmt
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
55 #include <linux/rtnetlink.h>
57 #include <net/dst_metadata.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
67 #include <linux/uaccess.h>
70 #include <linux/sysctl.h>
74 RT6_NUD_FAIL_HARD
= -3,
75 RT6_NUD_FAIL_PROBE
= -2,
76 RT6_NUD_FAIL_DO_RR
= -1,
80 static void ip6_rt_copy_init(struct rt6_info
*rt
, struct rt6_info
*ort
);
81 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
82 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
);
83 static unsigned int ip6_mtu(const struct dst_entry
*dst
);
84 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
85 static void ip6_dst_destroy(struct dst_entry
*);
86 static void ip6_dst_ifdown(struct dst_entry
*,
87 struct net_device
*dev
, int how
);
88 static int ip6_dst_gc(struct dst_ops
*ops
);
90 static int ip6_pkt_discard(struct sk_buff
*skb
);
91 static int ip6_pkt_discard_out(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
);
92 static int ip6_pkt_prohibit(struct sk_buff
*skb
);
93 static int ip6_pkt_prohibit_out(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
);
94 static void ip6_link_failure(struct sk_buff
*skb
);
95 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, struct sock
*sk
,
96 struct sk_buff
*skb
, u32 mtu
);
97 static void rt6_do_redirect(struct dst_entry
*dst
, struct sock
*sk
,
99 static void rt6_dst_from_metrics_check(struct rt6_info
*rt
);
100 static int rt6_score_route(struct rt6_info
*rt
, int oif
, int strict
);
102 #ifdef CONFIG_IPV6_ROUTE_INFO
103 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
104 const struct in6_addr
*prefix
, int prefixlen
,
105 const struct in6_addr
*gwaddr
,
106 struct net_device
*dev
,
108 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
109 const struct in6_addr
*prefix
, int prefixlen
,
110 const struct in6_addr
*gwaddr
,
111 struct net_device
*dev
);
114 struct uncached_list
{
116 struct list_head head
;
119 static DEFINE_PER_CPU_ALIGNED(struct uncached_list
, rt6_uncached_list
);
121 static void rt6_uncached_list_add(struct rt6_info
*rt
)
123 struct uncached_list
*ul
= raw_cpu_ptr(&rt6_uncached_list
);
125 rt
->dst
.flags
|= DST_NOCACHE
;
126 rt
->rt6i_uncached_list
= ul
;
128 spin_lock_bh(&ul
->lock
);
129 list_add_tail(&rt
->rt6i_uncached
, &ul
->head
);
130 spin_unlock_bh(&ul
->lock
);
133 static void rt6_uncached_list_del(struct rt6_info
*rt
)
135 if (!list_empty(&rt
->rt6i_uncached
)) {
136 struct uncached_list
*ul
= rt
->rt6i_uncached_list
;
138 spin_lock_bh(&ul
->lock
);
139 list_del(&rt
->rt6i_uncached
);
140 spin_unlock_bh(&ul
->lock
);
144 static void rt6_uncached_list_flush_dev(struct net
*net
, struct net_device
*dev
)
146 struct net_device
*loopback_dev
= net
->loopback_dev
;
149 if (dev
== loopback_dev
)
152 for_each_possible_cpu(cpu
) {
153 struct uncached_list
*ul
= per_cpu_ptr(&rt6_uncached_list
, cpu
);
156 spin_lock_bh(&ul
->lock
);
157 list_for_each_entry(rt
, &ul
->head
, rt6i_uncached
) {
158 struct inet6_dev
*rt_idev
= rt
->rt6i_idev
;
159 struct net_device
*rt_dev
= rt
->dst
.dev
;
161 if (rt_idev
->dev
== dev
) {
162 rt
->rt6i_idev
= in6_dev_get(loopback_dev
);
163 in6_dev_put(rt_idev
);
167 rt
->dst
.dev
= loopback_dev
;
168 dev_hold(rt
->dst
.dev
);
172 spin_unlock_bh(&ul
->lock
);
176 static u32
*rt6_pcpu_cow_metrics(struct rt6_info
*rt
)
178 return dst_metrics_write_ptr(rt
->dst
.from
);
181 static u32
*ipv6_cow_metrics(struct dst_entry
*dst
, unsigned long old
)
183 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
185 if (rt
->rt6i_flags
& RTF_PCPU
)
186 return rt6_pcpu_cow_metrics(rt
);
187 else if (rt
->rt6i_flags
& RTF_CACHE
)
190 return dst_cow_metrics_generic(dst
, old
);
193 static inline const void *choose_neigh_daddr(struct rt6_info
*rt
,
197 struct in6_addr
*p
= &rt
->rt6i_gateway
;
199 if (!ipv6_addr_any(p
))
200 return (const void *) p
;
202 return &ipv6_hdr(skb
)->daddr
;
206 static struct neighbour
*ip6_neigh_lookup(const struct dst_entry
*dst
,
210 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
213 daddr
= choose_neigh_daddr(rt
, skb
, daddr
);
214 n
= __ipv6_neigh_lookup(dst
->dev
, daddr
);
217 return neigh_create(&nd_tbl
, daddr
, dst
->dev
);
220 static void ip6_confirm_neigh(const struct dst_entry
*dst
, const void *daddr
)
222 struct net_device
*dev
= dst
->dev
;
223 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
225 daddr
= choose_neigh_daddr(rt
, NULL
, daddr
);
228 if (dev
->flags
& (IFF_NOARP
| IFF_LOOPBACK
))
230 if (ipv6_addr_is_multicast((const struct in6_addr
*)daddr
))
232 __ipv6_confirm_neigh(dev
, daddr
);
235 static struct dst_ops ip6_dst_ops_template
= {
239 .check
= ip6_dst_check
,
240 .default_advmss
= ip6_default_advmss
,
242 .cow_metrics
= ipv6_cow_metrics
,
243 .destroy
= ip6_dst_destroy
,
244 .ifdown
= ip6_dst_ifdown
,
245 .negative_advice
= ip6_negative_advice
,
246 .link_failure
= ip6_link_failure
,
247 .update_pmtu
= ip6_rt_update_pmtu
,
248 .redirect
= rt6_do_redirect
,
249 .local_out
= __ip6_local_out
,
250 .neigh_lookup
= ip6_neigh_lookup
,
251 .confirm_neigh
= ip6_confirm_neigh
,
254 static unsigned int ip6_blackhole_mtu(const struct dst_entry
*dst
)
256 unsigned int mtu
= dst_metric_raw(dst
, RTAX_MTU
);
258 return mtu
? : dst
->dev
->mtu
;
261 static void ip6_rt_blackhole_update_pmtu(struct dst_entry
*dst
, struct sock
*sk
,
262 struct sk_buff
*skb
, u32 mtu
)
266 static void ip6_rt_blackhole_redirect(struct dst_entry
*dst
, struct sock
*sk
,
271 static struct dst_ops ip6_dst_blackhole_ops
= {
273 .destroy
= ip6_dst_destroy
,
274 .check
= ip6_dst_check
,
275 .mtu
= ip6_blackhole_mtu
,
276 .default_advmss
= ip6_default_advmss
,
277 .update_pmtu
= ip6_rt_blackhole_update_pmtu
,
278 .redirect
= ip6_rt_blackhole_redirect
,
279 .cow_metrics
= dst_cow_metrics_generic
,
280 .neigh_lookup
= ip6_neigh_lookup
,
283 static const u32 ip6_template_metrics
[RTAX_MAX
] = {
284 [RTAX_HOPLIMIT
- 1] = 0,
287 static const struct rt6_info ip6_null_entry_template
= {
289 .__refcnt
= ATOMIC_INIT(1),
291 .obsolete
= DST_OBSOLETE_FORCE_CHK
,
292 .error
= -ENETUNREACH
,
293 .input
= ip6_pkt_discard
,
294 .output
= ip6_pkt_discard_out
,
296 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
297 .rt6i_protocol
= RTPROT_KERNEL
,
298 .rt6i_metric
= ~(u32
) 0,
299 .rt6i_ref
= ATOMIC_INIT(1),
302 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
304 static const struct rt6_info ip6_prohibit_entry_template
= {
306 .__refcnt
= ATOMIC_INIT(1),
308 .obsolete
= DST_OBSOLETE_FORCE_CHK
,
310 .input
= ip6_pkt_prohibit
,
311 .output
= ip6_pkt_prohibit_out
,
313 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
314 .rt6i_protocol
= RTPROT_KERNEL
,
315 .rt6i_metric
= ~(u32
) 0,
316 .rt6i_ref
= ATOMIC_INIT(1),
319 static const struct rt6_info ip6_blk_hole_entry_template
= {
321 .__refcnt
= ATOMIC_INIT(1),
323 .obsolete
= DST_OBSOLETE_FORCE_CHK
,
325 .input
= dst_discard
,
326 .output
= dst_discard_out
,
328 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
329 .rt6i_protocol
= RTPROT_KERNEL
,
330 .rt6i_metric
= ~(u32
) 0,
331 .rt6i_ref
= ATOMIC_INIT(1),
336 static void rt6_info_init(struct rt6_info
*rt
)
338 struct dst_entry
*dst
= &rt
->dst
;
340 memset(dst
+ 1, 0, sizeof(*rt
) - sizeof(*dst
));
341 INIT_LIST_HEAD(&rt
->rt6i_siblings
);
342 INIT_LIST_HEAD(&rt
->rt6i_uncached
);
345 /* allocate dst with ip6_dst_ops */
346 static struct rt6_info
*__ip6_dst_alloc(struct net
*net
,
347 struct net_device
*dev
,
350 struct rt6_info
*rt
= dst_alloc(&net
->ipv6
.ip6_dst_ops
, dev
,
351 0, DST_OBSOLETE_FORCE_CHK
, flags
);
359 struct rt6_info
*ip6_dst_alloc(struct net
*net
,
360 struct net_device
*dev
,
363 struct rt6_info
*rt
= __ip6_dst_alloc(net
, dev
, flags
);
366 rt
->rt6i_pcpu
= alloc_percpu_gfp(struct rt6_info
*, GFP_ATOMIC
);
370 for_each_possible_cpu(cpu
) {
373 p
= per_cpu_ptr(rt
->rt6i_pcpu
, cpu
);
374 /* no one shares rt */
378 dst_destroy((struct dst_entry
*)rt
);
385 EXPORT_SYMBOL(ip6_dst_alloc
);
387 static void ip6_dst_destroy(struct dst_entry
*dst
)
389 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
390 struct dst_entry
*from
= dst
->from
;
391 struct inet6_dev
*idev
;
393 dst_destroy_metrics_generic(dst
);
394 free_percpu(rt
->rt6i_pcpu
);
395 rt6_uncached_list_del(rt
);
397 idev
= rt
->rt6i_idev
;
399 rt
->rt6i_idev
= NULL
;
407 static void ip6_dst_ifdown(struct dst_entry
*dst
, struct net_device
*dev
,
410 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
411 struct inet6_dev
*idev
= rt
->rt6i_idev
;
412 struct net_device
*loopback_dev
=
413 dev_net(dev
)->loopback_dev
;
415 if (dev
!= loopback_dev
) {
416 if (idev
&& idev
->dev
== dev
) {
417 struct inet6_dev
*loopback_idev
=
418 in6_dev_get(loopback_dev
);
420 rt
->rt6i_idev
= loopback_idev
;
427 static bool __rt6_check_expired(const struct rt6_info
*rt
)
429 if (rt
->rt6i_flags
& RTF_EXPIRES
)
430 return time_after(jiffies
, rt
->dst
.expires
);
435 static bool rt6_check_expired(const struct rt6_info
*rt
)
437 if (rt
->rt6i_flags
& RTF_EXPIRES
) {
438 if (time_after(jiffies
, rt
->dst
.expires
))
440 } else if (rt
->dst
.from
) {
441 return rt6_check_expired((struct rt6_info
*) rt
->dst
.from
);
446 /* Multipath route selection:
447 * Hash based function using packet header and flowlabel.
448 * Adapted from fib_info_hashfn()
450 static int rt6_info_hash_nhsfn(unsigned int candidate_count
,
451 const struct flowi6
*fl6
)
453 return get_hash_from_flowi6(fl6
) % candidate_count
;
456 static struct rt6_info
*rt6_multipath_select(struct rt6_info
*match
,
457 struct flowi6
*fl6
, int oif
,
460 struct rt6_info
*sibling
, *next_sibling
;
463 route_choosen
= rt6_info_hash_nhsfn(match
->rt6i_nsiblings
+ 1, fl6
);
464 /* Don't change the route, if route_choosen == 0
465 * (siblings does not include ourself)
468 list_for_each_entry_safe(sibling
, next_sibling
,
469 &match
->rt6i_siblings
, rt6i_siblings
) {
471 if (route_choosen
== 0) {
472 if (rt6_score_route(sibling
, oif
, strict
) < 0)
482 * Route lookup. Any table->tb6_lock is implied.
485 static inline struct rt6_info
*rt6_device_match(struct net
*net
,
487 const struct in6_addr
*saddr
,
491 struct rt6_info
*local
= NULL
;
492 struct rt6_info
*sprt
;
494 if (!oif
&& ipv6_addr_any(saddr
))
497 for (sprt
= rt
; sprt
; sprt
= sprt
->dst
.rt6_next
) {
498 struct net_device
*dev
= sprt
->dst
.dev
;
501 if (dev
->ifindex
== oif
)
503 if (dev
->flags
& IFF_LOOPBACK
) {
504 if (!sprt
->rt6i_idev
||
505 sprt
->rt6i_idev
->dev
->ifindex
!= oif
) {
506 if (flags
& RT6_LOOKUP_F_IFACE
)
509 local
->rt6i_idev
->dev
->ifindex
== oif
)
515 if (ipv6_chk_addr(net
, saddr
, dev
,
516 flags
& RT6_LOOKUP_F_IFACE
))
525 if (flags
& RT6_LOOKUP_F_IFACE
)
526 return net
->ipv6
.ip6_null_entry
;
532 #ifdef CONFIG_IPV6_ROUTER_PREF
533 struct __rt6_probe_work
{
534 struct work_struct work
;
535 struct in6_addr target
;
536 struct net_device
*dev
;
539 static void rt6_probe_deferred(struct work_struct
*w
)
541 struct in6_addr mcaddr
;
542 struct __rt6_probe_work
*work
=
543 container_of(w
, struct __rt6_probe_work
, work
);
545 addrconf_addr_solict_mult(&work
->target
, &mcaddr
);
546 ndisc_send_ns(work
->dev
, &work
->target
, &mcaddr
, NULL
, 0);
551 static void rt6_probe(struct rt6_info
*rt
)
553 struct __rt6_probe_work
*work
;
554 struct neighbour
*neigh
;
556 * Okay, this does not seem to be appropriate
557 * for now, however, we need to check if it
558 * is really so; aka Router Reachability Probing.
560 * Router Reachability Probe MUST be rate-limited
561 * to no more than one per minute.
563 if (!rt
|| !(rt
->rt6i_flags
& RTF_GATEWAY
))
566 neigh
= __ipv6_neigh_lookup_noref(rt
->dst
.dev
, &rt
->rt6i_gateway
);
568 if (neigh
->nud_state
& NUD_VALID
)
572 write_lock(&neigh
->lock
);
573 if (!(neigh
->nud_state
& NUD_VALID
) &&
576 rt
->rt6i_idev
->cnf
.rtr_probe_interval
)) {
577 work
= kmalloc(sizeof(*work
), GFP_ATOMIC
);
579 __neigh_set_probe_once(neigh
);
581 write_unlock(&neigh
->lock
);
583 work
= kmalloc(sizeof(*work
), GFP_ATOMIC
);
587 INIT_WORK(&work
->work
, rt6_probe_deferred
);
588 work
->target
= rt
->rt6i_gateway
;
589 dev_hold(rt
->dst
.dev
);
590 work
->dev
= rt
->dst
.dev
;
591 schedule_work(&work
->work
);
595 rcu_read_unlock_bh();
598 static inline void rt6_probe(struct rt6_info
*rt
)
604 * Default Router Selection (RFC 2461 6.3.6)
606 static inline int rt6_check_dev(struct rt6_info
*rt
, int oif
)
608 struct net_device
*dev
= rt
->dst
.dev
;
609 if (!oif
|| dev
->ifindex
== oif
)
611 if ((dev
->flags
& IFF_LOOPBACK
) &&
612 rt
->rt6i_idev
&& rt
->rt6i_idev
->dev
->ifindex
== oif
)
617 static inline enum rt6_nud_state
rt6_check_neigh(struct rt6_info
*rt
)
619 struct neighbour
*neigh
;
620 enum rt6_nud_state ret
= RT6_NUD_FAIL_HARD
;
622 if (rt
->rt6i_flags
& RTF_NONEXTHOP
||
623 !(rt
->rt6i_flags
& RTF_GATEWAY
))
624 return RT6_NUD_SUCCEED
;
627 neigh
= __ipv6_neigh_lookup_noref(rt
->dst
.dev
, &rt
->rt6i_gateway
);
629 read_lock(&neigh
->lock
);
630 if (neigh
->nud_state
& NUD_VALID
)
631 ret
= RT6_NUD_SUCCEED
;
632 #ifdef CONFIG_IPV6_ROUTER_PREF
633 else if (!(neigh
->nud_state
& NUD_FAILED
))
634 ret
= RT6_NUD_SUCCEED
;
636 ret
= RT6_NUD_FAIL_PROBE
;
638 read_unlock(&neigh
->lock
);
640 ret
= IS_ENABLED(CONFIG_IPV6_ROUTER_PREF
) ?
641 RT6_NUD_SUCCEED
: RT6_NUD_FAIL_DO_RR
;
643 rcu_read_unlock_bh();
648 static int rt6_score_route(struct rt6_info
*rt
, int oif
,
653 m
= rt6_check_dev(rt
, oif
);
654 if (!m
&& (strict
& RT6_LOOKUP_F_IFACE
))
655 return RT6_NUD_FAIL_HARD
;
656 #ifdef CONFIG_IPV6_ROUTER_PREF
657 m
|= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt
->rt6i_flags
)) << 2;
659 if (strict
& RT6_LOOKUP_F_REACHABLE
) {
660 int n
= rt6_check_neigh(rt
);
667 static struct rt6_info
*find_match(struct rt6_info
*rt
, int oif
, int strict
,
668 int *mpri
, struct rt6_info
*match
,
672 bool match_do_rr
= false;
673 struct inet6_dev
*idev
= rt
->rt6i_idev
;
674 struct net_device
*dev
= rt
->dst
.dev
;
676 if (dev
&& !netif_carrier_ok(dev
) &&
677 idev
->cnf
.ignore_routes_with_linkdown
&&
678 !(strict
& RT6_LOOKUP_F_IGNORE_LINKSTATE
))
681 if (rt6_check_expired(rt
))
684 m
= rt6_score_route(rt
, oif
, strict
);
685 if (m
== RT6_NUD_FAIL_DO_RR
) {
687 m
= 0; /* lowest valid score */
688 } else if (m
== RT6_NUD_FAIL_HARD
) {
692 if (strict
& RT6_LOOKUP_F_REACHABLE
)
695 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
697 *do_rr
= match_do_rr
;
705 static struct rt6_info
*find_rr_leaf(struct fib6_node
*fn
,
706 struct rt6_info
*rr_head
,
707 u32 metric
, int oif
, int strict
,
710 struct rt6_info
*rt
, *match
, *cont
;
715 for (rt
= rr_head
; rt
; rt
= rt
->dst
.rt6_next
) {
716 if (rt
->rt6i_metric
!= metric
) {
721 match
= find_match(rt
, oif
, strict
, &mpri
, match
, do_rr
);
724 for (rt
= fn
->leaf
; rt
&& rt
!= rr_head
; rt
= rt
->dst
.rt6_next
) {
725 if (rt
->rt6i_metric
!= metric
) {
730 match
= find_match(rt
, oif
, strict
, &mpri
, match
, do_rr
);
736 for (rt
= cont
; rt
; rt
= rt
->dst
.rt6_next
)
737 match
= find_match(rt
, oif
, strict
, &mpri
, match
, do_rr
);
742 static struct rt6_info
*rt6_select(struct fib6_node
*fn
, int oif
, int strict
)
744 struct rt6_info
*match
, *rt0
;
750 fn
->rr_ptr
= rt0
= fn
->leaf
;
752 match
= find_rr_leaf(fn
, rt0
, rt0
->rt6i_metric
, oif
, strict
,
756 struct rt6_info
*next
= rt0
->dst
.rt6_next
;
758 /* no entries matched; do round-robin */
759 if (!next
|| next
->rt6i_metric
!= rt0
->rt6i_metric
)
766 net
= dev_net(rt0
->dst
.dev
);
767 return match
? match
: net
->ipv6
.ip6_null_entry
;
770 static bool rt6_is_gw_or_nonexthop(const struct rt6_info
*rt
)
772 return (rt
->rt6i_flags
& (RTF_NONEXTHOP
| RTF_GATEWAY
));
775 #ifdef CONFIG_IPV6_ROUTE_INFO
776 int rt6_route_rcv(struct net_device
*dev
, u8
*opt
, int len
,
777 const struct in6_addr
*gwaddr
)
779 struct net
*net
= dev_net(dev
);
780 struct route_info
*rinfo
= (struct route_info
*) opt
;
781 struct in6_addr prefix_buf
, *prefix
;
783 unsigned long lifetime
;
786 if (len
< sizeof(struct route_info
)) {
790 /* Sanity check for prefix_len and length */
791 if (rinfo
->length
> 3) {
793 } else if (rinfo
->prefix_len
> 128) {
795 } else if (rinfo
->prefix_len
> 64) {
796 if (rinfo
->length
< 2) {
799 } else if (rinfo
->prefix_len
> 0) {
800 if (rinfo
->length
< 1) {
805 pref
= rinfo
->route_pref
;
806 if (pref
== ICMPV6_ROUTER_PREF_INVALID
)
809 lifetime
= addrconf_timeout_fixup(ntohl(rinfo
->lifetime
), HZ
);
811 if (rinfo
->length
== 3)
812 prefix
= (struct in6_addr
*)rinfo
->prefix
;
814 /* this function is safe */
815 ipv6_addr_prefix(&prefix_buf
,
816 (struct in6_addr
*)rinfo
->prefix
,
818 prefix
= &prefix_buf
;
821 if (rinfo
->prefix_len
== 0)
822 rt
= rt6_get_dflt_router(gwaddr
, dev
);
824 rt
= rt6_get_route_info(net
, prefix
, rinfo
->prefix_len
,
827 if (rt
&& !lifetime
) {
833 rt
= rt6_add_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
,
836 rt
->rt6i_flags
= RTF_ROUTEINFO
|
837 (rt
->rt6i_flags
& ~RTF_PREF_MASK
) | RTF_PREF(pref
);
840 if (!addrconf_finite_timeout(lifetime
))
841 rt6_clean_expires(rt
);
843 rt6_set_expires(rt
, jiffies
+ HZ
* lifetime
);
851 static struct fib6_node
* fib6_backtrack(struct fib6_node
*fn
,
852 struct in6_addr
*saddr
)
854 struct fib6_node
*pn
;
856 if (fn
->fn_flags
& RTN_TL_ROOT
)
859 if (FIB6_SUBTREE(pn
) && FIB6_SUBTREE(pn
) != fn
)
860 fn
= fib6_lookup(FIB6_SUBTREE(pn
), NULL
, saddr
);
863 if (fn
->fn_flags
& RTN_RTINFO
)
868 static struct rt6_info
*ip6_pol_route_lookup(struct net
*net
,
869 struct fib6_table
*table
,
870 struct flowi6
*fl6
, int flags
)
872 struct fib6_node
*fn
;
875 read_lock_bh(&table
->tb6_lock
);
876 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
879 rt
= rt6_device_match(net
, rt
, &fl6
->saddr
, fl6
->flowi6_oif
, flags
);
880 if (rt
->rt6i_nsiblings
&& fl6
->flowi6_oif
== 0)
881 rt
= rt6_multipath_select(rt
, fl6
, fl6
->flowi6_oif
, flags
);
882 if (rt
== net
->ipv6
.ip6_null_entry
) {
883 fn
= fib6_backtrack(fn
, &fl6
->saddr
);
887 dst_use(&rt
->dst
, jiffies
);
888 read_unlock_bh(&table
->tb6_lock
);
890 trace_fib6_table_lookup(net
, rt
, table
->tb6_id
, fl6
);
896 struct dst_entry
*ip6_route_lookup(struct net
*net
, struct flowi6
*fl6
,
899 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_lookup
);
901 EXPORT_SYMBOL_GPL(ip6_route_lookup
);
903 struct rt6_info
*rt6_lookup(struct net
*net
, const struct in6_addr
*daddr
,
904 const struct in6_addr
*saddr
, int oif
, int strict
)
906 struct flowi6 fl6
= {
910 struct dst_entry
*dst
;
911 int flags
= strict
? RT6_LOOKUP_F_IFACE
: 0;
914 memcpy(&fl6
.saddr
, saddr
, sizeof(*saddr
));
915 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
918 dst
= fib6_rule_lookup(net
, &fl6
, flags
, ip6_pol_route_lookup
);
920 return (struct rt6_info
*) dst
;
926 EXPORT_SYMBOL(rt6_lookup
);
928 /* ip6_ins_rt is called with FREE table->tb6_lock.
929 It takes new route entry, the addition fails by any reason the
930 route is freed. In any case, if caller does not hold it, it may
934 static int __ip6_ins_rt(struct rt6_info
*rt
, struct nl_info
*info
,
935 struct mx6_config
*mxc
)
938 struct fib6_table
*table
;
940 table
= rt
->rt6i_table
;
941 write_lock_bh(&table
->tb6_lock
);
942 err
= fib6_add(&table
->tb6_root
, rt
, info
, mxc
);
943 write_unlock_bh(&table
->tb6_lock
);
948 int ip6_ins_rt(struct rt6_info
*rt
)
950 struct nl_info info
= { .nl_net
= dev_net(rt
->dst
.dev
), };
951 struct mx6_config mxc
= { .mx
= NULL
, };
953 return __ip6_ins_rt(rt
, &info
, &mxc
);
956 static struct rt6_info
*ip6_rt_cache_alloc(struct rt6_info
*ort
,
957 const struct in6_addr
*daddr
,
958 const struct in6_addr
*saddr
)
966 if (ort
->rt6i_flags
& (RTF_CACHE
| RTF_PCPU
))
967 ort
= (struct rt6_info
*)ort
->dst
.from
;
969 rt
= __ip6_dst_alloc(dev_net(ort
->dst
.dev
), ort
->dst
.dev
, 0);
974 ip6_rt_copy_init(rt
, ort
);
975 rt
->rt6i_flags
|= RTF_CACHE
;
977 rt
->dst
.flags
|= DST_HOST
;
978 rt
->rt6i_dst
.addr
= *daddr
;
979 rt
->rt6i_dst
.plen
= 128;
981 if (!rt6_is_gw_or_nonexthop(ort
)) {
982 if (ort
->rt6i_dst
.plen
!= 128 &&
983 ipv6_addr_equal(&ort
->rt6i_dst
.addr
, daddr
))
984 rt
->rt6i_flags
|= RTF_ANYCAST
;
985 #ifdef CONFIG_IPV6_SUBTREES
986 if (rt
->rt6i_src
.plen
&& saddr
) {
987 rt
->rt6i_src
.addr
= *saddr
;
988 rt
->rt6i_src
.plen
= 128;
996 static struct rt6_info
*ip6_rt_pcpu_alloc(struct rt6_info
*rt
)
998 struct rt6_info
*pcpu_rt
;
1000 pcpu_rt
= __ip6_dst_alloc(dev_net(rt
->dst
.dev
),
1001 rt
->dst
.dev
, rt
->dst
.flags
);
1005 ip6_rt_copy_init(pcpu_rt
, rt
);
1006 pcpu_rt
->rt6i_protocol
= rt
->rt6i_protocol
;
1007 pcpu_rt
->rt6i_flags
|= RTF_PCPU
;
1011 /* It should be called with read_lock_bh(&tb6_lock) acquired */
1012 static struct rt6_info
*rt6_get_pcpu_route(struct rt6_info
*rt
)
1014 struct rt6_info
*pcpu_rt
, **p
;
1016 p
= this_cpu_ptr(rt
->rt6i_pcpu
);
1020 dst_hold(&pcpu_rt
->dst
);
1021 rt6_dst_from_metrics_check(pcpu_rt
);
1026 static struct rt6_info
*rt6_make_pcpu_route(struct rt6_info
*rt
)
1028 struct fib6_table
*table
= rt
->rt6i_table
;
1029 struct rt6_info
*pcpu_rt
, *prev
, **p
;
1031 pcpu_rt
= ip6_rt_pcpu_alloc(rt
);
1033 struct net
*net
= dev_net(rt
->dst
.dev
);
1035 dst_hold(&net
->ipv6
.ip6_null_entry
->dst
);
1036 return net
->ipv6
.ip6_null_entry
;
1039 read_lock_bh(&table
->tb6_lock
);
1040 if (rt
->rt6i_pcpu
) {
1041 p
= this_cpu_ptr(rt
->rt6i_pcpu
);
1042 prev
= cmpxchg(p
, NULL
, pcpu_rt
);
1044 /* If someone did it before us, return prev instead */
1045 dst_destroy(&pcpu_rt
->dst
);
1049 /* rt has been removed from the fib6 tree
1050 * before we have a chance to acquire the read_lock.
1051 * In this case, don't brother to create a pcpu rt
1052 * since rt is going away anyway. The next
1053 * dst_check() will trigger a re-lookup.
1055 dst_destroy(&pcpu_rt
->dst
);
1058 dst_hold(&pcpu_rt
->dst
);
1059 rt6_dst_from_metrics_check(pcpu_rt
);
1060 read_unlock_bh(&table
->tb6_lock
);
1064 struct rt6_info
*ip6_pol_route(struct net
*net
, struct fib6_table
*table
,
1065 int oif
, struct flowi6
*fl6
, int flags
)
1067 struct fib6_node
*fn
, *saved_fn
;
1068 struct rt6_info
*rt
;
1071 strict
|= flags
& RT6_LOOKUP_F_IFACE
;
1072 strict
|= flags
& RT6_LOOKUP_F_IGNORE_LINKSTATE
;
1073 if (net
->ipv6
.devconf_all
->forwarding
== 0)
1074 strict
|= RT6_LOOKUP_F_REACHABLE
;
1076 read_lock_bh(&table
->tb6_lock
);
1078 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
1081 if (fl6
->flowi6_flags
& FLOWI_FLAG_SKIP_NH_OIF
)
1085 rt
= rt6_select(fn
, oif
, strict
);
1086 if (rt
->rt6i_nsiblings
)
1087 rt
= rt6_multipath_select(rt
, fl6
, oif
, strict
);
1088 if (rt
== net
->ipv6
.ip6_null_entry
) {
1089 fn
= fib6_backtrack(fn
, &fl6
->saddr
);
1091 goto redo_rt6_select
;
1092 else if (strict
& RT6_LOOKUP_F_REACHABLE
) {
1093 /* also consider unreachable route */
1094 strict
&= ~RT6_LOOKUP_F_REACHABLE
;
1096 goto redo_rt6_select
;
1101 if (rt
== net
->ipv6
.ip6_null_entry
|| (rt
->rt6i_flags
& RTF_CACHE
)) {
1102 dst_use(&rt
->dst
, jiffies
);
1103 read_unlock_bh(&table
->tb6_lock
);
1105 rt6_dst_from_metrics_check(rt
);
1107 trace_fib6_table_lookup(net
, rt
, table
->tb6_id
, fl6
);
1109 } else if (unlikely((fl6
->flowi6_flags
& FLOWI_FLAG_KNOWN_NH
) &&
1110 !(rt
->rt6i_flags
& RTF_GATEWAY
))) {
1111 /* Create a RTF_CACHE clone which will not be
1112 * owned by the fib6 tree. It is for the special case where
1113 * the daddr in the skb during the neighbor look-up is different
1114 * from the fl6->daddr used to look-up route here.
1117 struct rt6_info
*uncached_rt
;
1119 dst_use(&rt
->dst
, jiffies
);
1120 read_unlock_bh(&table
->tb6_lock
);
1122 uncached_rt
= ip6_rt_cache_alloc(rt
, &fl6
->daddr
, NULL
);
1123 dst_release(&rt
->dst
);
1126 rt6_uncached_list_add(uncached_rt
);
1128 uncached_rt
= net
->ipv6
.ip6_null_entry
;
1130 dst_hold(&uncached_rt
->dst
);
1132 trace_fib6_table_lookup(net
, uncached_rt
, table
->tb6_id
, fl6
);
1136 /* Get a percpu copy */
1138 struct rt6_info
*pcpu_rt
;
1140 rt
->dst
.lastuse
= jiffies
;
1142 pcpu_rt
= rt6_get_pcpu_route(rt
);
1145 read_unlock_bh(&table
->tb6_lock
);
1147 /* We have to do the read_unlock first
1148 * because rt6_make_pcpu_route() may trigger
1149 * ip6_dst_gc() which will take the write_lock.
1152 read_unlock_bh(&table
->tb6_lock
);
1153 pcpu_rt
= rt6_make_pcpu_route(rt
);
1154 dst_release(&rt
->dst
);
1157 trace_fib6_table_lookup(net
, pcpu_rt
, table
->tb6_id
, fl6
);
1162 EXPORT_SYMBOL_GPL(ip6_pol_route
);
1164 static struct rt6_info
*ip6_pol_route_input(struct net
*net
, struct fib6_table
*table
,
1165 struct flowi6
*fl6
, int flags
)
1167 return ip6_pol_route(net
, table
, fl6
->flowi6_iif
, fl6
, flags
);
1170 struct dst_entry
*ip6_route_input_lookup(struct net
*net
,
1171 struct net_device
*dev
,
1172 struct flowi6
*fl6
, int flags
)
1174 if (rt6_need_strict(&fl6
->daddr
) && dev
->type
!= ARPHRD_PIMREG
)
1175 flags
|= RT6_LOOKUP_F_IFACE
;
1177 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_input
);
1179 EXPORT_SYMBOL_GPL(ip6_route_input_lookup
);
1181 void ip6_route_input(struct sk_buff
*skb
)
1183 const struct ipv6hdr
*iph
= ipv6_hdr(skb
);
1184 struct net
*net
= dev_net(skb
->dev
);
1185 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
1186 struct ip_tunnel_info
*tun_info
;
1187 struct flowi6 fl6
= {
1188 .flowi6_iif
= skb
->dev
->ifindex
,
1189 .daddr
= iph
->daddr
,
1190 .saddr
= iph
->saddr
,
1191 .flowlabel
= ip6_flowinfo(iph
),
1192 .flowi6_mark
= skb
->mark
,
1193 .flowi6_proto
= iph
->nexthdr
,
1196 tun_info
= skb_tunnel_info(skb
);
1197 if (tun_info
&& !(tun_info
->mode
& IP_TUNNEL_INFO_TX
))
1198 fl6
.flowi6_tun_key
.tun_id
= tun_info
->key
.tun_id
;
1200 skb_dst_set(skb
, ip6_route_input_lookup(net
, skb
->dev
, &fl6
, flags
));
1203 static struct rt6_info
*ip6_pol_route_output(struct net
*net
, struct fib6_table
*table
,
1204 struct flowi6
*fl6
, int flags
)
1206 return ip6_pol_route(net
, table
, fl6
->flowi6_oif
, fl6
, flags
);
1209 struct dst_entry
*ip6_route_output_flags(struct net
*net
, const struct sock
*sk
,
1210 struct flowi6
*fl6
, int flags
)
1214 if (rt6_need_strict(&fl6
->daddr
)) {
1215 struct dst_entry
*dst
;
1217 dst
= l3mdev_link_scope_lookup(net
, fl6
);
1222 fl6
->flowi6_iif
= LOOPBACK_IFINDEX
;
1224 any_src
= ipv6_addr_any(&fl6
->saddr
);
1225 if ((sk
&& sk
->sk_bound_dev_if
) || rt6_need_strict(&fl6
->daddr
) ||
1226 (fl6
->flowi6_oif
&& any_src
))
1227 flags
|= RT6_LOOKUP_F_IFACE
;
1230 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
1232 flags
|= rt6_srcprefs2flags(inet6_sk(sk
)->srcprefs
);
1234 return fib6_rule_lookup(net
, fl6
, flags
, ip6_pol_route_output
);
1236 EXPORT_SYMBOL_GPL(ip6_route_output_flags
);
1238 struct dst_entry
*ip6_blackhole_route(struct net
*net
, struct dst_entry
*dst_orig
)
1240 struct rt6_info
*rt
, *ort
= (struct rt6_info
*) dst_orig
;
1241 struct dst_entry
*new = NULL
;
1243 rt
= dst_alloc(&ip6_dst_blackhole_ops
, ort
->dst
.dev
, 1, DST_OBSOLETE_NONE
, 0);
1249 new->input
= dst_discard
;
1250 new->output
= dst_discard_out
;
1252 dst_copy_metrics(new, &ort
->dst
);
1253 rt
->rt6i_idev
= ort
->rt6i_idev
;
1255 in6_dev_hold(rt
->rt6i_idev
);
1257 rt
->rt6i_gateway
= ort
->rt6i_gateway
;
1258 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_PCPU
;
1259 rt
->rt6i_metric
= 0;
1261 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
1262 #ifdef CONFIG_IPV6_SUBTREES
1263 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1269 dst_release(dst_orig
);
1270 return new ? new : ERR_PTR(-ENOMEM
);
1274 * Destination cache support functions
1277 static void rt6_dst_from_metrics_check(struct rt6_info
*rt
)
1280 dst_metrics_ptr(&rt
->dst
) != dst_metrics_ptr(rt
->dst
.from
))
1281 dst_init_metrics(&rt
->dst
, dst_metrics_ptr(rt
->dst
.from
), true);
1284 static struct dst_entry
*rt6_check(struct rt6_info
*rt
, u32 cookie
)
1286 if (!rt
->rt6i_node
|| (rt
->rt6i_node
->fn_sernum
!= cookie
))
1289 if (rt6_check_expired(rt
))
1295 static struct dst_entry
*rt6_dst_from_check(struct rt6_info
*rt
, u32 cookie
)
1297 if (!__rt6_check_expired(rt
) &&
1298 rt
->dst
.obsolete
== DST_OBSOLETE_FORCE_CHK
&&
1299 rt6_check((struct rt6_info
*)(rt
->dst
.from
), cookie
))
1305 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
1307 struct rt6_info
*rt
;
1309 rt
= (struct rt6_info
*) dst
;
1311 /* All IPV6 dsts are created with ->obsolete set to the value
1312 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1313 * into this function always.
1316 rt6_dst_from_metrics_check(rt
);
1318 if (rt
->rt6i_flags
& RTF_PCPU
||
1319 (unlikely(dst
->flags
& DST_NOCACHE
) && rt
->dst
.from
))
1320 return rt6_dst_from_check(rt
, cookie
);
1322 return rt6_check(rt
, cookie
);
1325 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
1327 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1330 if (rt
->rt6i_flags
& RTF_CACHE
) {
1331 if (rt6_check_expired(rt
)) {
1343 static void ip6_link_failure(struct sk_buff
*skb
)
1345 struct rt6_info
*rt
;
1347 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0);
1349 rt
= (struct rt6_info
*) skb_dst(skb
);
1351 if (rt
->rt6i_flags
& RTF_CACHE
) {
1354 } else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
)) {
1355 rt
->rt6i_node
->fn_sernum
= -1;
1360 static void rt6_do_update_pmtu(struct rt6_info
*rt
, u32 mtu
)
1362 struct net
*net
= dev_net(rt
->dst
.dev
);
1364 rt
->rt6i_flags
|= RTF_MODIFIED
;
1365 rt
->rt6i_pmtu
= mtu
;
1366 rt6_update_expires(rt
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1369 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info
*rt
)
1371 return !(rt
->rt6i_flags
& RTF_CACHE
) &&
1372 (rt
->rt6i_flags
& RTF_PCPU
|| rt
->rt6i_node
);
1375 static void __ip6_rt_update_pmtu(struct dst_entry
*dst
, const struct sock
*sk
,
1376 const struct ipv6hdr
*iph
, u32 mtu
)
1378 const struct in6_addr
*daddr
, *saddr
;
1379 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
1381 if (rt6
->rt6i_flags
& RTF_LOCAL
)
1384 if (dst_metric_locked(dst
, RTAX_MTU
))
1388 daddr
= &iph
->daddr
;
1389 saddr
= &iph
->saddr
;
1391 daddr
= &sk
->sk_v6_daddr
;
1392 saddr
= &inet6_sk(sk
)->saddr
;
1397 dst_confirm_neigh(dst
, daddr
);
1398 mtu
= max_t(u32
, mtu
, IPV6_MIN_MTU
);
1399 if (mtu
>= dst_mtu(dst
))
1402 if (!rt6_cache_allowed_for_pmtu(rt6
)) {
1403 rt6_do_update_pmtu(rt6
, mtu
);
1405 struct rt6_info
*nrt6
;
1407 nrt6
= ip6_rt_cache_alloc(rt6
, daddr
, saddr
);
1409 rt6_do_update_pmtu(nrt6
, mtu
);
1411 /* ip6_ins_rt(nrt6) will bump the
1412 * rt6->rt6i_node->fn_sernum
1413 * which will fail the next rt6_check() and
1414 * invalidate the sk->sk_dst_cache.
1421 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, struct sock
*sk
,
1422 struct sk_buff
*skb
, u32 mtu
)
1424 __ip6_rt_update_pmtu(dst
, sk
, skb
? ipv6_hdr(skb
) : NULL
, mtu
);
1427 void ip6_update_pmtu(struct sk_buff
*skb
, struct net
*net
, __be32 mtu
,
1428 int oif
, u32 mark
, kuid_t uid
)
1430 const struct ipv6hdr
*iph
= (struct ipv6hdr
*) skb
->data
;
1431 struct dst_entry
*dst
;
1434 memset(&fl6
, 0, sizeof(fl6
));
1435 fl6
.flowi6_oif
= oif
;
1436 fl6
.flowi6_mark
= mark
? mark
: IP6_REPLY_MARK(net
, skb
->mark
);
1437 fl6
.daddr
= iph
->daddr
;
1438 fl6
.saddr
= iph
->saddr
;
1439 fl6
.flowlabel
= ip6_flowinfo(iph
);
1440 fl6
.flowi6_uid
= uid
;
1442 dst
= ip6_route_output(net
, NULL
, &fl6
);
1444 __ip6_rt_update_pmtu(dst
, NULL
, iph
, ntohl(mtu
));
1447 EXPORT_SYMBOL_GPL(ip6_update_pmtu
);
1449 void ip6_sk_update_pmtu(struct sk_buff
*skb
, struct sock
*sk
, __be32 mtu
)
1451 struct dst_entry
*dst
;
1453 ip6_update_pmtu(skb
, sock_net(sk
), mtu
,
1454 sk
->sk_bound_dev_if
, sk
->sk_mark
, sk
->sk_uid
);
1456 dst
= __sk_dst_get(sk
);
1457 if (!dst
|| !dst
->obsolete
||
1458 dst
->ops
->check(dst
, inet6_sk(sk
)->dst_cookie
))
1462 if (!sock_owned_by_user(sk
) && !ipv6_addr_v4mapped(&sk
->sk_v6_daddr
))
1463 ip6_datagram_dst_update(sk
, false);
1466 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu
);
1468 /* Handle redirects */
1469 struct ip6rd_flowi
{
1471 struct in6_addr gateway
;
1474 static struct rt6_info
*__ip6_route_redirect(struct net
*net
,
1475 struct fib6_table
*table
,
1479 struct ip6rd_flowi
*rdfl
= (struct ip6rd_flowi
*)fl6
;
1480 struct rt6_info
*rt
;
1481 struct fib6_node
*fn
;
1483 /* Get the "current" route for this destination and
1484 * check if the redirect has come from appropriate router.
1486 * RFC 4861 specifies that redirects should only be
1487 * accepted if they come from the nexthop to the target.
1488 * Due to the way the routes are chosen, this notion
1489 * is a bit fuzzy and one might need to check all possible
1493 read_lock_bh(&table
->tb6_lock
);
1494 fn
= fib6_lookup(&table
->tb6_root
, &fl6
->daddr
, &fl6
->saddr
);
1496 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1497 if (rt6_check_expired(rt
))
1501 if (!(rt
->rt6i_flags
& RTF_GATEWAY
))
1503 if (fl6
->flowi6_oif
!= rt
->dst
.dev
->ifindex
)
1505 if (!ipv6_addr_equal(&rdfl
->gateway
, &rt
->rt6i_gateway
))
1511 rt
= net
->ipv6
.ip6_null_entry
;
1512 else if (rt
->dst
.error
) {
1513 rt
= net
->ipv6
.ip6_null_entry
;
1517 if (rt
== net
->ipv6
.ip6_null_entry
) {
1518 fn
= fib6_backtrack(fn
, &fl6
->saddr
);
1526 read_unlock_bh(&table
->tb6_lock
);
1528 trace_fib6_table_lookup(net
, rt
, table
->tb6_id
, fl6
);
1532 static struct dst_entry
*ip6_route_redirect(struct net
*net
,
1533 const struct flowi6
*fl6
,
1534 const struct in6_addr
*gateway
)
1536 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
1537 struct ip6rd_flowi rdfl
;
1540 rdfl
.gateway
= *gateway
;
1542 return fib6_rule_lookup(net
, &rdfl
.fl6
,
1543 flags
, __ip6_route_redirect
);
1546 void ip6_redirect(struct sk_buff
*skb
, struct net
*net
, int oif
, u32 mark
,
1549 const struct ipv6hdr
*iph
= (struct ipv6hdr
*) skb
->data
;
1550 struct dst_entry
*dst
;
1553 memset(&fl6
, 0, sizeof(fl6
));
1554 fl6
.flowi6_iif
= LOOPBACK_IFINDEX
;
1555 fl6
.flowi6_oif
= oif
;
1556 fl6
.flowi6_mark
= mark
;
1557 fl6
.daddr
= iph
->daddr
;
1558 fl6
.saddr
= iph
->saddr
;
1559 fl6
.flowlabel
= ip6_flowinfo(iph
);
1560 fl6
.flowi6_uid
= uid
;
1562 dst
= ip6_route_redirect(net
, &fl6
, &ipv6_hdr(skb
)->saddr
);
1563 rt6_do_redirect(dst
, NULL
, skb
);
1566 EXPORT_SYMBOL_GPL(ip6_redirect
);
1568 void ip6_redirect_no_header(struct sk_buff
*skb
, struct net
*net
, int oif
,
1571 const struct ipv6hdr
*iph
= ipv6_hdr(skb
);
1572 const struct rd_msg
*msg
= (struct rd_msg
*)icmp6_hdr(skb
);
1573 struct dst_entry
*dst
;
1576 memset(&fl6
, 0, sizeof(fl6
));
1577 fl6
.flowi6_iif
= LOOPBACK_IFINDEX
;
1578 fl6
.flowi6_oif
= oif
;
1579 fl6
.flowi6_mark
= mark
;
1580 fl6
.daddr
= msg
->dest
;
1581 fl6
.saddr
= iph
->daddr
;
1582 fl6
.flowi6_uid
= sock_net_uid(net
, NULL
);
1584 dst
= ip6_route_redirect(net
, &fl6
, &iph
->saddr
);
1585 rt6_do_redirect(dst
, NULL
, skb
);
1589 void ip6_sk_redirect(struct sk_buff
*skb
, struct sock
*sk
)
1591 ip6_redirect(skb
, sock_net(sk
), sk
->sk_bound_dev_if
, sk
->sk_mark
,
1594 EXPORT_SYMBOL_GPL(ip6_sk_redirect
);
1596 static unsigned int ip6_default_advmss(const struct dst_entry
*dst
)
1598 struct net_device
*dev
= dst
->dev
;
1599 unsigned int mtu
= dst_mtu(dst
);
1600 struct net
*net
= dev_net(dev
);
1602 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
1604 if (mtu
< net
->ipv6
.sysctl
.ip6_rt_min_advmss
)
1605 mtu
= net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
1608 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1609 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1610 * IPV6_MAXPLEN is also valid and means: "any MSS,
1611 * rely only on pmtu discovery"
1613 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
1618 static unsigned int ip6_mtu(const struct dst_entry
*dst
)
1620 const struct rt6_info
*rt
= (const struct rt6_info
*)dst
;
1621 unsigned int mtu
= rt
->rt6i_pmtu
;
1622 struct inet6_dev
*idev
;
1627 mtu
= dst_metric_raw(dst
, RTAX_MTU
);
1634 idev
= __in6_dev_get(dst
->dev
);
1636 mtu
= idev
->cnf
.mtu6
;
1640 mtu
= min_t(unsigned int, mtu
, IP6_MAX_MTU
);
1642 return mtu
- lwtunnel_headroom(dst
->lwtstate
, mtu
);
/* Singly-linked list of ICMPv6 dsts awaiting garbage collection, protected
 * by icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1648 struct dst_entry
*icmp6_dst_alloc(struct net_device
*dev
,
1651 struct dst_entry
*dst
;
1652 struct rt6_info
*rt
;
1653 struct inet6_dev
*idev
= in6_dev_get(dev
);
1654 struct net
*net
= dev_net(dev
);
1656 if (unlikely(!idev
))
1657 return ERR_PTR(-ENODEV
);
1659 rt
= ip6_dst_alloc(net
, dev
, 0);
1660 if (unlikely(!rt
)) {
1662 dst
= ERR_PTR(-ENOMEM
);
1666 rt
->dst
.flags
|= DST_HOST
;
1667 rt
->dst
.output
= ip6_output
;
1668 atomic_set(&rt
->dst
.__refcnt
, 1);
1669 rt
->rt6i_gateway
= fl6
->daddr
;
1670 rt
->rt6i_dst
.addr
= fl6
->daddr
;
1671 rt
->rt6i_dst
.plen
= 128;
1672 rt
->rt6i_idev
= idev
;
1673 dst_metric_set(&rt
->dst
, RTAX_HOPLIMIT
, 0);
1675 spin_lock_bh(&icmp6_dst_lock
);
1676 rt
->dst
.next
= icmp6_dst_gc_list
;
1677 icmp6_dst_gc_list
= &rt
->dst
;
1678 spin_unlock_bh(&icmp6_dst_lock
);
1680 fib6_force_start_gc(net
);
1682 dst
= xfrm_lookup(net
, &rt
->dst
, flowi6_to_flowi(fl6
), NULL
, 0);
1688 int icmp6_dst_gc(void)
1690 struct dst_entry
*dst
, **pprev
;
1693 spin_lock_bh(&icmp6_dst_lock
);
1694 pprev
= &icmp6_dst_gc_list
;
1696 while ((dst
= *pprev
) != NULL
) {
1697 if (!atomic_read(&dst
->__refcnt
)) {
1706 spin_unlock_bh(&icmp6_dst_lock
);
1711 static void icmp6_clean_all(int (*func
)(struct rt6_info
*rt
, void *arg
),
1714 struct dst_entry
*dst
, **pprev
;
1716 spin_lock_bh(&icmp6_dst_lock
);
1717 pprev
= &icmp6_dst_gc_list
;
1718 while ((dst
= *pprev
) != NULL
) {
1719 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1720 if (func(rt
, arg
)) {
1727 spin_unlock_bh(&icmp6_dst_lock
);
1730 static int ip6_dst_gc(struct dst_ops
*ops
)
1732 struct net
*net
= container_of(ops
, struct net
, ipv6
.ip6_dst_ops
);
1733 int rt_min_interval
= net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
1734 int rt_max_size
= net
->ipv6
.sysctl
.ip6_rt_max_size
;
1735 int rt_elasticity
= net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
1736 int rt_gc_timeout
= net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
1737 unsigned long rt_last_gc
= net
->ipv6
.ip6_rt_last_gc
;
1740 entries
= dst_entries_get_fast(ops
);
1741 if (time_after(rt_last_gc
+ rt_min_interval
, jiffies
) &&
1742 entries
<= rt_max_size
)
1745 net
->ipv6
.ip6_rt_gc_expire
++;
1746 fib6_run_gc(net
->ipv6
.ip6_rt_gc_expire
, net
, true);
1747 entries
= dst_entries_get_slow(ops
);
1748 if (entries
< ops
->gc_thresh
)
1749 net
->ipv6
.ip6_rt_gc_expire
= rt_gc_timeout
>>1;
1751 net
->ipv6
.ip6_rt_gc_expire
-= net
->ipv6
.ip6_rt_gc_expire
>>rt_elasticity
;
1752 return entries
> rt_max_size
;
1755 static int ip6_convert_metrics(struct mx6_config
*mxc
,
1756 const struct fib6_config
*cfg
)
1758 bool ecn_ca
= false;
1766 mp
= kzalloc(sizeof(u32
) * RTAX_MAX
, GFP_KERNEL
);
1770 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
1771 int type
= nla_type(nla
);
1776 if (unlikely(type
> RTAX_MAX
))
1779 if (type
== RTAX_CC_ALGO
) {
1780 char tmp
[TCP_CA_NAME_MAX
];
1782 nla_strlcpy(tmp
, nla
, sizeof(tmp
));
1783 val
= tcp_ca_get_key_by_name(tmp
, &ecn_ca
);
1784 if (val
== TCP_CA_UNSPEC
)
1787 val
= nla_get_u32(nla
);
1789 if (type
== RTAX_HOPLIMIT
&& val
> 255)
1791 if (type
== RTAX_FEATURES
&& (val
& ~RTAX_FEATURE_MASK
))
1795 __set_bit(type
- 1, mxc
->mx_valid
);
1799 __set_bit(RTAX_FEATURES
- 1, mxc
->mx_valid
);
1800 mp
[RTAX_FEATURES
- 1] |= DST_FEATURE_ECN_CA
;
1810 static struct rt6_info
*ip6_nh_lookup_table(struct net
*net
,
1811 struct fib6_config
*cfg
,
1812 const struct in6_addr
*gw_addr
)
1814 struct flowi6 fl6
= {
1815 .flowi6_oif
= cfg
->fc_ifindex
,
1817 .saddr
= cfg
->fc_prefsrc
,
1819 struct fib6_table
*table
;
1820 struct rt6_info
*rt
;
1821 int flags
= RT6_LOOKUP_F_IFACE
| RT6_LOOKUP_F_IGNORE_LINKSTATE
;
1823 table
= fib6_get_table(net
, cfg
->fc_table
);
1827 if (!ipv6_addr_any(&cfg
->fc_prefsrc
))
1828 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
1830 rt
= ip6_pol_route(net
, table
, cfg
->fc_ifindex
, &fl6
, flags
);
1832 /* if table lookup failed, fall back to full lookup */
1833 if (rt
== net
->ipv6
.ip6_null_entry
) {
1841 static struct rt6_info
*ip6_route_info_create(struct fib6_config
*cfg
)
1843 struct net
*net
= cfg
->fc_nlinfo
.nl_net
;
1844 struct rt6_info
*rt
= NULL
;
1845 struct net_device
*dev
= NULL
;
1846 struct inet6_dev
*idev
= NULL
;
1847 struct fib6_table
*table
;
1851 /* RTF_PCPU is an internal flag; can not be set by userspace */
1852 if (cfg
->fc_flags
& RTF_PCPU
)
1855 if (cfg
->fc_dst_len
> 128 || cfg
->fc_src_len
> 128)
1857 #ifndef CONFIG_IPV6_SUBTREES
1858 if (cfg
->fc_src_len
)
1861 if (cfg
->fc_ifindex
) {
1863 dev
= dev_get_by_index(net
, cfg
->fc_ifindex
);
1866 idev
= in6_dev_get(dev
);
1871 if (cfg
->fc_metric
== 0)
1872 cfg
->fc_metric
= IP6_RT_PRIO_USER
;
1875 if (cfg
->fc_nlinfo
.nlh
&&
1876 !(cfg
->fc_nlinfo
.nlh
->nlmsg_flags
& NLM_F_CREATE
)) {
1877 table
= fib6_get_table(net
, cfg
->fc_table
);
1879 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1880 table
= fib6_new_table(net
, cfg
->fc_table
);
1883 table
= fib6_new_table(net
, cfg
->fc_table
);
1889 rt
= ip6_dst_alloc(net
, NULL
,
1890 (cfg
->fc_flags
& RTF_ADDRCONF
) ? 0 : DST_NOCOUNT
);
1897 if (cfg
->fc_flags
& RTF_EXPIRES
)
1898 rt6_set_expires(rt
, jiffies
+
1899 clock_t_to_jiffies(cfg
->fc_expires
));
1901 rt6_clean_expires(rt
);
1903 if (cfg
->fc_protocol
== RTPROT_UNSPEC
)
1904 cfg
->fc_protocol
= RTPROT_BOOT
;
1905 rt
->rt6i_protocol
= cfg
->fc_protocol
;
1907 addr_type
= ipv6_addr_type(&cfg
->fc_dst
);
1909 if (addr_type
& IPV6_ADDR_MULTICAST
)
1910 rt
->dst
.input
= ip6_mc_input
;
1911 else if (cfg
->fc_flags
& RTF_LOCAL
)
1912 rt
->dst
.input
= ip6_input
;
1914 rt
->dst
.input
= ip6_forward
;
1916 rt
->dst
.output
= ip6_output
;
1918 if (cfg
->fc_encap
) {
1919 struct lwtunnel_state
*lwtstate
;
1921 err
= lwtunnel_build_state(dev
, cfg
->fc_encap_type
,
1922 cfg
->fc_encap
, AF_INET6
, cfg
,
1926 rt
->dst
.lwtstate
= lwtstate_get(lwtstate
);
1927 if (lwtunnel_output_redirect(rt
->dst
.lwtstate
)) {
1928 rt
->dst
.lwtstate
->orig_output
= rt
->dst
.output
;
1929 rt
->dst
.output
= lwtunnel_output
;
1931 if (lwtunnel_input_redirect(rt
->dst
.lwtstate
)) {
1932 rt
->dst
.lwtstate
->orig_input
= rt
->dst
.input
;
1933 rt
->dst
.input
= lwtunnel_input
;
1937 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
, &cfg
->fc_dst
, cfg
->fc_dst_len
);
1938 rt
->rt6i_dst
.plen
= cfg
->fc_dst_len
;
1939 if (rt
->rt6i_dst
.plen
== 128)
1940 rt
->dst
.flags
|= DST_HOST
;
1942 #ifdef CONFIG_IPV6_SUBTREES
1943 ipv6_addr_prefix(&rt
->rt6i_src
.addr
, &cfg
->fc_src
, cfg
->fc_src_len
);
1944 rt
->rt6i_src
.plen
= cfg
->fc_src_len
;
1947 rt
->rt6i_metric
= cfg
->fc_metric
;
1949 /* We cannot add true routes via loopback here,
1950 they would result in kernel looping; promote them to reject routes
1952 if ((cfg
->fc_flags
& RTF_REJECT
) ||
1953 (dev
&& (dev
->flags
& IFF_LOOPBACK
) &&
1954 !(addr_type
& IPV6_ADDR_LOOPBACK
) &&
1955 !(cfg
->fc_flags
& RTF_LOCAL
))) {
1956 /* hold loopback dev/idev if we haven't done so. */
1957 if (dev
!= net
->loopback_dev
) {
1962 dev
= net
->loopback_dev
;
1964 idev
= in6_dev_get(dev
);
1970 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
1971 switch (cfg
->fc_type
) {
1973 rt
->dst
.error
= -EINVAL
;
1974 rt
->dst
.output
= dst_discard_out
;
1975 rt
->dst
.input
= dst_discard
;
1978 rt
->dst
.error
= -EACCES
;
1979 rt
->dst
.output
= ip6_pkt_prohibit_out
;
1980 rt
->dst
.input
= ip6_pkt_prohibit
;
1983 case RTN_UNREACHABLE
:
1985 rt
->dst
.error
= (cfg
->fc_type
== RTN_THROW
) ? -EAGAIN
1986 : (cfg
->fc_type
== RTN_UNREACHABLE
)
1987 ? -EHOSTUNREACH
: -ENETUNREACH
;
1988 rt
->dst
.output
= ip6_pkt_discard_out
;
1989 rt
->dst
.input
= ip6_pkt_discard
;
1995 if (cfg
->fc_flags
& RTF_GATEWAY
) {
1996 const struct in6_addr
*gw_addr
;
1999 gw_addr
= &cfg
->fc_gateway
;
2000 gwa_type
= ipv6_addr_type(gw_addr
);
2002 /* if gw_addr is local we will fail to detect this in case
2003 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2004 * will return already-added prefix route via interface that
2005 * prefix route was assigned to, which might be non-loopback.
2008 if (ipv6_chk_addr_and_flags(net
, gw_addr
,
2009 gwa_type
& IPV6_ADDR_LINKLOCAL
?
2013 rt
->rt6i_gateway
= *gw_addr
;
2015 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
2016 struct rt6_info
*grt
= NULL
;
2018 /* IPv6 strictly inhibits using not link-local
2019 addresses as nexthop address.
2020 Otherwise, router will not able to send redirects.
2021 It is very good, but in some (rare!) circumstances
2022 (SIT, PtP, NBMA NOARP links) it is handy to allow
2023 some exceptions. --ANK
2024 We allow IPv4-mapped nexthops to support RFC4798-type
2027 if (!(gwa_type
& (IPV6_ADDR_UNICAST
|
2031 if (cfg
->fc_table
) {
2032 grt
= ip6_nh_lookup_table(net
, cfg
, gw_addr
);
2035 if (grt
->rt6i_flags
& RTF_GATEWAY
||
2036 (dev
&& dev
!= grt
->dst
.dev
)) {
2044 grt
= rt6_lookup(net
, gw_addr
, NULL
,
2045 cfg
->fc_ifindex
, 1);
2047 err
= -EHOSTUNREACH
;
2051 if (dev
!= grt
->dst
.dev
) {
2057 idev
= grt
->rt6i_idev
;
2059 in6_dev_hold(grt
->rt6i_idev
);
2061 if (!(grt
->rt6i_flags
& RTF_GATEWAY
))
2069 if (!dev
|| (dev
->flags
& IFF_LOOPBACK
))
2077 if (!ipv6_addr_any(&cfg
->fc_prefsrc
)) {
2078 if (!ipv6_chk_addr(net
, &cfg
->fc_prefsrc
, dev
, 0)) {
2082 rt
->rt6i_prefsrc
.addr
= cfg
->fc_prefsrc
;
2083 rt
->rt6i_prefsrc
.plen
= 128;
2085 rt
->rt6i_prefsrc
.plen
= 0;
2087 rt
->rt6i_flags
= cfg
->fc_flags
;
2091 rt
->rt6i_idev
= idev
;
2092 rt
->rt6i_table
= table
;
2094 cfg
->fc_nlinfo
.nl_net
= dev_net(dev
);
2105 return ERR_PTR(err
);
2108 int ip6_route_add(struct fib6_config
*cfg
)
2110 struct mx6_config mxc
= { .mx
= NULL
, };
2111 struct rt6_info
*rt
;
2114 rt
= ip6_route_info_create(cfg
);
2121 err
= ip6_convert_metrics(&mxc
, cfg
);
2125 err
= __ip6_ins_rt(rt
, &cfg
->fc_nlinfo
, &mxc
);
2137 static int __ip6_del_rt(struct rt6_info
*rt
, struct nl_info
*info
)
2140 struct fib6_table
*table
;
2141 struct net
*net
= dev_net(rt
->dst
.dev
);
2143 if (rt
== net
->ipv6
.ip6_null_entry
||
2144 rt
->dst
.flags
& DST_NOCACHE
) {
2149 table
= rt
->rt6i_table
;
2150 write_lock_bh(&table
->tb6_lock
);
2151 err
= fib6_del(rt
, info
);
2152 write_unlock_bh(&table
->tb6_lock
);
2159 int ip6_del_rt(struct rt6_info
*rt
)
2161 struct nl_info info
= {
2162 .nl_net
= dev_net(rt
->dst
.dev
),
2164 return __ip6_del_rt(rt
, &info
);
2167 static int ip6_route_del(struct fib6_config
*cfg
)
2169 struct fib6_table
*table
;
2170 struct fib6_node
*fn
;
2171 struct rt6_info
*rt
;
2174 table
= fib6_get_table(cfg
->fc_nlinfo
.nl_net
, cfg
->fc_table
);
2178 read_lock_bh(&table
->tb6_lock
);
2180 fn
= fib6_locate(&table
->tb6_root
,
2181 &cfg
->fc_dst
, cfg
->fc_dst_len
,
2182 &cfg
->fc_src
, cfg
->fc_src_len
);
2185 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
2186 if ((rt
->rt6i_flags
& RTF_CACHE
) &&
2187 !(cfg
->fc_flags
& RTF_CACHE
))
2189 if (cfg
->fc_ifindex
&&
2191 rt
->dst
.dev
->ifindex
!= cfg
->fc_ifindex
))
2193 if (cfg
->fc_flags
& RTF_GATEWAY
&&
2194 !ipv6_addr_equal(&cfg
->fc_gateway
, &rt
->rt6i_gateway
))
2196 if (cfg
->fc_metric
&& cfg
->fc_metric
!= rt
->rt6i_metric
)
2198 if (cfg
->fc_protocol
&& cfg
->fc_protocol
!= rt
->rt6i_protocol
)
2201 read_unlock_bh(&table
->tb6_lock
);
2203 return __ip6_del_rt(rt
, &cfg
->fc_nlinfo
);
2206 read_unlock_bh(&table
->tb6_lock
);
2211 static void rt6_do_redirect(struct dst_entry
*dst
, struct sock
*sk
, struct sk_buff
*skb
)
2213 struct netevent_redirect netevent
;
2214 struct rt6_info
*rt
, *nrt
= NULL
;
2215 struct ndisc_options ndopts
;
2216 struct inet6_dev
*in6_dev
;
2217 struct neighbour
*neigh
;
2219 int optlen
, on_link
;
2222 optlen
= skb_tail_pointer(skb
) - skb_transport_header(skb
);
2223 optlen
-= sizeof(*msg
);
2226 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
2230 msg
= (struct rd_msg
*)icmp6_hdr(skb
);
2232 if (ipv6_addr_is_multicast(&msg
->dest
)) {
2233 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
2238 if (ipv6_addr_equal(&msg
->dest
, &msg
->target
)) {
2240 } else if (ipv6_addr_type(&msg
->target
) !=
2241 (IPV6_ADDR_UNICAST
|IPV6_ADDR_LINKLOCAL
)) {
2242 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
2246 in6_dev
= __in6_dev_get(skb
->dev
);
2249 if (in6_dev
->cnf
.forwarding
|| !in6_dev
->cnf
.accept_redirects
)
2253 * The IP source address of the Redirect MUST be the same as the current
2254 * first-hop router for the specified ICMP Destination Address.
2257 if (!ndisc_parse_options(skb
->dev
, msg
->opt
, optlen
, &ndopts
)) {
2258 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2263 if (ndopts
.nd_opts_tgt_lladdr
) {
2264 lladdr
= ndisc_opt_addr_data(ndopts
.nd_opts_tgt_lladdr
,
2267 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2272 rt
= (struct rt6_info
*) dst
;
2273 if (rt
->rt6i_flags
& RTF_REJECT
) {
2274 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
2278 /* Redirect received -> path was valid.
2279 * Look, redirects are sent only in response to data packets,
2280 * so that this nexthop apparently is reachable. --ANK
2282 dst_confirm_neigh(&rt
->dst
, &ipv6_hdr(skb
)->saddr
);
2284 neigh
= __neigh_lookup(&nd_tbl
, &msg
->target
, skb
->dev
, 1);
2289 * We have finally decided to accept it.
2292 ndisc_update(skb
->dev
, neigh
, lladdr
, NUD_STALE
,
2293 NEIGH_UPDATE_F_WEAK_OVERRIDE
|
2294 NEIGH_UPDATE_F_OVERRIDE
|
2295 (on_link
? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER
|
2296 NEIGH_UPDATE_F_ISROUTER
)),
2297 NDISC_REDIRECT
, &ndopts
);
2299 nrt
= ip6_rt_cache_alloc(rt
, &msg
->dest
, NULL
);
2303 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
2305 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
2307 nrt
->rt6i_gateway
= *(struct in6_addr
*)neigh
->primary_key
;
2309 if (ip6_ins_rt(nrt
))
2312 netevent
.old
= &rt
->dst
;
2313 netevent
.new = &nrt
->dst
;
2314 netevent
.daddr
= &msg
->dest
;
2315 netevent
.neigh
= neigh
;
2316 call_netevent_notifiers(NETEVENT_REDIRECT
, &netevent
);
2318 if (rt
->rt6i_flags
& RTF_CACHE
) {
2319 rt
= (struct rt6_info
*) dst_clone(&rt
->dst
);
2324 neigh_release(neigh
);
2328 * Misc support functions
2331 static void rt6_set_from(struct rt6_info
*rt
, struct rt6_info
*from
)
2333 BUG_ON(from
->dst
.from
);
2335 rt
->rt6i_flags
&= ~RTF_EXPIRES
;
2336 dst_hold(&from
->dst
);
2337 rt
->dst
.from
= &from
->dst
;
2338 dst_init_metrics(&rt
->dst
, dst_metrics_ptr(&from
->dst
), true);
2341 static void ip6_rt_copy_init(struct rt6_info
*rt
, struct rt6_info
*ort
)
2343 rt
->dst
.input
= ort
->dst
.input
;
2344 rt
->dst
.output
= ort
->dst
.output
;
2345 rt
->rt6i_dst
= ort
->rt6i_dst
;
2346 rt
->dst
.error
= ort
->dst
.error
;
2347 rt
->rt6i_idev
= ort
->rt6i_idev
;
2349 in6_dev_hold(rt
->rt6i_idev
);
2350 rt
->dst
.lastuse
= jiffies
;
2351 rt
->rt6i_gateway
= ort
->rt6i_gateway
;
2352 rt
->rt6i_flags
= ort
->rt6i_flags
;
2353 rt6_set_from(rt
, ort
);
2354 rt
->rt6i_metric
= ort
->rt6i_metric
;
2355 #ifdef CONFIG_IPV6_SUBTREES
2356 rt
->rt6i_src
= ort
->rt6i_src
;
2358 rt
->rt6i_prefsrc
= ort
->rt6i_prefsrc
;
2359 rt
->rt6i_table
= ort
->rt6i_table
;
2360 rt
->dst
.lwtstate
= lwtstate_get(ort
->dst
.lwtstate
);
2363 #ifdef CONFIG_IPV6_ROUTE_INFO
2364 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
2365 const struct in6_addr
*prefix
, int prefixlen
,
2366 const struct in6_addr
*gwaddr
,
2367 struct net_device
*dev
)
2369 u32 tb_id
= l3mdev_fib_table(dev
) ? : RT6_TABLE_INFO
;
2370 int ifindex
= dev
->ifindex
;
2371 struct fib6_node
*fn
;
2372 struct rt6_info
*rt
= NULL
;
2373 struct fib6_table
*table
;
2375 table
= fib6_get_table(net
, tb_id
);
2379 read_lock_bh(&table
->tb6_lock
);
2380 fn
= fib6_locate(&table
->tb6_root
, prefix
, prefixlen
, NULL
, 0);
2384 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
2385 if (rt
->dst
.dev
->ifindex
!= ifindex
)
2387 if ((rt
->rt6i_flags
& (RTF_ROUTEINFO
|RTF_GATEWAY
)) != (RTF_ROUTEINFO
|RTF_GATEWAY
))
2389 if (!ipv6_addr_equal(&rt
->rt6i_gateway
, gwaddr
))
2395 read_unlock_bh(&table
->tb6_lock
);
2399 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
2400 const struct in6_addr
*prefix
, int prefixlen
,
2401 const struct in6_addr
*gwaddr
,
2402 struct net_device
*dev
,
2405 struct fib6_config cfg
= {
2406 .fc_metric
= IP6_RT_PRIO_USER
,
2407 .fc_ifindex
= dev
->ifindex
,
2408 .fc_dst_len
= prefixlen
,
2409 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_ROUTEINFO
|
2410 RTF_UP
| RTF_PREF(pref
),
2411 .fc_nlinfo
.portid
= 0,
2412 .fc_nlinfo
.nlh
= NULL
,
2413 .fc_nlinfo
.nl_net
= net
,
2416 cfg
.fc_table
= l3mdev_fib_table(dev
) ? : RT6_TABLE_INFO
,
2417 cfg
.fc_dst
= *prefix
;
2418 cfg
.fc_gateway
= *gwaddr
;
2420 /* We should treat it as a default route if prefix length is 0. */
2422 cfg
.fc_flags
|= RTF_DEFAULT
;
2424 ip6_route_add(&cfg
);
2426 return rt6_get_route_info(net
, prefix
, prefixlen
, gwaddr
, dev
);
2430 struct rt6_info
*rt6_get_dflt_router(const struct in6_addr
*addr
, struct net_device
*dev
)
2432 u32 tb_id
= l3mdev_fib_table(dev
) ? : RT6_TABLE_DFLT
;
2433 struct rt6_info
*rt
;
2434 struct fib6_table
*table
;
2436 table
= fib6_get_table(dev_net(dev
), tb_id
);
2440 read_lock_bh(&table
->tb6_lock
);
2441 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
2442 if (dev
== rt
->dst
.dev
&&
2443 ((rt
->rt6i_flags
& (RTF_ADDRCONF
| RTF_DEFAULT
)) == (RTF_ADDRCONF
| RTF_DEFAULT
)) &&
2444 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
2449 read_unlock_bh(&table
->tb6_lock
);
2453 struct rt6_info
*rt6_add_dflt_router(const struct in6_addr
*gwaddr
,
2454 struct net_device
*dev
,
2457 struct fib6_config cfg
= {
2458 .fc_table
= l3mdev_fib_table(dev
) ? : RT6_TABLE_DFLT
,
2459 .fc_metric
= IP6_RT_PRIO_USER
,
2460 .fc_ifindex
= dev
->ifindex
,
2461 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
|
2462 RTF_UP
| RTF_EXPIRES
| RTF_PREF(pref
),
2463 .fc_nlinfo
.portid
= 0,
2464 .fc_nlinfo
.nlh
= NULL
,
2465 .fc_nlinfo
.nl_net
= dev_net(dev
),
2468 cfg
.fc_gateway
= *gwaddr
;
2470 if (!ip6_route_add(&cfg
)) {
2471 struct fib6_table
*table
;
2473 table
= fib6_get_table(dev_net(dev
), cfg
.fc_table
);
2475 table
->flags
|= RT6_TABLE_HAS_DFLT_ROUTER
;
2478 return rt6_get_dflt_router(gwaddr
, dev
);
2481 static void __rt6_purge_dflt_routers(struct fib6_table
*table
)
2483 struct rt6_info
*rt
;
2486 read_lock_bh(&table
->tb6_lock
);
2487 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
2488 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
) &&
2489 (!rt
->rt6i_idev
|| rt
->rt6i_idev
->cnf
.accept_ra
!= 2)) {
2491 read_unlock_bh(&table
->tb6_lock
);
2496 read_unlock_bh(&table
->tb6_lock
);
2498 table
->flags
&= ~RT6_TABLE_HAS_DFLT_ROUTER
;
2501 void rt6_purge_dflt_routers(struct net
*net
)
2503 struct fib6_table
*table
;
2504 struct hlist_head
*head
;
2509 for (h
= 0; h
< FIB6_TABLE_HASHSZ
; h
++) {
2510 head
= &net
->ipv6
.fib_table_hash
[h
];
2511 hlist_for_each_entry_rcu(table
, head
, tb6_hlist
) {
2512 if (table
->flags
& RT6_TABLE_HAS_DFLT_ROUTER
)
2513 __rt6_purge_dflt_routers(table
);
2520 static void rtmsg_to_fib6_config(struct net
*net
,
2521 struct in6_rtmsg
*rtmsg
,
2522 struct fib6_config
*cfg
)
2524 memset(cfg
, 0, sizeof(*cfg
));
2526 cfg
->fc_table
= l3mdev_fib_table_by_index(net
, rtmsg
->rtmsg_ifindex
) ?
2528 cfg
->fc_ifindex
= rtmsg
->rtmsg_ifindex
;
2529 cfg
->fc_metric
= rtmsg
->rtmsg_metric
;
2530 cfg
->fc_expires
= rtmsg
->rtmsg_info
;
2531 cfg
->fc_dst_len
= rtmsg
->rtmsg_dst_len
;
2532 cfg
->fc_src_len
= rtmsg
->rtmsg_src_len
;
2533 cfg
->fc_flags
= rtmsg
->rtmsg_flags
;
2535 cfg
->fc_nlinfo
.nl_net
= net
;
2537 cfg
->fc_dst
= rtmsg
->rtmsg_dst
;
2538 cfg
->fc_src
= rtmsg
->rtmsg_src
;
2539 cfg
->fc_gateway
= rtmsg
->rtmsg_gateway
;
2542 int ipv6_route_ioctl(struct net
*net
, unsigned int cmd
, void __user
*arg
)
2544 struct fib6_config cfg
;
2545 struct in6_rtmsg rtmsg
;
2549 case SIOCADDRT
: /* Add a route */
2550 case SIOCDELRT
: /* Delete a route */
2551 if (!ns_capable(net
->user_ns
, CAP_NET_ADMIN
))
2553 err
= copy_from_user(&rtmsg
, arg
,
2554 sizeof(struct in6_rtmsg
));
2558 rtmsg_to_fib6_config(net
, &rtmsg
, &cfg
);
2563 err
= ip6_route_add(&cfg
);
2566 err
= ip6_route_del(&cfg
);
2580 * Drop the packet on the floor
2583 static int ip6_pkt_drop(struct sk_buff
*skb
, u8 code
, int ipstats_mib_noroutes
)
2586 struct dst_entry
*dst
= skb_dst(skb
);
2587 switch (ipstats_mib_noroutes
) {
2588 case IPSTATS_MIB_INNOROUTES
:
2589 type
= ipv6_addr_type(&ipv6_hdr(skb
)->daddr
);
2590 if (type
== IPV6_ADDR_ANY
) {
2591 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2592 IPSTATS_MIB_INADDRERRORS
);
2596 case IPSTATS_MIB_OUTNOROUTES
:
2597 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
2598 ipstats_mib_noroutes
);
2601 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, code
, 0);
2606 static int ip6_pkt_discard(struct sk_buff
*skb
)
2608 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_INNOROUTES
);
2611 static int ip6_pkt_discard_out(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
)
2613 skb
->dev
= skb_dst(skb
)->dev
;
2614 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_OUTNOROUTES
);
2617 static int ip6_pkt_prohibit(struct sk_buff
*skb
)
2619 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_INNOROUTES
);
2622 static int ip6_pkt_prohibit_out(struct net
*net
, struct sock
*sk
, struct sk_buff
*skb
)
2624 skb
->dev
= skb_dst(skb
)->dev
;
2625 return ip6_pkt_drop(skb
, ICMPV6_ADM_PROHIBITED
, IPSTATS_MIB_OUTNOROUTES
);
2629 * Allocate a dst for local (unicast / anycast) address.
2632 struct rt6_info
*addrconf_dst_alloc(struct inet6_dev
*idev
,
2633 const struct in6_addr
*addr
,
2637 struct net
*net
= dev_net(idev
->dev
);
2638 struct net_device
*dev
= net
->loopback_dev
;
2639 struct rt6_info
*rt
;
2641 /* use L3 Master device as loopback for host routes if device
2642 * is enslaved and address is not link local or multicast
2644 if (!rt6_need_strict(addr
))
2645 dev
= l3mdev_master_dev_rcu(idev
->dev
) ? : dev
;
2647 rt
= ip6_dst_alloc(net
, dev
, DST_NOCOUNT
);
2649 return ERR_PTR(-ENOMEM
);
2653 rt
->dst
.flags
|= DST_HOST
;
2654 rt
->dst
.input
= ip6_input
;
2655 rt
->dst
.output
= ip6_output
;
2656 rt
->rt6i_idev
= idev
;
2658 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
2660 rt
->rt6i_flags
|= RTF_ANYCAST
;
2662 rt
->rt6i_flags
|= RTF_LOCAL
;
2664 rt
->rt6i_gateway
= *addr
;
2665 rt
->rt6i_dst
.addr
= *addr
;
2666 rt
->rt6i_dst
.plen
= 128;
2667 tb_id
= l3mdev_fib_table(idev
->dev
) ? : RT6_TABLE_LOCAL
;
2668 rt
->rt6i_table
= fib6_get_table(net
, tb_id
);
2669 rt
->dst
.flags
|= DST_NOCACHE
;
2671 atomic_set(&rt
->dst
.__refcnt
, 1);
2676 /* remove deleted ip from prefsrc entries */
/* Callback argument for fib6_remove_prefsrc(): the device whose address
 * went away, its netns, and the removed address.
 */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};
2683 static int fib6_remove_prefsrc(struct rt6_info
*rt
, void *arg
)
2685 struct net_device
*dev
= ((struct arg_dev_net_ip
*)arg
)->dev
;
2686 struct net
*net
= ((struct arg_dev_net_ip
*)arg
)->net
;
2687 struct in6_addr
*addr
= ((struct arg_dev_net_ip
*)arg
)->addr
;
2689 if (((void *)rt
->dst
.dev
== dev
|| !dev
) &&
2690 rt
!= net
->ipv6
.ip6_null_entry
&&
2691 ipv6_addr_equal(addr
, &rt
->rt6i_prefsrc
.addr
)) {
2692 /* remove prefsrc entry */
2693 rt
->rt6i_prefsrc
.plen
= 0;
2698 void rt6_remove_prefsrc(struct inet6_ifaddr
*ifp
)
2700 struct net
*net
= dev_net(ifp
->idev
->dev
);
2701 struct arg_dev_net_ip adni
= {
2702 .dev
= ifp
->idev
->dev
,
2706 fib6_clean_all(net
, fib6_remove_prefsrc
, &adni
);
2709 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2710 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2712 /* Remove routers and update dst entries when gateway turn into host. */
2713 static int fib6_clean_tohost(struct rt6_info
*rt
, void *arg
)
2715 struct in6_addr
*gateway
= (struct in6_addr
*)arg
;
2717 if ((((rt
->rt6i_flags
& RTF_RA_ROUTER
) == RTF_RA_ROUTER
) ||
2718 ((rt
->rt6i_flags
& RTF_CACHE_GATEWAY
) == RTF_CACHE_GATEWAY
)) &&
2719 ipv6_addr_equal(gateway
, &rt
->rt6i_gateway
)) {
2725 void rt6_clean_tohost(struct net
*net
, struct in6_addr
*gateway
)
2727 fib6_clean_all(net
, fib6_clean_tohost
, gateway
);
/* Callback argument for fib6_ifdown(): the (possibly NULL) device going
 * down and its netns.
 */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2735 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
2737 const struct arg_dev_net
*adn
= arg
;
2738 const struct net_device
*dev
= adn
->dev
;
2740 if ((rt
->dst
.dev
== dev
|| !dev
) &&
2741 rt
!= adn
->net
->ipv6
.ip6_null_entry
)
2747 void rt6_ifdown(struct net
*net
, struct net_device
*dev
)
2749 struct arg_dev_net adn
= {
2754 fib6_clean_all(net
, fib6_ifdown
, &adn
);
2755 icmp6_clean_all(fib6_ifdown
, &adn
);
2757 rt6_uncached_list_flush_dev(net
, dev
);
/* Callback argument for rt6_mtu_change_route(): the device whose MTU
 * changed and the new MTU.
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned int mtu;
};
2765 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
2767 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
2768 struct inet6_dev
*idev
;
2770 /* In IPv6 pmtu discovery is not optional,
2771 so that RTAX_MTU lock cannot disable it.
2772 We still use this lock to block changes
2773 caused by addrconf/ndisc.
2776 idev
= __in6_dev_get(arg
->dev
);
2780 /* For administrative MTU increase, there is no way to discover
2781 IPv6 PMTU increase, so PMTU increase should be updated here.
2782 Since RFC 1981 doesn't include administrative MTU increase
2783 update PMTU increase is a MUST. (i.e. jumbo frame)
2786 If new MTU is less than route PMTU, this new MTU will be the
2787 lowest MTU in the path, update the route PMTU to reflect PMTU
2788 decreases; if new MTU is greater than route PMTU, and the
2789 old MTU is the lowest MTU in the path, update the route PMTU
2790 to reflect the increase. In this case if the other nodes' MTU
2791 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2794 if (rt
->dst
.dev
== arg
->dev
&&
2795 dst_metric_raw(&rt
->dst
, RTAX_MTU
) &&
2796 !dst_metric_locked(&rt
->dst
, RTAX_MTU
)) {
2797 if (rt
->rt6i_flags
& RTF_CACHE
) {
2798 /* For RTF_CACHE with rt6i_pmtu == 0
2799 * (i.e. a redirected route),
2800 * the metrics of its rt->dst.from has already
2803 if (rt
->rt6i_pmtu
&& rt
->rt6i_pmtu
> arg
->mtu
)
2804 rt
->rt6i_pmtu
= arg
->mtu
;
2805 } else if (dst_mtu(&rt
->dst
) >= arg
->mtu
||
2806 (dst_mtu(&rt
->dst
) < arg
->mtu
&&
2807 dst_mtu(&rt
->dst
) == idev
->cnf
.mtu6
)) {
2808 dst_metric_set(&rt
->dst
, RTAX_MTU
, arg
->mtu
);
2814 void rt6_mtu_change(struct net_device
*dev
, unsigned int mtu
)
2816 struct rt6_mtu_change_arg arg
= {
2821 fib6_clean_all(dev_net(dev
), rt6_mtu_change_route
, &arg
);
2824 static const struct nla_policy rtm_ipv6_policy
[RTA_MAX
+1] = {
2825 [RTA_GATEWAY
] = { .len
= sizeof(struct in6_addr
) },
2826 [RTA_OIF
] = { .type
= NLA_U32
},
2827 [RTA_IIF
] = { .type
= NLA_U32
},
2828 [RTA_PRIORITY
] = { .type
= NLA_U32
},
2829 [RTA_METRICS
] = { .type
= NLA_NESTED
},
2830 [RTA_MULTIPATH
] = { .len
= sizeof(struct rtnexthop
) },
2831 [RTA_PREF
] = { .type
= NLA_U8
},
2832 [RTA_ENCAP_TYPE
] = { .type
= NLA_U16
},
2833 [RTA_ENCAP
] = { .type
= NLA_NESTED
},
2834 [RTA_EXPIRES
] = { .type
= NLA_U32
},
2835 [RTA_UID
] = { .type
= NLA_U32
},
2838 static int rtm_to_fib6_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2839 struct fib6_config
*cfg
)
2842 struct nlattr
*tb
[RTA_MAX
+1];
2846 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2851 rtm
= nlmsg_data(nlh
);
2852 memset(cfg
, 0, sizeof(*cfg
));
2854 cfg
->fc_table
= rtm
->rtm_table
;
2855 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
2856 cfg
->fc_src_len
= rtm
->rtm_src_len
;
2857 cfg
->fc_flags
= RTF_UP
;
2858 cfg
->fc_protocol
= rtm
->rtm_protocol
;
2859 cfg
->fc_type
= rtm
->rtm_type
;
2861 if (rtm
->rtm_type
== RTN_UNREACHABLE
||
2862 rtm
->rtm_type
== RTN_BLACKHOLE
||
2863 rtm
->rtm_type
== RTN_PROHIBIT
||
2864 rtm
->rtm_type
== RTN_THROW
)
2865 cfg
->fc_flags
|= RTF_REJECT
;
2867 if (rtm
->rtm_type
== RTN_LOCAL
)
2868 cfg
->fc_flags
|= RTF_LOCAL
;
2870 if (rtm
->rtm_flags
& RTM_F_CLONED
)
2871 cfg
->fc_flags
|= RTF_CACHE
;
2873 cfg
->fc_nlinfo
.portid
= NETLINK_CB(skb
).portid
;
2874 cfg
->fc_nlinfo
.nlh
= nlh
;
2875 cfg
->fc_nlinfo
.nl_net
= sock_net(skb
->sk
);
2877 if (tb
[RTA_GATEWAY
]) {
2878 cfg
->fc_gateway
= nla_get_in6_addr(tb
[RTA_GATEWAY
]);
2879 cfg
->fc_flags
|= RTF_GATEWAY
;
2883 int plen
= (rtm
->rtm_dst_len
+ 7) >> 3;
2885 if (nla_len(tb
[RTA_DST
]) < plen
)
2888 nla_memcpy(&cfg
->fc_dst
, tb
[RTA_DST
], plen
);
2892 int plen
= (rtm
->rtm_src_len
+ 7) >> 3;
2894 if (nla_len(tb
[RTA_SRC
]) < plen
)
2897 nla_memcpy(&cfg
->fc_src
, tb
[RTA_SRC
], plen
);
2900 if (tb
[RTA_PREFSRC
])
2901 cfg
->fc_prefsrc
= nla_get_in6_addr(tb
[RTA_PREFSRC
]);
2904 cfg
->fc_ifindex
= nla_get_u32(tb
[RTA_OIF
]);
2906 if (tb
[RTA_PRIORITY
])
2907 cfg
->fc_metric
= nla_get_u32(tb
[RTA_PRIORITY
]);
2909 if (tb
[RTA_METRICS
]) {
2910 cfg
->fc_mx
= nla_data(tb
[RTA_METRICS
]);
2911 cfg
->fc_mx_len
= nla_len(tb
[RTA_METRICS
]);
2915 cfg
->fc_table
= nla_get_u32(tb
[RTA_TABLE
]);
2917 if (tb
[RTA_MULTIPATH
]) {
2918 cfg
->fc_mp
= nla_data(tb
[RTA_MULTIPATH
]);
2919 cfg
->fc_mp_len
= nla_len(tb
[RTA_MULTIPATH
]);
2921 err
= lwtunnel_valid_encap_type_attr(cfg
->fc_mp
,
2928 pref
= nla_get_u8(tb
[RTA_PREF
]);
2929 if (pref
!= ICMPV6_ROUTER_PREF_LOW
&&
2930 pref
!= ICMPV6_ROUTER_PREF_HIGH
)
2931 pref
= ICMPV6_ROUTER_PREF_MEDIUM
;
2932 cfg
->fc_flags
|= RTF_PREF(pref
);
2936 cfg
->fc_encap
= tb
[RTA_ENCAP
];
2938 if (tb
[RTA_ENCAP_TYPE
]) {
2939 cfg
->fc_encap_type
= nla_get_u16(tb
[RTA_ENCAP_TYPE
]);
2941 err
= lwtunnel_valid_encap_type(cfg
->fc_encap_type
);
2946 if (tb
[RTA_EXPIRES
]) {
2947 unsigned long timeout
= addrconf_timeout_fixup(nla_get_u32(tb
[RTA_EXPIRES
]), HZ
);
2949 if (addrconf_finite_timeout(timeout
)) {
2950 cfg
->fc_expires
= jiffies_to_clock_t(timeout
* HZ
);
2951 cfg
->fc_flags
|= RTF_EXPIRES
;
2961 struct rt6_info
*rt6_info
;
2962 struct fib6_config r_cfg
;
2963 struct mx6_config mxc
;
2964 struct list_head next
;
2967 static void ip6_print_replace_route_err(struct list_head
*rt6_nh_list
)
2971 list_for_each_entry(nh
, rt6_nh_list
, next
) {
2972 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2973 &nh
->r_cfg
.fc_dst
, &nh
->r_cfg
.fc_gateway
,
2974 nh
->r_cfg
.fc_ifindex
);
2978 static int ip6_route_info_append(struct list_head
*rt6_nh_list
,
2979 struct rt6_info
*rt
, struct fib6_config
*r_cfg
)
2982 struct rt6_info
*rtnh
;
2985 list_for_each_entry(nh
, rt6_nh_list
, next
) {
2986 /* check if rt6_info already exists */
2987 rtnh
= nh
->rt6_info
;
2989 if (rtnh
->dst
.dev
== rt
->dst
.dev
&&
2990 rtnh
->rt6i_idev
== rt
->rt6i_idev
&&
2991 ipv6_addr_equal(&rtnh
->rt6i_gateway
,
2996 nh
= kzalloc(sizeof(*nh
), GFP_KERNEL
);
3000 err
= ip6_convert_metrics(&nh
->mxc
, r_cfg
);
3005 memcpy(&nh
->r_cfg
, r_cfg
, sizeof(*r_cfg
));
3006 list_add_tail(&nh
->next
, rt6_nh_list
);
3011 static int ip6_route_multipath_add(struct fib6_config
*cfg
)
3013 struct fib6_config r_cfg
;
3014 struct rtnexthop
*rtnh
;
3015 struct rt6_info
*rt
;
3016 struct rt6_nh
*err_nh
;
3017 struct rt6_nh
*nh
, *nh_safe
;
3022 int replace
= (cfg
->fc_nlinfo
.nlh
&&
3023 (cfg
->fc_nlinfo
.nlh
->nlmsg_flags
& NLM_F_REPLACE
));
3024 LIST_HEAD(rt6_nh_list
);
3026 remaining
= cfg
->fc_mp_len
;
3027 rtnh
= (struct rtnexthop
*)cfg
->fc_mp
;
3029 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3030 * rt6_info structs per nexthop
3032 while (rtnh_ok(rtnh
, remaining
)) {
3033 memcpy(&r_cfg
, cfg
, sizeof(*cfg
));
3034 if (rtnh
->rtnh_ifindex
)
3035 r_cfg
.fc_ifindex
= rtnh
->rtnh_ifindex
;
3037 attrlen
= rtnh_attrlen(rtnh
);
3039 struct nlattr
*nla
, *attrs
= rtnh_attrs(rtnh
);
3041 nla
= nla_find(attrs
, attrlen
, RTA_GATEWAY
);
3043 r_cfg
.fc_gateway
= nla_get_in6_addr(nla
);
3044 r_cfg
.fc_flags
|= RTF_GATEWAY
;
3046 r_cfg
.fc_encap
= nla_find(attrs
, attrlen
, RTA_ENCAP
);
3047 nla
= nla_find(attrs
, attrlen
, RTA_ENCAP_TYPE
);
3049 r_cfg
.fc_encap_type
= nla_get_u16(nla
);
3052 rt
= ip6_route_info_create(&r_cfg
);
3059 err
= ip6_route_info_append(&rt6_nh_list
, rt
, &r_cfg
);
3065 rtnh
= rtnh_next(rtnh
, &remaining
);
3069 list_for_each_entry(nh
, &rt6_nh_list
, next
) {
3070 err
= __ip6_ins_rt(nh
->rt6_info
, &cfg
->fc_nlinfo
, &nh
->mxc
);
3071 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3072 nh
->rt6_info
= NULL
;
3075 ip6_print_replace_route_err(&rt6_nh_list
);
3080 /* Because each route is added like a single route we remove
3081 * these flags after the first nexthop: if there is a collision,
3082 * we have already failed to add the first nexthop:
3083 * fib6_add_rt2node() has rejected it; when replacing, old
3084 * nexthops have been replaced by first new, the rest should
3087 cfg
->fc_nlinfo
.nlh
->nlmsg_flags
&= ~(NLM_F_EXCL
|
3095 /* Delete routes that were already added */
3096 list_for_each_entry(nh
, &rt6_nh_list
, next
) {
3099 ip6_route_del(&nh
->r_cfg
);
3103 list_for_each_entry_safe(nh
, nh_safe
, &rt6_nh_list
, next
) {
3105 dst_free(&nh
->rt6_info
->dst
);
3107 list_del(&nh
->next
);
3114 static int ip6_route_multipath_del(struct fib6_config
*cfg
)
3116 struct fib6_config r_cfg
;
3117 struct rtnexthop
*rtnh
;
3120 int err
= 1, last_err
= 0;
3122 remaining
= cfg
->fc_mp_len
;
3123 rtnh
= (struct rtnexthop
*)cfg
->fc_mp
;
3125 /* Parse a Multipath Entry */
3126 while (rtnh_ok(rtnh
, remaining
)) {
3127 memcpy(&r_cfg
, cfg
, sizeof(*cfg
));
3128 if (rtnh
->rtnh_ifindex
)
3129 r_cfg
.fc_ifindex
= rtnh
->rtnh_ifindex
;
3131 attrlen
= rtnh_attrlen(rtnh
);
3133 struct nlattr
*nla
, *attrs
= rtnh_attrs(rtnh
);
3135 nla
= nla_find(attrs
, attrlen
, RTA_GATEWAY
);
3137 nla_memcpy(&r_cfg
.fc_gateway
, nla
, 16);
3138 r_cfg
.fc_flags
|= RTF_GATEWAY
;
3141 err
= ip6_route_del(&r_cfg
);
3145 rtnh
= rtnh_next(rtnh
, &remaining
);
3151 static int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
3153 struct fib6_config cfg
;
3156 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
3161 return ip6_route_multipath_del(&cfg
);
3163 return ip6_route_del(&cfg
);
3166 static int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
*nlh
)
3168 struct fib6_config cfg
;
3171 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
3176 return ip6_route_multipath_add(&cfg
);
3178 return ip6_route_add(&cfg
);
3181 static inline size_t rt6_nlmsg_size(struct rt6_info
*rt
)
3183 return NLMSG_ALIGN(sizeof(struct rtmsg
))
3184 + nla_total_size(16) /* RTA_SRC */
3185 + nla_total_size(16) /* RTA_DST */
3186 + nla_total_size(16) /* RTA_GATEWAY */
3187 + nla_total_size(16) /* RTA_PREFSRC */
3188 + nla_total_size(4) /* RTA_TABLE */
3189 + nla_total_size(4) /* RTA_IIF */
3190 + nla_total_size(4) /* RTA_OIF */
3191 + nla_total_size(4) /* RTA_PRIORITY */
3192 + RTAX_MAX
* nla_total_size(4) /* RTA_METRICS */
3193 + nla_total_size(sizeof(struct rta_cacheinfo
))
3194 + nla_total_size(TCP_CA_NAME_MAX
) /* RTAX_CC_ALGO */
3195 + nla_total_size(1) /* RTA_PREF */
3196 + lwtunnel_get_encap_size(rt
->dst
.lwtstate
);
3199 static int rt6_fill_node(struct net
*net
,
3200 struct sk_buff
*skb
, struct rt6_info
*rt
,
3201 struct in6_addr
*dst
, struct in6_addr
*src
,
3202 int iif
, int type
, u32 portid
, u32 seq
,
3203 int prefix
, int nowait
, unsigned int flags
)
3205 u32 metrics
[RTAX_MAX
];
3207 struct nlmsghdr
*nlh
;
3211 if (prefix
) { /* user wants prefix routes only */
3212 if (!(rt
->rt6i_flags
& RTF_PREFIX_RT
)) {
3213 /* success since this is not a prefix route */
3218 nlh
= nlmsg_put(skb
, portid
, seq
, type
, sizeof(*rtm
), flags
);
3222 rtm
= nlmsg_data(nlh
);
3223 rtm
->rtm_family
= AF_INET6
;
3224 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
3225 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
3228 table
= rt
->rt6i_table
->tb6_id
;
3230 table
= RT6_TABLE_UNSPEC
;
3231 rtm
->rtm_table
= table
;
3232 if (nla_put_u32(skb
, RTA_TABLE
, table
))
3233 goto nla_put_failure
;
3234 if (rt
->rt6i_flags
& RTF_REJECT
) {
3235 switch (rt
->dst
.error
) {
3237 rtm
->rtm_type
= RTN_BLACKHOLE
;
3240 rtm
->rtm_type
= RTN_PROHIBIT
;
3243 rtm
->rtm_type
= RTN_THROW
;
3246 rtm
->rtm_type
= RTN_UNREACHABLE
;
3250 else if (rt
->rt6i_flags
& RTF_LOCAL
)
3251 rtm
->rtm_type
= RTN_LOCAL
;
3252 else if (rt
->dst
.dev
&& (rt
->dst
.dev
->flags
& IFF_LOOPBACK
))
3253 rtm
->rtm_type
= RTN_LOCAL
;
3255 rtm
->rtm_type
= RTN_UNICAST
;
3257 if (!netif_carrier_ok(rt
->dst
.dev
)) {
3258 rtm
->rtm_flags
|= RTNH_F_LINKDOWN
;
3259 if (rt
->rt6i_idev
->cnf
.ignore_routes_with_linkdown
)
3260 rtm
->rtm_flags
|= RTNH_F_DEAD
;
3262 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
3263 rtm
->rtm_protocol
= rt
->rt6i_protocol
;
3264 if (rt
->rt6i_flags
& RTF_DYNAMIC
)
3265 rtm
->rtm_protocol
= RTPROT_REDIRECT
;
3266 else if (rt
->rt6i_flags
& RTF_ADDRCONF
) {
3267 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ROUTEINFO
))
3268 rtm
->rtm_protocol
= RTPROT_RA
;
3270 rtm
->rtm_protocol
= RTPROT_KERNEL
;
3273 if (rt
->rt6i_flags
& RTF_CACHE
)
3274 rtm
->rtm_flags
|= RTM_F_CLONED
;
3277 if (nla_put_in6_addr(skb
, RTA_DST
, dst
))
3278 goto nla_put_failure
;
3279 rtm
->rtm_dst_len
= 128;
3280 } else if (rtm
->rtm_dst_len
)
3281 if (nla_put_in6_addr(skb
, RTA_DST
, &rt
->rt6i_dst
.addr
))
3282 goto nla_put_failure
;
3283 #ifdef CONFIG_IPV6_SUBTREES
3285 if (nla_put_in6_addr(skb
, RTA_SRC
, src
))
3286 goto nla_put_failure
;
3287 rtm
->rtm_src_len
= 128;
3288 } else if (rtm
->rtm_src_len
&&
3289 nla_put_in6_addr(skb
, RTA_SRC
, &rt
->rt6i_src
.addr
))
3290 goto nla_put_failure
;
3293 #ifdef CONFIG_IPV6_MROUTE
3294 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
)) {
3295 int err
= ip6mr_get_route(net
, skb
, rtm
, nowait
,
3302 goto nla_put_failure
;
3304 if (err
== -EMSGSIZE
)
3305 goto nla_put_failure
;
3310 if (nla_put_u32(skb
, RTA_IIF
, iif
))
3311 goto nla_put_failure
;
3313 struct in6_addr saddr_buf
;
3314 if (ip6_route_get_saddr(net
, rt
, dst
, 0, &saddr_buf
) == 0 &&
3315 nla_put_in6_addr(skb
, RTA_PREFSRC
, &saddr_buf
))
3316 goto nla_put_failure
;
3319 if (rt
->rt6i_prefsrc
.plen
) {
3320 struct in6_addr saddr_buf
;
3321 saddr_buf
= rt
->rt6i_prefsrc
.addr
;
3322 if (nla_put_in6_addr(skb
, RTA_PREFSRC
, &saddr_buf
))
3323 goto nla_put_failure
;
3326 memcpy(metrics
, dst_metrics_ptr(&rt
->dst
), sizeof(metrics
));
3328 metrics
[RTAX_MTU
- 1] = rt
->rt6i_pmtu
;
3329 if (rtnetlink_put_metrics(skb
, metrics
) < 0)
3330 goto nla_put_failure
;
3332 if (rt
->rt6i_flags
& RTF_GATEWAY
) {
3333 if (nla_put_in6_addr(skb
, RTA_GATEWAY
, &rt
->rt6i_gateway
) < 0)
3334 goto nla_put_failure
;
3338 nla_put_u32(skb
, RTA_OIF
, rt
->dst
.dev
->ifindex
))
3339 goto nla_put_failure
;
3340 if (nla_put_u32(skb
, RTA_PRIORITY
, rt
->rt6i_metric
))
3341 goto nla_put_failure
;
3343 expires
= (rt
->rt6i_flags
& RTF_EXPIRES
) ? rt
->dst
.expires
- jiffies
: 0;
3345 if (rtnl_put_cacheinfo(skb
, &rt
->dst
, 0, expires
, rt
->dst
.error
) < 0)
3346 goto nla_put_failure
;
3348 if (nla_put_u8(skb
, RTA_PREF
, IPV6_EXTRACT_PREF(rt
->rt6i_flags
)))
3349 goto nla_put_failure
;
3351 if (lwtunnel_fill_encap(skb
, rt
->dst
.lwtstate
) < 0)
3352 goto nla_put_failure
;
3354 nlmsg_end(skb
, nlh
);
3358 nlmsg_cancel(skb
, nlh
);
3362 int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
3364 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
3367 if (nlmsg_len(arg
->cb
->nlh
) >= sizeof(struct rtmsg
)) {
3368 struct rtmsg
*rtm
= nlmsg_data(arg
->cb
->nlh
);
3369 prefix
= (rtm
->rtm_flags
& RTM_F_PREFIX
) != 0;
3373 return rt6_fill_node(arg
->net
,
3374 arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
3375 NETLINK_CB(arg
->cb
->skb
).portid
, arg
->cb
->nlh
->nlmsg_seq
,
3376 prefix
, 0, NLM_F_MULTI
);
3379 static int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
*nlh
)
3381 struct net
*net
= sock_net(in_skb
->sk
);
3382 struct nlattr
*tb
[RTA_MAX
+1];
3383 struct rt6_info
*rt
;
3384 struct sk_buff
*skb
;
3387 int err
, iif
= 0, oif
= 0;
3389 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
3394 memset(&fl6
, 0, sizeof(fl6
));
3395 rtm
= nlmsg_data(nlh
);
3396 fl6
.flowlabel
= ip6_make_flowinfo(rtm
->rtm_tos
, 0);
3399 if (nla_len(tb
[RTA_SRC
]) < sizeof(struct in6_addr
))
3402 fl6
.saddr
= *(struct in6_addr
*)nla_data(tb
[RTA_SRC
]);
3406 if (nla_len(tb
[RTA_DST
]) < sizeof(struct in6_addr
))
3409 fl6
.daddr
= *(struct in6_addr
*)nla_data(tb
[RTA_DST
]);
3413 iif
= nla_get_u32(tb
[RTA_IIF
]);
3416 oif
= nla_get_u32(tb
[RTA_OIF
]);
3419 fl6
.flowi6_mark
= nla_get_u32(tb
[RTA_MARK
]);
3422 fl6
.flowi6_uid
= make_kuid(current_user_ns(),
3423 nla_get_u32(tb
[RTA_UID
]));
3425 fl6
.flowi6_uid
= iif
? INVALID_UID
: current_uid();
3428 struct net_device
*dev
;
3431 dev
= __dev_get_by_index(net
, iif
);
3437 fl6
.flowi6_iif
= iif
;
3439 if (!ipv6_addr_any(&fl6
.saddr
))
3440 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
3442 rt
= (struct rt6_info
*)ip6_route_input_lookup(net
, dev
, &fl6
,
3445 fl6
.flowi6_oif
= oif
;
3447 rt
= (struct rt6_info
*)ip6_route_output(net
, NULL
, &fl6
);
3450 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
3457 /* Reserve room for dummy headers, this skb can pass
3458 through good chunk of routing engine.
3460 skb_reset_mac_header(skb
);
3461 skb_reserve(skb
, MAX_HEADER
+ sizeof(struct ipv6hdr
));
3463 skb_dst_set(skb
, &rt
->dst
);
3465 err
= rt6_fill_node(net
, skb
, rt
, &fl6
.daddr
, &fl6
.saddr
, iif
,
3466 RTM_NEWROUTE
, NETLINK_CB(in_skb
).portid
,
3467 nlh
->nlmsg_seq
, 0, 0, 0);
3473 err
= rtnl_unicast(skb
, net
, NETLINK_CB(in_skb
).portid
);
3478 void inet6_rt_notify(int event
, struct rt6_info
*rt
, struct nl_info
*info
,
3479 unsigned int nlm_flags
)
3481 struct sk_buff
*skb
;
3482 struct net
*net
= info
->nl_net
;
3487 seq
= info
->nlh
? info
->nlh
->nlmsg_seq
: 0;
3489 skb
= nlmsg_new(rt6_nlmsg_size(rt
), gfp_any());
3493 err
= rt6_fill_node(net
, skb
, rt
, NULL
, NULL
, 0,
3494 event
, info
->portid
, seq
, 0, 0, nlm_flags
);
3496 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3497 WARN_ON(err
== -EMSGSIZE
);
3501 rtnl_notify(skb
, net
, info
->portid
, RTNLGRP_IPV6_ROUTE
,
3502 info
->nlh
, gfp_any());
3506 rtnl_set_sk_err(net
, RTNLGRP_IPV6_ROUTE
, err
);
3509 static int ip6_route_dev_notify(struct notifier_block
*this,
3510 unsigned long event
, void *ptr
)
3512 struct net_device
*dev
= netdev_notifier_info_to_dev(ptr
);
3513 struct net
*net
= dev_net(dev
);
3515 if (!(dev
->flags
& IFF_LOOPBACK
))
3518 if (event
== NETDEV_REGISTER
) {
3519 net
->ipv6
.ip6_null_entry
->dst
.dev
= dev
;
3520 net
->ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(dev
);
3521 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3522 net
->ipv6
.ip6_prohibit_entry
->dst
.dev
= dev
;
3523 net
->ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(dev
);
3524 net
->ipv6
.ip6_blk_hole_entry
->dst
.dev
= dev
;
3525 net
->ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(dev
);
3527 } else if (event
== NETDEV_UNREGISTER
) {
3528 in6_dev_put(net
->ipv6
.ip6_null_entry
->rt6i_idev
);
3529 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3530 in6_dev_put(net
->ipv6
.ip6_prohibit_entry
->rt6i_idev
);
3531 in6_dev_put(net
->ipv6
.ip6_blk_hole_entry
->rt6i_idev
);
3542 #ifdef CONFIG_PROC_FS
3544 static const struct file_operations ipv6_route_proc_fops
= {
3545 .owner
= THIS_MODULE
,
3546 .open
= ipv6_route_open
,
3548 .llseek
= seq_lseek
,
3549 .release
= seq_release_net
,
3552 static int rt6_stats_seq_show(struct seq_file
*seq
, void *v
)
3554 struct net
*net
= (struct net
*)seq
->private;
3555 seq_printf(seq
, "%04x %04x %04x %04x %04x %04x %04x\n",
3556 net
->ipv6
.rt6_stats
->fib_nodes
,
3557 net
->ipv6
.rt6_stats
->fib_route_nodes
,
3558 net
->ipv6
.rt6_stats
->fib_rt_alloc
,
3559 net
->ipv6
.rt6_stats
->fib_rt_entries
,
3560 net
->ipv6
.rt6_stats
->fib_rt_cache
,
3561 dst_entries_get_slow(&net
->ipv6
.ip6_dst_ops
),
3562 net
->ipv6
.rt6_stats
->fib_discarded_routes
);
3567 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
3569 return single_open_net(inode
, file
, rt6_stats_seq_show
);
3572 static const struct file_operations rt6_stats_seq_fops
= {
3573 .owner
= THIS_MODULE
,
3574 .open
= rt6_stats_seq_open
,
3576 .llseek
= seq_lseek
,
3577 .release
= single_release_net
,
3579 #endif /* CONFIG_PROC_FS */
3581 #ifdef CONFIG_SYSCTL
3584 int ipv6_sysctl_rtcache_flush(struct ctl_table
*ctl
, int write
,
3585 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
3592 net
= (struct net
*)ctl
->extra1
;
3593 delay
= net
->ipv6
.sysctl
.flush_delay
;
3594 proc_dointvec(ctl
, write
, buffer
, lenp
, ppos
);
3595 fib6_run_gc(delay
<= 0 ? 0 : (unsigned long)delay
, net
, delay
> 0);
3599 struct ctl_table ipv6_route_table_template
[] = {
3601 .procname
= "flush",
3602 .data
= &init_net
.ipv6
.sysctl
.flush_delay
,
3603 .maxlen
= sizeof(int),
3605 .proc_handler
= ipv6_sysctl_rtcache_flush
3608 .procname
= "gc_thresh",
3609 .data
= &ip6_dst_ops_template
.gc_thresh
,
3610 .maxlen
= sizeof(int),
3612 .proc_handler
= proc_dointvec
,
3615 .procname
= "max_size",
3616 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_max_size
,
3617 .maxlen
= sizeof(int),
3619 .proc_handler
= proc_dointvec
,
3622 .procname
= "gc_min_interval",
3623 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
3624 .maxlen
= sizeof(int),
3626 .proc_handler
= proc_dointvec_jiffies
,
3629 .procname
= "gc_timeout",
3630 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_timeout
,
3631 .maxlen
= sizeof(int),
3633 .proc_handler
= proc_dointvec_jiffies
,
3636 .procname
= "gc_interval",
3637 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_interval
,
3638 .maxlen
= sizeof(int),
3640 .proc_handler
= proc_dointvec_jiffies
,
3643 .procname
= "gc_elasticity",
3644 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_elasticity
,
3645 .maxlen
= sizeof(int),
3647 .proc_handler
= proc_dointvec
,
3650 .procname
= "mtu_expires",
3651 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_mtu_expires
,
3652 .maxlen
= sizeof(int),
3654 .proc_handler
= proc_dointvec_jiffies
,
3657 .procname
= "min_adv_mss",
3658 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_min_advmss
,
3659 .maxlen
= sizeof(int),
3661 .proc_handler
= proc_dointvec
,
3664 .procname
= "gc_min_interval_ms",
3665 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
3666 .maxlen
= sizeof(int),
3668 .proc_handler
= proc_dointvec_ms_jiffies
,
3673 struct ctl_table
* __net_init
ipv6_route_sysctl_init(struct net
*net
)
3675 struct ctl_table
*table
;
3677 table
= kmemdup(ipv6_route_table_template
,
3678 sizeof(ipv6_route_table_template
),
3682 table
[0].data
= &net
->ipv6
.sysctl
.flush_delay
;
3683 table
[0].extra1
= net
;
3684 table
[1].data
= &net
->ipv6
.ip6_dst_ops
.gc_thresh
;
3685 table
[2].data
= &net
->ipv6
.sysctl
.ip6_rt_max_size
;
3686 table
[3].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
3687 table
[4].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
3688 table
[5].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_interval
;
3689 table
[6].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
3690 table
[7].data
= &net
->ipv6
.sysctl
.ip6_rt_mtu_expires
;
3691 table
[8].data
= &net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
3692 table
[9].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
3694 /* Don't export sysctls to unprivileged users */
3695 if (net
->user_ns
!= &init_user_ns
)
3696 table
[0].procname
= NULL
;
3703 static int __net_init
ip6_route_net_init(struct net
*net
)
3707 memcpy(&net
->ipv6
.ip6_dst_ops
, &ip6_dst_ops_template
,
3708 sizeof(net
->ipv6
.ip6_dst_ops
));
3710 if (dst_entries_init(&net
->ipv6
.ip6_dst_ops
) < 0)
3711 goto out_ip6_dst_ops
;
3713 net
->ipv6
.ip6_null_entry
= kmemdup(&ip6_null_entry_template
,
3714 sizeof(*net
->ipv6
.ip6_null_entry
),
3716 if (!net
->ipv6
.ip6_null_entry
)
3717 goto out_ip6_dst_entries
;
3718 net
->ipv6
.ip6_null_entry
->dst
.path
=
3719 (struct dst_entry
*)net
->ipv6
.ip6_null_entry
;
3720 net
->ipv6
.ip6_null_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
3721 dst_init_metrics(&net
->ipv6
.ip6_null_entry
->dst
,
3722 ip6_template_metrics
, true);
3724 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3725 net
->ipv6
.ip6_prohibit_entry
= kmemdup(&ip6_prohibit_entry_template
,
3726 sizeof(*net
->ipv6
.ip6_prohibit_entry
),
3728 if (!net
->ipv6
.ip6_prohibit_entry
)
3729 goto out_ip6_null_entry
;
3730 net
->ipv6
.ip6_prohibit_entry
->dst
.path
=
3731 (struct dst_entry
*)net
->ipv6
.ip6_prohibit_entry
;
3732 net
->ipv6
.ip6_prohibit_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
3733 dst_init_metrics(&net
->ipv6
.ip6_prohibit_entry
->dst
,
3734 ip6_template_metrics
, true);
3736 net
->ipv6
.ip6_blk_hole_entry
= kmemdup(&ip6_blk_hole_entry_template
,
3737 sizeof(*net
->ipv6
.ip6_blk_hole_entry
),
3739 if (!net
->ipv6
.ip6_blk_hole_entry
)
3740 goto out_ip6_prohibit_entry
;
3741 net
->ipv6
.ip6_blk_hole_entry
->dst
.path
=
3742 (struct dst_entry
*)net
->ipv6
.ip6_blk_hole_entry
;
3743 net
->ipv6
.ip6_blk_hole_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
3744 dst_init_metrics(&net
->ipv6
.ip6_blk_hole_entry
->dst
,
3745 ip6_template_metrics
, true);
3748 net
->ipv6
.sysctl
.flush_delay
= 0;
3749 net
->ipv6
.sysctl
.ip6_rt_max_size
= 4096;
3750 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= HZ
/ 2;
3751 net
->ipv6
.sysctl
.ip6_rt_gc_timeout
= 60*HZ
;
3752 net
->ipv6
.sysctl
.ip6_rt_gc_interval
= 30*HZ
;
3753 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 9;
3754 net
->ipv6
.sysctl
.ip6_rt_mtu_expires
= 10*60*HZ
;
3755 net
->ipv6
.sysctl
.ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
3757 net
->ipv6
.ip6_rt_gc_expire
= 30*HZ
;
3763 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3764 out_ip6_prohibit_entry
:
3765 kfree(net
->ipv6
.ip6_prohibit_entry
);
3767 kfree(net
->ipv6
.ip6_null_entry
);
3769 out_ip6_dst_entries
:
3770 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
3775 static void __net_exit
ip6_route_net_exit(struct net
*net
)
3777 kfree(net
->ipv6
.ip6_null_entry
);
3778 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3779 kfree(net
->ipv6
.ip6_prohibit_entry
);
3780 kfree(net
->ipv6
.ip6_blk_hole_entry
);
3782 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
3785 static int __net_init
ip6_route_net_init_late(struct net
*net
)
3787 #ifdef CONFIG_PROC_FS
3788 proc_create("ipv6_route", 0, net
->proc_net
, &ipv6_route_proc_fops
);
3789 proc_create("rt6_stats", S_IRUGO
, net
->proc_net
, &rt6_stats_seq_fops
);
3794 static void __net_exit
ip6_route_net_exit_late(struct net
*net
)
3796 #ifdef CONFIG_PROC_FS
3797 remove_proc_entry("ipv6_route", net
->proc_net
);
3798 remove_proc_entry("rt6_stats", net
->proc_net
);
3802 static struct pernet_operations ip6_route_net_ops
= {
3803 .init
= ip6_route_net_init
,
3804 .exit
= ip6_route_net_exit
,
3807 static int __net_init
ipv6_inetpeer_init(struct net
*net
)
3809 struct inet_peer_base
*bp
= kmalloc(sizeof(*bp
), GFP_KERNEL
);
3813 inet_peer_base_init(bp
);
3814 net
->ipv6
.peers
= bp
;
3818 static void __net_exit
ipv6_inetpeer_exit(struct net
*net
)
3820 struct inet_peer_base
*bp
= net
->ipv6
.peers
;
3822 net
->ipv6
.peers
= NULL
;
3823 inetpeer_invalidate_tree(bp
);
3827 static struct pernet_operations ipv6_inetpeer_ops
= {
3828 .init
= ipv6_inetpeer_init
,
3829 .exit
= ipv6_inetpeer_exit
,
3832 static struct pernet_operations ip6_route_net_late_ops
= {
3833 .init
= ip6_route_net_init_late
,
3834 .exit
= ip6_route_net_exit_late
,
3837 static struct notifier_block ip6_route_dev_notifier
= {
3838 .notifier_call
= ip6_route_dev_notify
,
3839 .priority
= ADDRCONF_NOTIFY_PRIORITY
- 10,
3842 void __init
ip6_route_init_special_entries(void)
3844 /* Registering of the loopback is done before this portion of code,
3845 * the loopback reference in rt6_info will not be taken, do it
3846 * manually for init_net */
3847 init_net
.ipv6
.ip6_null_entry
->dst
.dev
= init_net
.loopback_dev
;
3848 init_net
.ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
3849 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3850 init_net
.ipv6
.ip6_prohibit_entry
->dst
.dev
= init_net
.loopback_dev
;
3851 init_net
.ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
3852 init_net
.ipv6
.ip6_blk_hole_entry
->dst
.dev
= init_net
.loopback_dev
;
3853 init_net
.ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
3857 int __init
ip6_route_init(void)
3863 ip6_dst_ops_template
.kmem_cachep
=
3864 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info
), 0,
3865 SLAB_HWCACHE_ALIGN
, NULL
);
3866 if (!ip6_dst_ops_template
.kmem_cachep
)
3869 ret
= dst_entries_init(&ip6_dst_blackhole_ops
);
3871 goto out_kmem_cache
;
3873 ret
= register_pernet_subsys(&ipv6_inetpeer_ops
);
3875 goto out_dst_entries
;
3877 ret
= register_pernet_subsys(&ip6_route_net_ops
);
3879 goto out_register_inetpeer
;
3881 ip6_dst_blackhole_ops
.kmem_cachep
= ip6_dst_ops_template
.kmem_cachep
;
3885 goto out_register_subsys
;
3891 ret
= fib6_rules_init();
3895 ret
= register_pernet_subsys(&ip6_route_net_late_ops
);
3897 goto fib6_rules_init
;
3900 if (__rtnl_register(PF_INET6
, RTM_NEWROUTE
, inet6_rtm_newroute
, NULL
, NULL
) ||
3901 __rtnl_register(PF_INET6
, RTM_DELROUTE
, inet6_rtm_delroute
, NULL
, NULL
) ||
3902 __rtnl_register(PF_INET6
, RTM_GETROUTE
, inet6_rtm_getroute
, NULL
, NULL
))
3903 goto out_register_late_subsys
;
3905 ret
= register_netdevice_notifier(&ip6_route_dev_notifier
);
3907 goto out_register_late_subsys
;
3909 for_each_possible_cpu(cpu
) {
3910 struct uncached_list
*ul
= per_cpu_ptr(&rt6_uncached_list
, cpu
);
3912 INIT_LIST_HEAD(&ul
->head
);
3913 spin_lock_init(&ul
->lock
);
3919 out_register_late_subsys
:
3920 unregister_pernet_subsys(&ip6_route_net_late_ops
);
3922 fib6_rules_cleanup();
3927 out_register_subsys
:
3928 unregister_pernet_subsys(&ip6_route_net_ops
);
3929 out_register_inetpeer
:
3930 unregister_pernet_subsys(&ipv6_inetpeer_ops
);
3932 dst_entries_destroy(&ip6_dst_blackhole_ops
);
3934 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);
3938 void ip6_route_cleanup(void)
3940 unregister_netdevice_notifier(&ip6_route_dev_notifier
);
3941 unregister_pernet_subsys(&ip6_route_net_late_ops
);
3942 fib6_rules_cleanup();
3945 unregister_pernet_subsys(&ipv6_inetpeer_ops
);
3946 unregister_pernet_subsys(&ip6_route_net_ops
);
3947 dst_entries_destroy(&ip6_dst_blackhole_ops
);
3948 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);