2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
24 * Fixed routing subtrees.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
52 #include <linux/rtnetlink.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
75 #define CLONE_OFFLINK_ROUTE 0
77 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
);
78 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
);
79 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*);
80 static void ip6_dst_destroy(struct dst_entry
*);
81 static void ip6_dst_ifdown(struct dst_entry
*,
82 struct net_device
*dev
, int how
);
83 static int ip6_dst_gc(struct dst_ops
*ops
);
85 static int ip6_pkt_discard(struct sk_buff
*skb
);
86 static int ip6_pkt_discard_out(struct sk_buff
*skb
);
87 static void ip6_link_failure(struct sk_buff
*skb
);
88 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info
*rt6_add_route_info(struct net
*net
,
92 struct in6_addr
*prefix
, int prefixlen
,
93 struct in6_addr
*gwaddr
, int ifindex
,
95 static struct rt6_info
*rt6_get_route_info(struct net
*net
,
96 struct in6_addr
*prefix
, int prefixlen
,
97 struct in6_addr
*gwaddr
, int ifindex
);
100 static struct dst_ops ip6_dst_ops_template
= {
102 .protocol
= cpu_to_be16(ETH_P_IPV6
),
105 .check
= ip6_dst_check
,
106 .destroy
= ip6_dst_destroy
,
107 .ifdown
= ip6_dst_ifdown
,
108 .negative_advice
= ip6_negative_advice
,
109 .link_failure
= ip6_link_failure
,
110 .update_pmtu
= ip6_rt_update_pmtu
,
111 .local_out
= __ip6_local_out
,
114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
118 static struct dst_ops ip6_dst_blackhole_ops
= {
120 .protocol
= cpu_to_be16(ETH_P_IPV6
),
121 .destroy
= ip6_dst_destroy
,
122 .check
= ip6_dst_check
,
123 .update_pmtu
= ip6_rt_blackhole_update_pmtu
,
126 static struct rt6_info ip6_null_entry_template
= {
128 .__refcnt
= ATOMIC_INIT(1),
131 .error
= -ENETUNREACH
,
132 .input
= ip6_pkt_discard
,
133 .output
= ip6_pkt_discard_out
,
135 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
136 .rt6i_protocol
= RTPROT_KERNEL
,
137 .rt6i_metric
= ~(u32
) 0,
138 .rt6i_ref
= ATOMIC_INIT(1),
141 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
143 static int ip6_pkt_prohibit(struct sk_buff
*skb
);
144 static int ip6_pkt_prohibit_out(struct sk_buff
*skb
);
146 static struct rt6_info ip6_prohibit_entry_template
= {
148 .__refcnt
= ATOMIC_INIT(1),
152 .input
= ip6_pkt_prohibit
,
153 .output
= ip6_pkt_prohibit_out
,
155 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
156 .rt6i_protocol
= RTPROT_KERNEL
,
157 .rt6i_metric
= ~(u32
) 0,
158 .rt6i_ref
= ATOMIC_INIT(1),
161 static struct rt6_info ip6_blk_hole_entry_template
= {
163 .__refcnt
= ATOMIC_INIT(1),
167 .input
= dst_discard
,
168 .output
= dst_discard
,
170 .rt6i_flags
= (RTF_REJECT
| RTF_NONEXTHOP
),
171 .rt6i_protocol
= RTPROT_KERNEL
,
172 .rt6i_metric
= ~(u32
) 0,
173 .rt6i_ref
= ATOMIC_INIT(1),
/*
 * Allocate a new rt6_info backed by the given dst_ops (normally the
 * per-netns ip6_dst_ops). dst_alloc() hands back the embedded
 * dst_entry, which is the first member of rt6_info, so the downcast
 * is safe.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *entry = dst_alloc(ops);

	return (struct rt6_info *)entry;
}
/*
 * dst_ops.destroy callback: tear down per-route state held by an
 * rt6_info before the dst_entry memory is released.
 * NOTE(review): this extract elides interior lines of the original
 * body (the actual idev/peer release calls); only the reference-
 * clearing side is visible below — confirm against the full file.
 */
184 static void ip6_dst_destroy(struct dst_entry
*dst
)
/* Recover the containing rt6_info from its embedded dst_entry. */
186 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
187 struct inet6_dev
*idev
= rt
->rt6i_idev
;
188 struct inet_peer
*peer
= rt
->rt6i_peer
;
/* Clear the inet6_dev back-pointer so no stale reference remains. */
191 rt
->rt6i_idev
= NULL
;
/* Only RTF_CACHE (cloned) routes are expected to carry an inet_peer. */
195 BUG_ON(!(rt
->rt6i_flags
& RTF_CACHE
));
196 rt
->rt6i_peer
= NULL
;
/*
 * Bind an inet_peer entry to a cached route, keyed by the route's
 * destination address. The cmpxchg() makes installation race-free:
 * only the first caller's peer is kept.
 * NOTE(review): the lines handling the losing side of the cmpxchg
 * race (releasing the redundant peer) are elided in this extract —
 * confirm against the full file.
 */
201 void rt6_bind_peer(struct rt6_info
*rt
, int create
)
203 struct inet_peer
*peer
;
/* Peer binding only makes sense on RTF_CACHE (cloned) routes. */
205 if (WARN_ON(!(rt
->rt6i_flags
& RTF_CACHE
)))
208 peer
= inet_getpeer_v6(&rt
->rt6i_dst
.addr
, create
);
/* Install atomically; a non-NULL cmpxchg result means another CPU
 * installed its peer first. */
209 if (peer
&& cmpxchg(&rt
->rt6i_peer
, NULL
, peer
) != NULL
)
213 static void ip6_dst_ifdown(struct dst_entry
*dst
, struct net_device
*dev
,
216 struct rt6_info
*rt
= (struct rt6_info
*)dst
;
217 struct inet6_dev
*idev
= rt
->rt6i_idev
;
218 struct net_device
*loopback_dev
=
219 dev_net(dev
)->loopback_dev
;
221 if (dev
!= loopback_dev
&& idev
!= NULL
&& idev
->dev
== dev
) {
222 struct inet6_dev
*loopback_idev
=
223 in6_dev_get(loopback_dev
);
224 if (loopback_idev
!= NULL
) {
225 rt
->rt6i_idev
= loopback_idev
;
231 static __inline__
int rt6_check_expired(const struct rt6_info
*rt
)
233 return (rt
->rt6i_flags
& RTF_EXPIRES
) &&
234 time_after(jiffies
, rt
->rt6i_expires
);
237 static inline int rt6_need_strict(struct in6_addr
*daddr
)
239 return ipv6_addr_type(daddr
) &
240 (IPV6_ADDR_MULTICAST
| IPV6_ADDR_LINKLOCAL
| IPV6_ADDR_LOOPBACK
);
244 * Route lookup. Any table->tb6_lock is implied.
247 static inline struct rt6_info
*rt6_device_match(struct net
*net
,
249 struct in6_addr
*saddr
,
253 struct rt6_info
*local
= NULL
;
254 struct rt6_info
*sprt
;
256 if (!oif
&& ipv6_addr_any(saddr
))
259 for (sprt
= rt
; sprt
; sprt
= sprt
->dst
.rt6_next
) {
260 struct net_device
*dev
= sprt
->rt6i_dev
;
263 if (dev
->ifindex
== oif
)
265 if (dev
->flags
& IFF_LOOPBACK
) {
266 if (sprt
->rt6i_idev
== NULL
||
267 sprt
->rt6i_idev
->dev
->ifindex
!= oif
) {
268 if (flags
& RT6_LOOKUP_F_IFACE
&& oif
)
270 if (local
&& (!oif
||
271 local
->rt6i_idev
->dev
->ifindex
== oif
))
277 if (ipv6_chk_addr(net
, saddr
, dev
,
278 flags
& RT6_LOOKUP_F_IFACE
))
287 if (flags
& RT6_LOOKUP_F_IFACE
)
288 return net
->ipv6
.ip6_null_entry
;
294 #ifdef CONFIG_IPV6_ROUTER_PREF
295 static void rt6_probe(struct rt6_info
*rt
)
297 struct neighbour
*neigh
= rt
? rt
->rt6i_nexthop
: NULL
;
299 * Okay, this does not seem to be appropriate
300 * for now, however, we need to check if it
301 * is really so; aka Router Reachability Probing.
303 * Router Reachability Probe MUST be rate-limited
304 * to no more than one per minute.
306 if (!neigh
|| (neigh
->nud_state
& NUD_VALID
))
308 read_lock_bh(&neigh
->lock
);
309 if (!(neigh
->nud_state
& NUD_VALID
) &&
310 time_after(jiffies
, neigh
->updated
+ rt
->rt6i_idev
->cnf
.rtr_probe_interval
)) {
311 struct in6_addr mcaddr
;
312 struct in6_addr
*target
;
314 neigh
->updated
= jiffies
;
315 read_unlock_bh(&neigh
->lock
);
317 target
= (struct in6_addr
*)&neigh
->primary_key
;
318 addrconf_addr_solict_mult(target
, &mcaddr
);
319 ndisc_send_ns(rt
->rt6i_dev
, NULL
, target
, &mcaddr
, NULL
);
321 read_unlock_bh(&neigh
->lock
);
324 static inline void rt6_probe(struct rt6_info
*rt
)
330 * Default Router Selection (RFC 2461 6.3.6)
/*
 * Score how well route @rt matches the outgoing interface @oif
 * (part of default router selection, RFC 2461 6.3.6).
 * NOTE(review): the return statements are elided in this extract; in
 * context the branches distinguish "no constraint / exact device
 * match", "loopback-device match via rt6i_idev", and "no match" —
 * confirm the returned scores against the full file.
 */
332 static inline int rt6_check_dev(struct rt6_info
*rt
, int oif
)
334 struct net_device
*dev
= rt
->rt6i_dev
;
/* No interface constraint, or the route's device is the requested one. */
335 if (!oif
|| dev
->ifindex
== oif
)
/* Loopback device: compare the bound inet6_dev's underlying device
 * index instead. */
337 if ((dev
->flags
& IFF_LOOPBACK
) &&
338 rt
->rt6i_idev
&& rt
->rt6i_idev
->dev
->ifindex
== oif
)
343 static inline int rt6_check_neigh(struct rt6_info
*rt
)
345 struct neighbour
*neigh
= rt
->rt6i_nexthop
;
347 if (rt
->rt6i_flags
& RTF_NONEXTHOP
||
348 !(rt
->rt6i_flags
& RTF_GATEWAY
))
351 read_lock_bh(&neigh
->lock
);
352 if (neigh
->nud_state
& NUD_VALID
)
354 #ifdef CONFIG_IPV6_ROUTER_PREF
355 else if (neigh
->nud_state
& NUD_FAILED
)
360 read_unlock_bh(&neigh
->lock
);
366 static int rt6_score_route(struct rt6_info
*rt
, int oif
,
371 m
= rt6_check_dev(rt
, oif
);
372 if (!m
&& (strict
& RT6_LOOKUP_F_IFACE
))
374 #ifdef CONFIG_IPV6_ROUTER_PREF
375 m
|= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt
->rt6i_flags
)) << 2;
377 n
= rt6_check_neigh(rt
);
378 if (!n
&& (strict
& RT6_LOOKUP_F_REACHABLE
))
383 static struct rt6_info
*find_match(struct rt6_info
*rt
, int oif
, int strict
,
384 int *mpri
, struct rt6_info
*match
)
388 if (rt6_check_expired(rt
))
391 m
= rt6_score_route(rt
, oif
, strict
);
396 if (strict
& RT6_LOOKUP_F_REACHABLE
)
400 } else if (strict
& RT6_LOOKUP_F_REACHABLE
) {
408 static struct rt6_info
*find_rr_leaf(struct fib6_node
*fn
,
409 struct rt6_info
*rr_head
,
410 u32 metric
, int oif
, int strict
)
412 struct rt6_info
*rt
, *match
;
416 for (rt
= rr_head
; rt
&& rt
->rt6i_metric
== metric
;
417 rt
= rt
->dst
.rt6_next
)
418 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
419 for (rt
= fn
->leaf
; rt
&& rt
!= rr_head
&& rt
->rt6i_metric
== metric
;
420 rt
= rt
->dst
.rt6_next
)
421 match
= find_match(rt
, oif
, strict
, &mpri
, match
);
426 static struct rt6_info
*rt6_select(struct fib6_node
*fn
, int oif
, int strict
)
428 struct rt6_info
*match
, *rt0
;
431 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
432 __func__
, fn
->leaf
, oif
);
436 fn
->rr_ptr
= rt0
= fn
->leaf
;
438 match
= find_rr_leaf(fn
, rt0
, rt0
->rt6i_metric
, oif
, strict
);
441 (strict
& RT6_LOOKUP_F_REACHABLE
)) {
442 struct rt6_info
*next
= rt0
->dst
.rt6_next
;
444 /* no entries matched; do round-robin */
445 if (!next
|| next
->rt6i_metric
!= rt0
->rt6i_metric
)
452 RT6_TRACE("%s() => %p\n",
455 net
= dev_net(rt0
->rt6i_dev
);
456 return match
? match
: net
->ipv6
.ip6_null_entry
;
459 #ifdef CONFIG_IPV6_ROUTE_INFO
460 int rt6_route_rcv(struct net_device
*dev
, u8
*opt
, int len
,
461 struct in6_addr
*gwaddr
)
463 struct net
*net
= dev_net(dev
);
464 struct route_info
*rinfo
= (struct route_info
*) opt
;
465 struct in6_addr prefix_buf
, *prefix
;
467 unsigned long lifetime
;
470 if (len
< sizeof(struct route_info
)) {
474 /* Sanity check for prefix_len and length */
475 if (rinfo
->length
> 3) {
477 } else if (rinfo
->prefix_len
> 128) {
479 } else if (rinfo
->prefix_len
> 64) {
480 if (rinfo
->length
< 2) {
483 } else if (rinfo
->prefix_len
> 0) {
484 if (rinfo
->length
< 1) {
489 pref
= rinfo
->route_pref
;
490 if (pref
== ICMPV6_ROUTER_PREF_INVALID
)
493 lifetime
= addrconf_timeout_fixup(ntohl(rinfo
->lifetime
), HZ
);
495 if (rinfo
->length
== 3)
496 prefix
= (struct in6_addr
*)rinfo
->prefix
;
498 /* this function is safe */
499 ipv6_addr_prefix(&prefix_buf
,
500 (struct in6_addr
*)rinfo
->prefix
,
502 prefix
= &prefix_buf
;
505 rt
= rt6_get_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
,
508 if (rt
&& !lifetime
) {
514 rt
= rt6_add_route_info(net
, prefix
, rinfo
->prefix_len
, gwaddr
, dev
->ifindex
,
517 rt
->rt6i_flags
= RTF_ROUTEINFO
|
518 (rt
->rt6i_flags
& ~RTF_PREF_MASK
) | RTF_PREF(pref
);
521 if (!addrconf_finite_timeout(lifetime
)) {
522 rt
->rt6i_flags
&= ~RTF_EXPIRES
;
524 rt
->rt6i_expires
= jiffies
+ HZ
* lifetime
;
525 rt
->rt6i_flags
|= RTF_EXPIRES
;
527 dst_release(&rt
->dst
);
533 #define BACKTRACK(__net, saddr) \
535 if (rt == __net->ipv6.ip6_null_entry) { \
536 struct fib6_node *pn; \
538 if (fn->fn_flags & RTN_TL_ROOT) \
541 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
542 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
545 if (fn->fn_flags & RTN_RTINFO) \
551 static struct rt6_info
*ip6_pol_route_lookup(struct net
*net
,
552 struct fib6_table
*table
,
553 struct flowi
*fl
, int flags
)
555 struct fib6_node
*fn
;
558 read_lock_bh(&table
->tb6_lock
);
559 fn
= fib6_lookup(&table
->tb6_root
, &fl
->fl6_dst
, &fl
->fl6_src
);
562 rt
= rt6_device_match(net
, rt
, &fl
->fl6_src
, fl
->oif
, flags
);
563 BACKTRACK(net
, &fl
->fl6_src
);
565 dst_use(&rt
->dst
, jiffies
);
566 read_unlock_bh(&table
->tb6_lock
);
571 struct rt6_info
*rt6_lookup(struct net
*net
, const struct in6_addr
*daddr
,
572 const struct in6_addr
*saddr
, int oif
, int strict
)
578 struct dst_entry
*dst
;
579 int flags
= strict
? RT6_LOOKUP_F_IFACE
: 0;
582 memcpy(&fl
.fl6_src
, saddr
, sizeof(*saddr
));
583 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
586 dst
= fib6_rule_lookup(net
, &fl
, flags
, ip6_pol_route_lookup
);
588 return (struct rt6_info
*) dst
;
595 EXPORT_SYMBOL(rt6_lookup
);
597 /* ip6_ins_rt is called with FREE table->tb6_lock.
598 It takes new route entry, the addition fails by any reason the
599 route is freed. In any case, if caller does not hold it, it may
/*
 * Insert @rt into its owning FIB table under the table write lock.
 * Called with table->tb6_lock NOT held (see the comment block above
 * in the original file). NOTE(review): the declaration of `err` and
 * the trailing `return err;` are elided in this extract — confirm
 * against the full file.
 */
603 static int __ip6_ins_rt(struct rt6_info
*rt
, struct nl_info
*info
)
606 struct fib6_table
*table
;
608 table
= rt
->rt6i_table
;
/* Serialize the tree modification against other table users. */
609 write_lock_bh(&table
->tb6_lock
);
610 err
= fib6_add(&table
->tb6_root
, rt
, info
);
611 write_unlock_bh(&table
->tb6_lock
);
616 int ip6_ins_rt(struct rt6_info
*rt
)
618 struct nl_info info
= {
619 .nl_net
= dev_net(rt
->rt6i_dev
),
621 return __ip6_ins_rt(rt
, &info
);
624 static struct rt6_info
*rt6_alloc_cow(struct rt6_info
*ort
, struct in6_addr
*daddr
,
625 struct in6_addr
*saddr
)
633 rt
= ip6_rt_copy(ort
);
636 struct neighbour
*neigh
;
637 int attempts
= !in_softirq();
639 if (!(rt
->rt6i_flags
&RTF_GATEWAY
)) {
640 if (rt
->rt6i_dst
.plen
!= 128 &&
641 ipv6_addr_equal(&rt
->rt6i_dst
.addr
, daddr
))
642 rt
->rt6i_flags
|= RTF_ANYCAST
;
643 ipv6_addr_copy(&rt
->rt6i_gateway
, daddr
);
646 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, daddr
);
647 rt
->rt6i_dst
.plen
= 128;
648 rt
->rt6i_flags
|= RTF_CACHE
;
649 rt
->dst
.flags
|= DST_HOST
;
651 #ifdef CONFIG_IPV6_SUBTREES
652 if (rt
->rt6i_src
.plen
&& saddr
) {
653 ipv6_addr_copy(&rt
->rt6i_src
.addr
, saddr
);
654 rt
->rt6i_src
.plen
= 128;
659 neigh
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
661 struct net
*net
= dev_net(rt
->rt6i_dev
);
662 int saved_rt_min_interval
=
663 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
664 int saved_rt_elasticity
=
665 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
667 if (attempts
-- > 0) {
668 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 1;
669 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= 0;
671 ip6_dst_gc(&net
->ipv6
.ip6_dst_ops
);
673 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
=
675 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
=
676 saved_rt_min_interval
;
682 "ipv6: Neighbour table overflow.\n");
686 rt
->rt6i_nexthop
= neigh
;
693 static struct rt6_info
*rt6_alloc_clone(struct rt6_info
*ort
, struct in6_addr
*daddr
)
695 struct rt6_info
*rt
= ip6_rt_copy(ort
);
697 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, daddr
);
698 rt
->rt6i_dst
.plen
= 128;
699 rt
->rt6i_flags
|= RTF_CACHE
;
700 rt
->dst
.flags
|= DST_HOST
;
701 rt
->rt6i_nexthop
= neigh_clone(ort
->rt6i_nexthop
);
706 static struct rt6_info
*ip6_pol_route(struct net
*net
, struct fib6_table
*table
, int oif
,
707 struct flowi
*fl
, int flags
)
709 struct fib6_node
*fn
;
710 struct rt6_info
*rt
, *nrt
;
714 int reachable
= net
->ipv6
.devconf_all
->forwarding
? 0 : RT6_LOOKUP_F_REACHABLE
;
716 strict
|= flags
& RT6_LOOKUP_F_IFACE
;
719 read_lock_bh(&table
->tb6_lock
);
722 fn
= fib6_lookup(&table
->tb6_root
, &fl
->fl6_dst
, &fl
->fl6_src
);
725 rt
= rt6_select(fn
, oif
, strict
| reachable
);
727 BACKTRACK(net
, &fl
->fl6_src
);
728 if (rt
== net
->ipv6
.ip6_null_entry
||
729 rt
->rt6i_flags
& RTF_CACHE
)
733 read_unlock_bh(&table
->tb6_lock
);
735 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
736 nrt
= rt6_alloc_cow(rt
, &fl
->fl6_dst
, &fl
->fl6_src
);
738 #if CLONE_OFFLINK_ROUTE
739 nrt
= rt6_alloc_clone(rt
, &fl
->fl6_dst
);
745 dst_release(&rt
->dst
);
746 rt
= nrt
? : net
->ipv6
.ip6_null_entry
;
750 err
= ip6_ins_rt(nrt
);
759 * Race condition! In the gap, when table->tb6_lock was
760 * released someone could insert this route. Relookup.
762 dst_release(&rt
->dst
);
771 read_unlock_bh(&table
->tb6_lock
);
773 rt
->dst
.lastuse
= jiffies
;
779 static struct rt6_info
*ip6_pol_route_input(struct net
*net
, struct fib6_table
*table
,
780 struct flowi
*fl
, int flags
)
782 return ip6_pol_route(net
, table
, fl
->iif
, fl
, flags
);
785 void ip6_route_input(struct sk_buff
*skb
)
787 struct ipv6hdr
*iph
= ipv6_hdr(skb
);
788 struct net
*net
= dev_net(skb
->dev
);
789 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
791 .iif
= skb
->dev
->ifindex
,
792 .fl6_dst
= iph
->daddr
,
793 .fl6_src
= iph
->saddr
,
794 .fl6_flowlabel
= (* (__be32
*) iph
)&IPV6_FLOWINFO_MASK
,
796 .proto
= iph
->nexthdr
,
799 if (rt6_need_strict(&iph
->daddr
) && skb
->dev
->type
!= ARPHRD_PIMREG
)
800 flags
|= RT6_LOOKUP_F_IFACE
;
802 skb_dst_set(skb
, fib6_rule_lookup(net
, &fl
, flags
, ip6_pol_route_input
));
805 static struct rt6_info
*ip6_pol_route_output(struct net
*net
, struct fib6_table
*table
,
806 struct flowi
*fl
, int flags
)
808 return ip6_pol_route(net
, table
, fl
->oif
, fl
, flags
);
811 struct dst_entry
* ip6_route_output(struct net
*net
, struct sock
*sk
,
816 if ((sk
&& sk
->sk_bound_dev_if
) || rt6_need_strict(&fl
->fl6_dst
))
817 flags
|= RT6_LOOKUP_F_IFACE
;
819 if (!ipv6_addr_any(&fl
->fl6_src
))
820 flags
|= RT6_LOOKUP_F_HAS_SADDR
;
822 flags
|= rt6_srcprefs2flags(inet6_sk(sk
)->srcprefs
);
824 return fib6_rule_lookup(net
, fl
, flags
, ip6_pol_route_output
);
827 EXPORT_SYMBOL(ip6_route_output
);
829 int ip6_dst_blackhole(struct sock
*sk
, struct dst_entry
**dstp
, struct flowi
*fl
)
831 struct rt6_info
*ort
= (struct rt6_info
*) *dstp
;
832 struct rt6_info
*rt
= (struct rt6_info
*)
833 dst_alloc(&ip6_dst_blackhole_ops
);
834 struct dst_entry
*new = NULL
;
839 atomic_set(&new->__refcnt
, 1);
841 new->input
= dst_discard
;
842 new->output
= dst_discard
;
844 dst_copy_metrics(new, &ort
->dst
);
845 new->dev
= ort
->dst
.dev
;
848 rt
->rt6i_idev
= ort
->rt6i_idev
;
850 in6_dev_hold(rt
->rt6i_idev
);
851 rt
->rt6i_expires
= 0;
853 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
854 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
857 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
858 #ifdef CONFIG_IPV6_SUBTREES
859 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
867 return new ? 0 : -ENOMEM
;
869 EXPORT_SYMBOL_GPL(ip6_dst_blackhole
);
872 * Destination cache support functions
875 static struct dst_entry
*ip6_dst_check(struct dst_entry
*dst
, u32 cookie
)
879 rt
= (struct rt6_info
*) dst
;
881 if (rt
->rt6i_node
&& (rt
->rt6i_node
->fn_sernum
== cookie
))
887 static struct dst_entry
*ip6_negative_advice(struct dst_entry
*dst
)
889 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
892 if (rt
->rt6i_flags
& RTF_CACHE
) {
893 if (rt6_check_expired(rt
)) {
905 static void ip6_link_failure(struct sk_buff
*skb
)
909 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, ICMPV6_ADDR_UNREACH
, 0);
911 rt
= (struct rt6_info
*) skb_dst(skb
);
913 if (rt
->rt6i_flags
&RTF_CACHE
) {
914 dst_set_expires(&rt
->dst
, 0);
915 rt
->rt6i_flags
|= RTF_EXPIRES
;
916 } else if (rt
->rt6i_node
&& (rt
->rt6i_flags
& RTF_DEFAULT
))
917 rt
->rt6i_node
->fn_sernum
= -1;
921 static void ip6_rt_update_pmtu(struct dst_entry
*dst
, u32 mtu
)
923 struct rt6_info
*rt6
= (struct rt6_info
*)dst
;
925 if (mtu
< dst_mtu(dst
) && rt6
->rt6i_dst
.plen
== 128) {
926 rt6
->rt6i_flags
|= RTF_MODIFIED
;
927 if (mtu
< IPV6_MIN_MTU
) {
928 u32 features
= dst_metric(dst
, RTAX_FEATURES
);
930 features
|= RTAX_FEATURE_ALLFRAG
;
931 dst_metric_set(dst
, RTAX_FEATURES
, features
);
933 dst_metric_set(dst
, RTAX_MTU
, mtu
);
934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE
, dst
);
938 static int ipv6_get_mtu(struct net_device
*dev
);
940 static inline unsigned int ipv6_advmss(struct net
*net
, unsigned int mtu
)
942 mtu
-= sizeof(struct ipv6hdr
) + sizeof(struct tcphdr
);
944 if (mtu
< net
->ipv6
.sysctl
.ip6_rt_min_advmss
)
945 mtu
= net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
948 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
949 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
950 * IPV6_MAXPLEN is also valid and means: "any MSS,
951 * rely only on pmtu discovery"
953 if (mtu
> IPV6_MAXPLEN
- sizeof(struct tcphdr
))
958 static struct dst_entry
*icmp6_dst_gc_list
;
959 static DEFINE_SPINLOCK(icmp6_dst_lock
);
961 struct dst_entry
*icmp6_dst_alloc(struct net_device
*dev
,
962 struct neighbour
*neigh
,
963 const struct in6_addr
*addr
)
966 struct inet6_dev
*idev
= in6_dev_get(dev
);
967 struct net
*net
= dev_net(dev
);
969 if (unlikely(idev
== NULL
))
972 rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
);
973 if (unlikely(rt
== NULL
)) {
982 neigh
= ndisc_get_neigh(dev
, addr
);
988 rt
->rt6i_idev
= idev
;
989 rt
->rt6i_nexthop
= neigh
;
990 atomic_set(&rt
->dst
.__refcnt
, 1);
991 dst_metric_set(&rt
->dst
, RTAX_HOPLIMIT
, 255);
992 dst_metric_set(&rt
->dst
, RTAX_MTU
, ipv6_get_mtu(rt
->rt6i_dev
));
993 dst_metric_set(&rt
->dst
, RTAX_ADVMSS
, ipv6_advmss(net
, dst_mtu(&rt
->dst
)));
994 rt
->dst
.output
= ip6_output
;
996 #if 0 /* there's no chance to use these for ndisc */
997 rt
->dst
.flags
= ipv6_addr_type(addr
) & IPV6_ADDR_UNICAST
1000 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
1001 rt
->rt6i_dst
.plen
= 128;
1004 spin_lock_bh(&icmp6_dst_lock
);
1005 rt
->dst
.next
= icmp6_dst_gc_list
;
1006 icmp6_dst_gc_list
= &rt
->dst
;
1007 spin_unlock_bh(&icmp6_dst_lock
);
1009 fib6_force_start_gc(net
);
1015 int icmp6_dst_gc(void)
1017 struct dst_entry
*dst
, *next
, **pprev
;
1022 spin_lock_bh(&icmp6_dst_lock
);
1023 pprev
= &icmp6_dst_gc_list
;
1025 while ((dst
= *pprev
) != NULL
) {
1026 if (!atomic_read(&dst
->__refcnt
)) {
1035 spin_unlock_bh(&icmp6_dst_lock
);
1040 static void icmp6_clean_all(int (*func
)(struct rt6_info
*rt
, void *arg
),
1043 struct dst_entry
*dst
, **pprev
;
1045 spin_lock_bh(&icmp6_dst_lock
);
1046 pprev
= &icmp6_dst_gc_list
;
1047 while ((dst
= *pprev
) != NULL
) {
1048 struct rt6_info
*rt
= (struct rt6_info
*) dst
;
1049 if (func(rt
, arg
)) {
1056 spin_unlock_bh(&icmp6_dst_lock
);
1059 static int ip6_dst_gc(struct dst_ops
*ops
)
1061 unsigned long now
= jiffies
;
1062 struct net
*net
= container_of(ops
, struct net
, ipv6
.ip6_dst_ops
);
1063 int rt_min_interval
= net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
1064 int rt_max_size
= net
->ipv6
.sysctl
.ip6_rt_max_size
;
1065 int rt_elasticity
= net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
1066 int rt_gc_timeout
= net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
1067 unsigned long rt_last_gc
= net
->ipv6
.ip6_rt_last_gc
;
1070 entries
= dst_entries_get_fast(ops
);
1071 if (time_after(rt_last_gc
+ rt_min_interval
, now
) &&
1072 entries
<= rt_max_size
)
1075 net
->ipv6
.ip6_rt_gc_expire
++;
1076 fib6_run_gc(net
->ipv6
.ip6_rt_gc_expire
, net
);
1077 net
->ipv6
.ip6_rt_last_gc
= now
;
1078 entries
= dst_entries_get_slow(ops
);
1079 if (entries
< ops
->gc_thresh
)
1080 net
->ipv6
.ip6_rt_gc_expire
= rt_gc_timeout
>>1;
1082 net
->ipv6
.ip6_rt_gc_expire
-= net
->ipv6
.ip6_rt_gc_expire
>>rt_elasticity
;
1083 return entries
> rt_max_size
;
1086 /* Clean host part of a prefix. Not necessary in radix tree,
1087 but results in cleaner routing tables.
1089 Remove it only when all the things will work!
1092 static int ipv6_get_mtu(struct net_device
*dev
)
1094 int mtu
= IPV6_MIN_MTU
;
1095 struct inet6_dev
*idev
;
1098 idev
= __in6_dev_get(dev
);
1100 mtu
= idev
->cnf
.mtu6
;
1105 int ip6_dst_hoplimit(struct dst_entry
*dst
)
1107 int hoplimit
= dst_metric(dst
, RTAX_HOPLIMIT
);
1109 struct net_device
*dev
= dst
->dev
;
1110 struct inet6_dev
*idev
;
1113 idev
= __in6_dev_get(dev
);
1115 hoplimit
= idev
->cnf
.hop_limit
;
1117 hoplimit
= dev_net(dev
)->ipv6
.devconf_all
->hop_limit
;
1127 int ip6_route_add(struct fib6_config
*cfg
)
1130 struct net
*net
= cfg
->fc_nlinfo
.nl_net
;
1131 struct rt6_info
*rt
= NULL
;
1132 struct net_device
*dev
= NULL
;
1133 struct inet6_dev
*idev
= NULL
;
1134 struct fib6_table
*table
;
1137 if (cfg
->fc_dst_len
> 128 || cfg
->fc_src_len
> 128)
1139 #ifndef CONFIG_IPV6_SUBTREES
1140 if (cfg
->fc_src_len
)
1143 if (cfg
->fc_ifindex
) {
1145 dev
= dev_get_by_index(net
, cfg
->fc_ifindex
);
1148 idev
= in6_dev_get(dev
);
1153 if (cfg
->fc_metric
== 0)
1154 cfg
->fc_metric
= IP6_RT_PRIO_USER
;
1156 table
= fib6_new_table(net
, cfg
->fc_table
);
1157 if (table
== NULL
) {
1162 rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
);
1169 rt
->dst
.obsolete
= -1;
1170 rt
->rt6i_expires
= (cfg
->fc_flags
& RTF_EXPIRES
) ?
1171 jiffies
+ clock_t_to_jiffies(cfg
->fc_expires
) :
1174 if (cfg
->fc_protocol
== RTPROT_UNSPEC
)
1175 cfg
->fc_protocol
= RTPROT_BOOT
;
1176 rt
->rt6i_protocol
= cfg
->fc_protocol
;
1178 addr_type
= ipv6_addr_type(&cfg
->fc_dst
);
1180 if (addr_type
& IPV6_ADDR_MULTICAST
)
1181 rt
->dst
.input
= ip6_mc_input
;
1182 else if (cfg
->fc_flags
& RTF_LOCAL
)
1183 rt
->dst
.input
= ip6_input
;
1185 rt
->dst
.input
= ip6_forward
;
1187 rt
->dst
.output
= ip6_output
;
1189 ipv6_addr_prefix(&rt
->rt6i_dst
.addr
, &cfg
->fc_dst
, cfg
->fc_dst_len
);
1190 rt
->rt6i_dst
.plen
= cfg
->fc_dst_len
;
1191 if (rt
->rt6i_dst
.plen
== 128)
1192 rt
->dst
.flags
= DST_HOST
;
1194 #ifdef CONFIG_IPV6_SUBTREES
1195 ipv6_addr_prefix(&rt
->rt6i_src
.addr
, &cfg
->fc_src
, cfg
->fc_src_len
);
1196 rt
->rt6i_src
.plen
= cfg
->fc_src_len
;
1199 rt
->rt6i_metric
= cfg
->fc_metric
;
1201 /* We cannot add true routes via loopback here,
1202 they would result in kernel looping; promote them to reject routes
1204 if ((cfg
->fc_flags
& RTF_REJECT
) ||
1205 (dev
&& (dev
->flags
&IFF_LOOPBACK
) && !(addr_type
&IPV6_ADDR_LOOPBACK
)
1206 && !(cfg
->fc_flags
&RTF_LOCAL
))) {
1207 /* hold loopback dev/idev if we haven't done so. */
1208 if (dev
!= net
->loopback_dev
) {
1213 dev
= net
->loopback_dev
;
1215 idev
= in6_dev_get(dev
);
1221 rt
->dst
.output
= ip6_pkt_discard_out
;
1222 rt
->dst
.input
= ip6_pkt_discard
;
1223 rt
->dst
.error
= -ENETUNREACH
;
1224 rt
->rt6i_flags
= RTF_REJECT
|RTF_NONEXTHOP
;
1228 if (cfg
->fc_flags
& RTF_GATEWAY
) {
1229 struct in6_addr
*gw_addr
;
1232 gw_addr
= &cfg
->fc_gateway
;
1233 ipv6_addr_copy(&rt
->rt6i_gateway
, gw_addr
);
1234 gwa_type
= ipv6_addr_type(gw_addr
);
1236 if (gwa_type
!= (IPV6_ADDR_LINKLOCAL
|IPV6_ADDR_UNICAST
)) {
1237 struct rt6_info
*grt
;
1239 /* IPv6 strictly inhibits using not link-local
1240 addresses as nexthop address.
1241 Otherwise, router will not able to send redirects.
1242 It is very good, but in some (rare!) circumstances
1243 (SIT, PtP, NBMA NOARP links) it is handy to allow
1244 some exceptions. --ANK
1247 if (!(gwa_type
&IPV6_ADDR_UNICAST
))
1250 grt
= rt6_lookup(net
, gw_addr
, NULL
, cfg
->fc_ifindex
, 1);
1252 err
= -EHOSTUNREACH
;
1256 if (dev
!= grt
->rt6i_dev
) {
1257 dst_release(&grt
->dst
);
1261 dev
= grt
->rt6i_dev
;
1262 idev
= grt
->rt6i_idev
;
1264 in6_dev_hold(grt
->rt6i_idev
);
1266 if (!(grt
->rt6i_flags
&RTF_GATEWAY
))
1268 dst_release(&grt
->dst
);
1274 if (dev
== NULL
|| (dev
->flags
&IFF_LOOPBACK
))
1282 if (cfg
->fc_flags
& (RTF_GATEWAY
| RTF_NONEXTHOP
)) {
1283 rt
->rt6i_nexthop
= __neigh_lookup_errno(&nd_tbl
, &rt
->rt6i_gateway
, dev
);
1284 if (IS_ERR(rt
->rt6i_nexthop
)) {
1285 err
= PTR_ERR(rt
->rt6i_nexthop
);
1286 rt
->rt6i_nexthop
= NULL
;
1291 rt
->rt6i_flags
= cfg
->fc_flags
;
1298 nla_for_each_attr(nla
, cfg
->fc_mx
, cfg
->fc_mx_len
, remaining
) {
1299 int type
= nla_type(nla
);
1302 if (type
> RTAX_MAX
) {
1307 dst_metric_set(&rt
->dst
, type
, nla_get_u32(nla
));
1312 if (dst_metric(&rt
->dst
, RTAX_HOPLIMIT
) == 0)
1313 dst_metric_set(&rt
->dst
, RTAX_HOPLIMIT
, -1);
1314 if (!dst_mtu(&rt
->dst
))
1315 dst_metric_set(&rt
->dst
, RTAX_MTU
, ipv6_get_mtu(dev
));
1316 if (!dst_metric(&rt
->dst
, RTAX_ADVMSS
))
1317 dst_metric_set(&rt
->dst
, RTAX_ADVMSS
, ipv6_advmss(net
, dst_mtu(&rt
->dst
)));
1319 rt
->rt6i_idev
= idev
;
1320 rt
->rt6i_table
= table
;
1322 cfg
->fc_nlinfo
.nl_net
= dev_net(dev
);
1324 return __ip6_ins_rt(rt
, &cfg
->fc_nlinfo
);
1336 static int __ip6_del_rt(struct rt6_info
*rt
, struct nl_info
*info
)
1339 struct fib6_table
*table
;
1340 struct net
*net
= dev_net(rt
->rt6i_dev
);
1342 if (rt
== net
->ipv6
.ip6_null_entry
)
1345 table
= rt
->rt6i_table
;
1346 write_lock_bh(&table
->tb6_lock
);
1348 err
= fib6_del(rt
, info
);
1349 dst_release(&rt
->dst
);
1351 write_unlock_bh(&table
->tb6_lock
);
1356 int ip6_del_rt(struct rt6_info
*rt
)
1358 struct nl_info info
= {
1359 .nl_net
= dev_net(rt
->rt6i_dev
),
1361 return __ip6_del_rt(rt
, &info
);
1364 static int ip6_route_del(struct fib6_config
*cfg
)
1366 struct fib6_table
*table
;
1367 struct fib6_node
*fn
;
1368 struct rt6_info
*rt
;
1371 table
= fib6_get_table(cfg
->fc_nlinfo
.nl_net
, cfg
->fc_table
);
1375 read_lock_bh(&table
->tb6_lock
);
1377 fn
= fib6_locate(&table
->tb6_root
,
1378 &cfg
->fc_dst
, cfg
->fc_dst_len
,
1379 &cfg
->fc_src
, cfg
->fc_src_len
);
1382 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1383 if (cfg
->fc_ifindex
&&
1384 (rt
->rt6i_dev
== NULL
||
1385 rt
->rt6i_dev
->ifindex
!= cfg
->fc_ifindex
))
1387 if (cfg
->fc_flags
& RTF_GATEWAY
&&
1388 !ipv6_addr_equal(&cfg
->fc_gateway
, &rt
->rt6i_gateway
))
1390 if (cfg
->fc_metric
&& cfg
->fc_metric
!= rt
->rt6i_metric
)
1393 read_unlock_bh(&table
->tb6_lock
);
1395 return __ip6_del_rt(rt
, &cfg
->fc_nlinfo
);
1398 read_unlock_bh(&table
->tb6_lock
);
1406 struct ip6rd_flowi
{
1408 struct in6_addr gateway
;
1411 static struct rt6_info
*__ip6_route_redirect(struct net
*net
,
1412 struct fib6_table
*table
,
1416 struct ip6rd_flowi
*rdfl
= (struct ip6rd_flowi
*)fl
;
1417 struct rt6_info
*rt
;
1418 struct fib6_node
*fn
;
1421 * Get the "current" route for this destination and
1422 * check if the redirect has come from approriate router.
1424 * RFC 2461 specifies that redirects should only be
1425 * accepted if they come from the nexthop to the target.
1426 * Due to the way the routes are chosen, this notion
1427 * is a bit fuzzy and one might need to check all possible
1431 read_lock_bh(&table
->tb6_lock
);
1432 fn
= fib6_lookup(&table
->tb6_root
, &fl
->fl6_dst
, &fl
->fl6_src
);
1434 for (rt
= fn
->leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1436 * Current route is on-link; redirect is always invalid.
1438 * Seems, previous statement is not true. It could
1439 * be node, which looks for us as on-link (f.e. proxy ndisc)
1440 * But then router serving it might decide, that we should
1441 * know truth 8)8) --ANK (980726).
1443 if (rt6_check_expired(rt
))
1445 if (!(rt
->rt6i_flags
& RTF_GATEWAY
))
1447 if (fl
->oif
!= rt
->rt6i_dev
->ifindex
)
1449 if (!ipv6_addr_equal(&rdfl
->gateway
, &rt
->rt6i_gateway
))
1455 rt
= net
->ipv6
.ip6_null_entry
;
1456 BACKTRACK(net
, &fl
->fl6_src
);
1460 read_unlock_bh(&table
->tb6_lock
);
1465 static struct rt6_info
*ip6_route_redirect(struct in6_addr
*dest
,
1466 struct in6_addr
*src
,
1467 struct in6_addr
*gateway
,
1468 struct net_device
*dev
)
1470 int flags
= RT6_LOOKUP_F_HAS_SADDR
;
1471 struct net
*net
= dev_net(dev
);
1472 struct ip6rd_flowi rdfl
= {
1474 .oif
= dev
->ifindex
,
1480 ipv6_addr_copy(&rdfl
.gateway
, gateway
);
1482 if (rt6_need_strict(dest
))
1483 flags
|= RT6_LOOKUP_F_IFACE
;
1485 return (struct rt6_info
*)fib6_rule_lookup(net
, (struct flowi
*)&rdfl
,
1486 flags
, __ip6_route_redirect
);
1489 void rt6_redirect(struct in6_addr
*dest
, struct in6_addr
*src
,
1490 struct in6_addr
*saddr
,
1491 struct neighbour
*neigh
, u8
*lladdr
, int on_link
)
1493 struct rt6_info
*rt
, *nrt
= NULL
;
1494 struct netevent_redirect netevent
;
1495 struct net
*net
= dev_net(neigh
->dev
);
1497 rt
= ip6_route_redirect(dest
, src
, saddr
, neigh
->dev
);
1499 if (rt
== net
->ipv6
.ip6_null_entry
) {
1500 if (net_ratelimit())
1501 printk(KERN_DEBUG
"rt6_redirect: source isn't a valid nexthop "
1502 "for redirect target\n");
1507 * We have finally decided to accept it.
1510 neigh_update(neigh
, lladdr
, NUD_STALE
,
1511 NEIGH_UPDATE_F_WEAK_OVERRIDE
|
1512 NEIGH_UPDATE_F_OVERRIDE
|
1513 (on_link
? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER
|
1514 NEIGH_UPDATE_F_ISROUTER
))
1518 * Redirect received -> path was valid.
1519 * Look, redirects are sent only in response to data packets,
1520 * so that this nexthop apparently is reachable. --ANK
1522 dst_confirm(&rt
->dst
);
1524 /* Duplicate redirect: silently ignore. */
1525 if (neigh
== rt
->dst
.neighbour
)
1528 nrt
= ip6_rt_copy(rt
);
1532 nrt
->rt6i_flags
= RTF_GATEWAY
|RTF_UP
|RTF_DYNAMIC
|RTF_CACHE
;
1534 nrt
->rt6i_flags
&= ~RTF_GATEWAY
;
1536 ipv6_addr_copy(&nrt
->rt6i_dst
.addr
, dest
);
1537 nrt
->rt6i_dst
.plen
= 128;
1538 nrt
->dst
.flags
|= DST_HOST
;
1540 ipv6_addr_copy(&nrt
->rt6i_gateway
, (struct in6_addr
*)neigh
->primary_key
);
1541 nrt
->rt6i_nexthop
= neigh_clone(neigh
);
1542 /* Reset pmtu, it may be better */
1543 dst_metric_set(&nrt
->dst
, RTAX_MTU
, ipv6_get_mtu(neigh
->dev
));
1544 dst_metric_set(&nrt
->dst
, RTAX_ADVMSS
, ipv6_advmss(dev_net(neigh
->dev
),
1545 dst_mtu(&nrt
->dst
)));
1547 if (ip6_ins_rt(nrt
))
1550 netevent
.old
= &rt
->dst
;
1551 netevent
.new = &nrt
->dst
;
1552 call_netevent_notifiers(NETEVENT_REDIRECT
, &netevent
);
1554 if (rt
->rt6i_flags
&RTF_CACHE
) {
1560 dst_release(&rt
->dst
);
1564 * Handle ICMP "packet too big" messages
1565 * i.e. Path MTU discovery
1568 static void rt6_do_pmtu_disc(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
1569 struct net
*net
, u32 pmtu
, int ifindex
)
1571 struct rt6_info
*rt
, *nrt
;
1574 rt
= rt6_lookup(net
, daddr
, saddr
, ifindex
, 0);
1578 if (pmtu
>= dst_mtu(&rt
->dst
))
1581 if (pmtu
< IPV6_MIN_MTU
) {
1583 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1584 * MTU (1280) and a fragment header should always be included
1585 * after a node receiving Too Big message reporting PMTU is
1586 * less than the IPv6 Minimum Link MTU.
1588 pmtu
= IPV6_MIN_MTU
;
1592 /* New mtu received -> path was valid.
1593 They are sent only in response to data packets,
1594 so that this nexthop apparently is reachable. --ANK
1596 dst_confirm(&rt
->dst
);
1598 /* Host route. If it is static, it would be better
1599 not to override it, but add new one, so that
1600 when cache entry will expire old pmtu
1601 would return automatically.
1603 if (rt
->rt6i_flags
& RTF_CACHE
) {
1604 dst_metric_set(&rt
->dst
, RTAX_MTU
, pmtu
);
1606 u32 features
= dst_metric(&rt
->dst
, RTAX_FEATURES
);
1607 features
|= RTAX_FEATURE_ALLFRAG
;
1608 dst_metric_set(&rt
->dst
, RTAX_FEATURES
, features
);
1610 dst_set_expires(&rt
->dst
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1611 rt
->rt6i_flags
|= RTF_MODIFIED
|RTF_EXPIRES
;
1616 Two cases are possible:
1617 1. It is connected route. Action: COW
1618 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1620 if (!rt
->rt6i_nexthop
&& !(rt
->rt6i_flags
& RTF_NONEXTHOP
))
1621 nrt
= rt6_alloc_cow(rt
, daddr
, saddr
);
1623 nrt
= rt6_alloc_clone(rt
, daddr
);
1626 dst_metric_set(&nrt
->dst
, RTAX_MTU
, pmtu
);
1628 u32 features
= dst_metric(&nrt
->dst
, RTAX_FEATURES
);
1629 features
|= RTAX_FEATURE_ALLFRAG
;
1630 dst_metric_set(&nrt
->dst
, RTAX_FEATURES
, features
);
1633 /* According to RFC 1981, detecting PMTU increase shouldn't be
1634 * happened within 5 mins, the recommended timer is 10 mins.
1635 * Here this route expiration time is set to ip6_rt_mtu_expires
1636 * which is 10 mins. After 10 mins the decreased pmtu is expired
1637 * and detecting PMTU increase will be automatically happened.
1639 dst_set_expires(&nrt
->dst
, net
->ipv6
.sysctl
.ip6_rt_mtu_expires
);
1640 nrt
->rt6i_flags
|= RTF_DYNAMIC
|RTF_EXPIRES
;
1645 dst_release(&rt
->dst
);
1648 void rt6_pmtu_discovery(struct in6_addr
*daddr
, struct in6_addr
*saddr
,
1649 struct net_device
*dev
, u32 pmtu
)
1651 struct net
*net
= dev_net(dev
);
1654 * RFC 1981 states that a node "MUST reduce the size of the packets it
1655 * is sending along the path" that caused the Packet Too Big message.
1656 * Since it's not possible in the general case to determine which
1657 * interface was used to send the original packet, we update the MTU
1658 * on the interface that will be used to send future packets. We also
1659 * update the MTU on the interface that received the Packet Too Big in
1660 * case the original packet was forced out that interface with
1661 * SO_BINDTODEVICE or similar. This is the next best thing to the
1662 * correct behaviour, which would be to update the MTU on all
1665 rt6_do_pmtu_disc(daddr
, saddr
, net
, pmtu
, 0);
1666 rt6_do_pmtu_disc(daddr
, saddr
, net
, pmtu
, dev
->ifindex
);
1670 * Misc support functions
1673 static struct rt6_info
* ip6_rt_copy(struct rt6_info
*ort
)
1675 struct net
*net
= dev_net(ort
->rt6i_dev
);
1676 struct rt6_info
*rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
);
1679 rt
->dst
.input
= ort
->dst
.input
;
1680 rt
->dst
.output
= ort
->dst
.output
;
1682 dst_copy_metrics(&rt
->dst
, &ort
->dst
);
1683 rt
->dst
.error
= ort
->dst
.error
;
1684 rt
->dst
.dev
= ort
->dst
.dev
;
1686 dev_hold(rt
->dst
.dev
);
1687 rt
->rt6i_idev
= ort
->rt6i_idev
;
1689 in6_dev_hold(rt
->rt6i_idev
);
1690 rt
->dst
.lastuse
= jiffies
;
1691 rt
->rt6i_expires
= 0;
1693 ipv6_addr_copy(&rt
->rt6i_gateway
, &ort
->rt6i_gateway
);
1694 rt
->rt6i_flags
= ort
->rt6i_flags
& ~RTF_EXPIRES
;
1695 rt
->rt6i_metric
= 0;
1697 memcpy(&rt
->rt6i_dst
, &ort
->rt6i_dst
, sizeof(struct rt6key
));
1698 #ifdef CONFIG_IPV6_SUBTREES
1699 memcpy(&rt
->rt6i_src
, &ort
->rt6i_src
, sizeof(struct rt6key
));
1701 rt
->rt6i_table
= ort
->rt6i_table
;
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Find an RA Route Information (RFC 4191) route in RT6_TABLE_INFO matching
 * <prefix/prefixlen, gwaddr, ifindex>.  Returns the route with a reference
 * held, or NULL.
 * NOTE(review): reconstructed from a garbled extraction; dropped dst_hold/
 * out-label lines restored from the upstream kernel.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex)
{
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, RT6_TABLE_INFO);
	if (table == NULL)
		return NULL;

	write_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (fn == NULL)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	write_unlock_bh(&table->tb6_lock);
	return rt;
}

/*
 * Install a Route Information route received in a Router Advertisement and
 * return the resulting table entry (via rt6_get_route_info).  A zero-length
 * prefix is treated as a default route.
 */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   struct in6_addr *prefix, int prefixlen,
					   struct in6_addr *gwaddr, int ifindex,
					   unsigned pref)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_INFO,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
				  RTF_UP | RTF_PREF(pref),
		.fc_nlinfo.pid = 0,
		.fc_nlinfo.nlh = NULL,
		.fc_nlinfo.nl_net = net,
	};

	ipv6_addr_copy(&cfg.fc_dst, prefix);
	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);

	/* We should treat it as a default route if prefix length is 0. */
	if (!prefixlen)
		cfg.fc_flags |= RTF_DEFAULT;

	ip6_route_add(&cfg);

	return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
}
#endif
1769 struct rt6_info
*rt6_get_dflt_router(struct in6_addr
*addr
, struct net_device
*dev
)
1771 struct rt6_info
*rt
;
1772 struct fib6_table
*table
;
1774 table
= fib6_get_table(dev_net(dev
), RT6_TABLE_DFLT
);
1778 write_lock_bh(&table
->tb6_lock
);
1779 for (rt
= table
->tb6_root
.leaf
; rt
; rt
=rt
->dst
.rt6_next
) {
1780 if (dev
== rt
->rt6i_dev
&&
1781 ((rt
->rt6i_flags
& (RTF_ADDRCONF
| RTF_DEFAULT
)) == (RTF_ADDRCONF
| RTF_DEFAULT
)) &&
1782 ipv6_addr_equal(&rt
->rt6i_gateway
, addr
))
1787 write_unlock_bh(&table
->tb6_lock
);
1791 struct rt6_info
*rt6_add_dflt_router(struct in6_addr
*gwaddr
,
1792 struct net_device
*dev
,
1795 struct fib6_config cfg
= {
1796 .fc_table
= RT6_TABLE_DFLT
,
1797 .fc_metric
= IP6_RT_PRIO_USER
,
1798 .fc_ifindex
= dev
->ifindex
,
1799 .fc_flags
= RTF_GATEWAY
| RTF_ADDRCONF
| RTF_DEFAULT
|
1800 RTF_UP
| RTF_EXPIRES
| RTF_PREF(pref
),
1802 .fc_nlinfo
.nlh
= NULL
,
1803 .fc_nlinfo
.nl_net
= dev_net(dev
),
1806 ipv6_addr_copy(&cfg
.fc_gateway
, gwaddr
);
1808 ip6_route_add(&cfg
);
1810 return rt6_get_dflt_router(gwaddr
, dev
);
1813 void rt6_purge_dflt_routers(struct net
*net
)
1815 struct rt6_info
*rt
;
1816 struct fib6_table
*table
;
1818 /* NOTE: Keep consistent with rt6_get_dflt_router */
1819 table
= fib6_get_table(net
, RT6_TABLE_DFLT
);
1824 read_lock_bh(&table
->tb6_lock
);
1825 for (rt
= table
->tb6_root
.leaf
; rt
; rt
= rt
->dst
.rt6_next
) {
1826 if (rt
->rt6i_flags
& (RTF_DEFAULT
| RTF_ADDRCONF
)) {
1828 read_unlock_bh(&table
->tb6_lock
);
1833 read_unlock_bh(&table
->tb6_lock
);
1836 static void rtmsg_to_fib6_config(struct net
*net
,
1837 struct in6_rtmsg
*rtmsg
,
1838 struct fib6_config
*cfg
)
1840 memset(cfg
, 0, sizeof(*cfg
));
1842 cfg
->fc_table
= RT6_TABLE_MAIN
;
1843 cfg
->fc_ifindex
= rtmsg
->rtmsg_ifindex
;
1844 cfg
->fc_metric
= rtmsg
->rtmsg_metric
;
1845 cfg
->fc_expires
= rtmsg
->rtmsg_info
;
1846 cfg
->fc_dst_len
= rtmsg
->rtmsg_dst_len
;
1847 cfg
->fc_src_len
= rtmsg
->rtmsg_src_len
;
1848 cfg
->fc_flags
= rtmsg
->rtmsg_flags
;
1850 cfg
->fc_nlinfo
.nl_net
= net
;
1852 ipv6_addr_copy(&cfg
->fc_dst
, &rtmsg
->rtmsg_dst
);
1853 ipv6_addr_copy(&cfg
->fc_src
, &rtmsg
->rtmsg_src
);
1854 ipv6_addr_copy(&cfg
->fc_gateway
, &rtmsg
->rtmsg_gateway
);
1857 int ipv6_route_ioctl(struct net
*net
, unsigned int cmd
, void __user
*arg
)
1859 struct fib6_config cfg
;
1860 struct in6_rtmsg rtmsg
;
1864 case SIOCADDRT
: /* Add a route */
1865 case SIOCDELRT
: /* Delete a route */
1866 if (!capable(CAP_NET_ADMIN
))
1868 err
= copy_from_user(&rtmsg
, arg
,
1869 sizeof(struct in6_rtmsg
));
1873 rtmsg_to_fib6_config(net
, &rtmsg
, &cfg
);
1878 err
= ip6_route_add(&cfg
);
1881 err
= ip6_route_del(&cfg
);
1895 * Drop the packet on the floor
1898 static int ip6_pkt_drop(struct sk_buff
*skb
, u8 code
, int ipstats_mib_noroutes
)
1901 struct dst_entry
*dst
= skb_dst(skb
);
1902 switch (ipstats_mib_noroutes
) {
1903 case IPSTATS_MIB_INNOROUTES
:
1904 type
= ipv6_addr_type(&ipv6_hdr(skb
)->daddr
);
1905 if (type
== IPV6_ADDR_ANY
) {
1906 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
1907 IPSTATS_MIB_INADDRERRORS
);
1911 case IPSTATS_MIB_OUTNOROUTES
:
1912 IP6_INC_STATS(dev_net(dst
->dev
), ip6_dst_idev(dst
),
1913 ipstats_mib_noroutes
);
1916 icmpv6_send(skb
, ICMPV6_DEST_UNREACH
, code
, 0);
1921 static int ip6_pkt_discard(struct sk_buff
*skb
)
1923 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_INNOROUTES
);
1926 static int ip6_pkt_discard_out(struct sk_buff
*skb
)
1928 skb
->dev
= skb_dst(skb
)->dev
;
1929 return ip6_pkt_drop(skb
, ICMPV6_NOROUTE
, IPSTATS_MIB_OUTNOROUTES
);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Input hook of the prohibit entry: drop with "administratively prohibited". */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

/* Output hook of the prohibit entry: attach the dst device, then drop. */
static int ip6_pkt_prohibit_out(struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}

#endif
1948 * Allocate a dst for local (unicast / anycast) address.
1951 struct rt6_info
*addrconf_dst_alloc(struct inet6_dev
*idev
,
1952 const struct in6_addr
*addr
,
1955 struct net
*net
= dev_net(idev
->dev
);
1956 struct rt6_info
*rt
= ip6_dst_alloc(&net
->ipv6
.ip6_dst_ops
);
1957 struct neighbour
*neigh
;
1960 if (net_ratelimit())
1961 pr_warning("IPv6: Maximum number of routes reached,"
1962 " consider increasing route/max_size.\n");
1963 return ERR_PTR(-ENOMEM
);
1966 dev_hold(net
->loopback_dev
);
1969 rt
->dst
.flags
= DST_HOST
;
1970 rt
->dst
.input
= ip6_input
;
1971 rt
->dst
.output
= ip6_output
;
1972 rt
->rt6i_dev
= net
->loopback_dev
;
1973 rt
->rt6i_idev
= idev
;
1974 dst_metric_set(&rt
->dst
, RTAX_MTU
, ipv6_get_mtu(rt
->rt6i_dev
));
1975 dst_metric_set(&rt
->dst
, RTAX_ADVMSS
, ipv6_advmss(net
, dst_mtu(&rt
->dst
)));
1976 dst_metric_set(&rt
->dst
, RTAX_HOPLIMIT
, -1);
1977 rt
->dst
.obsolete
= -1;
1979 rt
->rt6i_flags
= RTF_UP
| RTF_NONEXTHOP
;
1981 rt
->rt6i_flags
|= RTF_ANYCAST
;
1983 rt
->rt6i_flags
|= RTF_LOCAL
;
1984 neigh
= ndisc_get_neigh(rt
->rt6i_dev
, &rt
->rt6i_gateway
);
1985 if (IS_ERR(neigh
)) {
1988 /* We are casting this because that is the return
1989 * value type. But an errno encoded pointer is the
1990 * same regardless of the underlying pointer type,
1991 * and that's what we are returning. So this is OK.
1993 return (struct rt6_info
*) neigh
;
1995 rt
->rt6i_nexthop
= neigh
;
1997 ipv6_addr_copy(&rt
->rt6i_dst
.addr
, addr
);
1998 rt
->rt6i_dst
.plen
= 128;
1999 rt
->rt6i_table
= fib6_get_table(net
, RT6_TABLE_LOCAL
);
2001 atomic_set(&rt
->dst
.__refcnt
, 1);
/* Argument bundle for fib6_ifdown: the (possibly NULL) device being torn
 * down and its network namespace. */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2011 static int fib6_ifdown(struct rt6_info
*rt
, void *arg
)
2013 struct net_device
*dev
= ((struct arg_dev_net
*)arg
)->dev
;
2014 struct net
*net
= ((struct arg_dev_net
*)arg
)->net
;
2016 if (((void *)rt
->rt6i_dev
== dev
|| dev
== NULL
) &&
2017 rt
!= net
->ipv6
.ip6_null_entry
) {
2018 RT6_TRACE("deleted by ifdown %p\n", rt
);
2024 void rt6_ifdown(struct net
*net
, struct net_device
*dev
)
2026 struct arg_dev_net adn
= {
2031 fib6_clean_all(net
, fib6_ifdown
, 0, &adn
);
2032 icmp6_clean_all(fib6_ifdown
, &adn
);
/* Argument bundle for rt6_mtu_change_route: the device whose MTU changed
 * and its new MTU value. */
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned mtu;
};
2041 static int rt6_mtu_change_route(struct rt6_info
*rt
, void *p_arg
)
2043 struct rt6_mtu_change_arg
*arg
= (struct rt6_mtu_change_arg
*) p_arg
;
2044 struct inet6_dev
*idev
;
2045 struct net
*net
= dev_net(arg
->dev
);
2047 /* In IPv6 pmtu discovery is not optional,
2048 so that RTAX_MTU lock cannot disable it.
2049 We still use this lock to block changes
2050 caused by addrconf/ndisc.
2053 idev
= __in6_dev_get(arg
->dev
);
2057 /* For administrative MTU increase, there is no way to discover
2058 IPv6 PMTU increase, so PMTU increase should be updated here.
2059 Since RFC 1981 doesn't include administrative MTU increase
2060 update PMTU increase is a MUST. (i.e. jumbo frame)
2063 If new MTU is less than route PMTU, this new MTU will be the
2064 lowest MTU in the path, update the route PMTU to reflect PMTU
2065 decreases; if new MTU is greater than route PMTU, and the
2066 old MTU is the lowest MTU in the path, update the route PMTU
2067 to reflect the increase. In this case if the other nodes' MTU
2068 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2071 if (rt
->rt6i_dev
== arg
->dev
&&
2072 !dst_metric_locked(&rt
->dst
, RTAX_MTU
) &&
2073 (dst_mtu(&rt
->dst
) >= arg
->mtu
||
2074 (dst_mtu(&rt
->dst
) < arg
->mtu
&&
2075 dst_mtu(&rt
->dst
) == idev
->cnf
.mtu6
))) {
2076 dst_metric_set(&rt
->dst
, RTAX_MTU
, arg
->mtu
);
2077 dst_metric_set(&rt
->dst
, RTAX_ADVMSS
, ipv6_advmss(net
, arg
->mtu
));
2082 void rt6_mtu_change(struct net_device
*dev
, unsigned mtu
)
2084 struct rt6_mtu_change_arg arg
= {
2089 fib6_clean_all(dev_net(dev
), rt6_mtu_change_route
, 0, &arg
);
2092 static const struct nla_policy rtm_ipv6_policy
[RTA_MAX
+1] = {
2093 [RTA_GATEWAY
] = { .len
= sizeof(struct in6_addr
) },
2094 [RTA_OIF
] = { .type
= NLA_U32
},
2095 [RTA_IIF
] = { .type
= NLA_U32
},
2096 [RTA_PRIORITY
] = { .type
= NLA_U32
},
2097 [RTA_METRICS
] = { .type
= NLA_NESTED
},
2100 static int rtm_to_fib6_config(struct sk_buff
*skb
, struct nlmsghdr
*nlh
,
2101 struct fib6_config
*cfg
)
2104 struct nlattr
*tb
[RTA_MAX
+1];
2107 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2112 rtm
= nlmsg_data(nlh
);
2113 memset(cfg
, 0, sizeof(*cfg
));
2115 cfg
->fc_table
= rtm
->rtm_table
;
2116 cfg
->fc_dst_len
= rtm
->rtm_dst_len
;
2117 cfg
->fc_src_len
= rtm
->rtm_src_len
;
2118 cfg
->fc_flags
= RTF_UP
;
2119 cfg
->fc_protocol
= rtm
->rtm_protocol
;
2121 if (rtm
->rtm_type
== RTN_UNREACHABLE
)
2122 cfg
->fc_flags
|= RTF_REJECT
;
2124 if (rtm
->rtm_type
== RTN_LOCAL
)
2125 cfg
->fc_flags
|= RTF_LOCAL
;
2127 cfg
->fc_nlinfo
.pid
= NETLINK_CB(skb
).pid
;
2128 cfg
->fc_nlinfo
.nlh
= nlh
;
2129 cfg
->fc_nlinfo
.nl_net
= sock_net(skb
->sk
);
2131 if (tb
[RTA_GATEWAY
]) {
2132 nla_memcpy(&cfg
->fc_gateway
, tb
[RTA_GATEWAY
], 16);
2133 cfg
->fc_flags
|= RTF_GATEWAY
;
2137 int plen
= (rtm
->rtm_dst_len
+ 7) >> 3;
2139 if (nla_len(tb
[RTA_DST
]) < plen
)
2142 nla_memcpy(&cfg
->fc_dst
, tb
[RTA_DST
], plen
);
2146 int plen
= (rtm
->rtm_src_len
+ 7) >> 3;
2148 if (nla_len(tb
[RTA_SRC
]) < plen
)
2151 nla_memcpy(&cfg
->fc_src
, tb
[RTA_SRC
], plen
);
2155 cfg
->fc_ifindex
= nla_get_u32(tb
[RTA_OIF
]);
2157 if (tb
[RTA_PRIORITY
])
2158 cfg
->fc_metric
= nla_get_u32(tb
[RTA_PRIORITY
]);
2160 if (tb
[RTA_METRICS
]) {
2161 cfg
->fc_mx
= nla_data(tb
[RTA_METRICS
]);
2162 cfg
->fc_mx_len
= nla_len(tb
[RTA_METRICS
]);
2166 cfg
->fc_table
= nla_get_u32(tb
[RTA_TABLE
]);
2173 static int inet6_rtm_delroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2175 struct fib6_config cfg
;
2178 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2182 return ip6_route_del(&cfg
);
2185 static int inet6_rtm_newroute(struct sk_buff
*skb
, struct nlmsghdr
* nlh
, void *arg
)
2187 struct fib6_config cfg
;
2190 err
= rtm_to_fib6_config(skb
, nlh
, &cfg
);
2194 return ip6_route_add(&cfg
);
2197 static inline size_t rt6_nlmsg_size(void)
2199 return NLMSG_ALIGN(sizeof(struct rtmsg
))
2200 + nla_total_size(16) /* RTA_SRC */
2201 + nla_total_size(16) /* RTA_DST */
2202 + nla_total_size(16) /* RTA_GATEWAY */
2203 + nla_total_size(16) /* RTA_PREFSRC */
2204 + nla_total_size(4) /* RTA_TABLE */
2205 + nla_total_size(4) /* RTA_IIF */
2206 + nla_total_size(4) /* RTA_OIF */
2207 + nla_total_size(4) /* RTA_PRIORITY */
2208 + RTAX_MAX
* nla_total_size(4) /* RTA_METRICS */
2209 + nla_total_size(sizeof(struct rta_cacheinfo
));
2212 static int rt6_fill_node(struct net
*net
,
2213 struct sk_buff
*skb
, struct rt6_info
*rt
,
2214 struct in6_addr
*dst
, struct in6_addr
*src
,
2215 int iif
, int type
, u32 pid
, u32 seq
,
2216 int prefix
, int nowait
, unsigned int flags
)
2219 struct nlmsghdr
*nlh
;
2223 if (prefix
) { /* user wants prefix routes only */
2224 if (!(rt
->rt6i_flags
& RTF_PREFIX_RT
)) {
2225 /* success since this is not a prefix route */
2230 nlh
= nlmsg_put(skb
, pid
, seq
, type
, sizeof(*rtm
), flags
);
2234 rtm
= nlmsg_data(nlh
);
2235 rtm
->rtm_family
= AF_INET6
;
2236 rtm
->rtm_dst_len
= rt
->rt6i_dst
.plen
;
2237 rtm
->rtm_src_len
= rt
->rt6i_src
.plen
;
2240 table
= rt
->rt6i_table
->tb6_id
;
2242 table
= RT6_TABLE_UNSPEC
;
2243 rtm
->rtm_table
= table
;
2244 NLA_PUT_U32(skb
, RTA_TABLE
, table
);
2245 if (rt
->rt6i_flags
&RTF_REJECT
)
2246 rtm
->rtm_type
= RTN_UNREACHABLE
;
2247 else if (rt
->rt6i_flags
&RTF_LOCAL
)
2248 rtm
->rtm_type
= RTN_LOCAL
;
2249 else if (rt
->rt6i_dev
&& (rt
->rt6i_dev
->flags
&IFF_LOOPBACK
))
2250 rtm
->rtm_type
= RTN_LOCAL
;
2252 rtm
->rtm_type
= RTN_UNICAST
;
2254 rtm
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2255 rtm
->rtm_protocol
= rt
->rt6i_protocol
;
2256 if (rt
->rt6i_flags
&RTF_DYNAMIC
)
2257 rtm
->rtm_protocol
= RTPROT_REDIRECT
;
2258 else if (rt
->rt6i_flags
& RTF_ADDRCONF
)
2259 rtm
->rtm_protocol
= RTPROT_KERNEL
;
2260 else if (rt
->rt6i_flags
&RTF_DEFAULT
)
2261 rtm
->rtm_protocol
= RTPROT_RA
;
2263 if (rt
->rt6i_flags
&RTF_CACHE
)
2264 rtm
->rtm_flags
|= RTM_F_CLONED
;
2267 NLA_PUT(skb
, RTA_DST
, 16, dst
);
2268 rtm
->rtm_dst_len
= 128;
2269 } else if (rtm
->rtm_dst_len
)
2270 NLA_PUT(skb
, RTA_DST
, 16, &rt
->rt6i_dst
.addr
);
2271 #ifdef CONFIG_IPV6_SUBTREES
2273 NLA_PUT(skb
, RTA_SRC
, 16, src
);
2274 rtm
->rtm_src_len
= 128;
2275 } else if (rtm
->rtm_src_len
)
2276 NLA_PUT(skb
, RTA_SRC
, 16, &rt
->rt6i_src
.addr
);
2279 #ifdef CONFIG_IPV6_MROUTE
2280 if (ipv6_addr_is_multicast(&rt
->rt6i_dst
.addr
)) {
2281 int err
= ip6mr_get_route(net
, skb
, rtm
, nowait
);
2286 goto nla_put_failure
;
2288 if (err
== -EMSGSIZE
)
2289 goto nla_put_failure
;
2294 NLA_PUT_U32(skb
, RTA_IIF
, iif
);
2296 struct inet6_dev
*idev
= ip6_dst_idev(&rt
->dst
);
2297 struct in6_addr saddr_buf
;
2298 if (ipv6_dev_get_saddr(net
, idev
? idev
->dev
: NULL
,
2299 dst
, 0, &saddr_buf
) == 0)
2300 NLA_PUT(skb
, RTA_PREFSRC
, 16, &saddr_buf
);
2303 if (rtnetlink_put_metrics(skb
, dst_metrics_ptr(&rt
->dst
)) < 0)
2304 goto nla_put_failure
;
2306 if (rt
->dst
.neighbour
)
2307 NLA_PUT(skb
, RTA_GATEWAY
, 16, &rt
->dst
.neighbour
->primary_key
);
2310 NLA_PUT_U32(skb
, RTA_OIF
, rt
->rt6i_dev
->ifindex
);
2312 NLA_PUT_U32(skb
, RTA_PRIORITY
, rt
->rt6i_metric
);
2314 if (!(rt
->rt6i_flags
& RTF_EXPIRES
))
2316 else if (rt
->rt6i_expires
- jiffies
< INT_MAX
)
2317 expires
= rt
->rt6i_expires
- jiffies
;
2321 if (rtnl_put_cacheinfo(skb
, &rt
->dst
, 0, 0, 0,
2322 expires
, rt
->dst
.error
) < 0)
2323 goto nla_put_failure
;
2325 return nlmsg_end(skb
, nlh
);
2328 nlmsg_cancel(skb
, nlh
);
2332 int rt6_dump_route(struct rt6_info
*rt
, void *p_arg
)
2334 struct rt6_rtnl_dump_arg
*arg
= (struct rt6_rtnl_dump_arg
*) p_arg
;
2337 if (nlmsg_len(arg
->cb
->nlh
) >= sizeof(struct rtmsg
)) {
2338 struct rtmsg
*rtm
= nlmsg_data(arg
->cb
->nlh
);
2339 prefix
= (rtm
->rtm_flags
& RTM_F_PREFIX
) != 0;
2343 return rt6_fill_node(arg
->net
,
2344 arg
->skb
, rt
, NULL
, NULL
, 0, RTM_NEWROUTE
,
2345 NETLINK_CB(arg
->cb
->skb
).pid
, arg
->cb
->nlh
->nlmsg_seq
,
2346 prefix
, 0, NLM_F_MULTI
);
2349 static int inet6_rtm_getroute(struct sk_buff
*in_skb
, struct nlmsghdr
* nlh
, void *arg
)
2351 struct net
*net
= sock_net(in_skb
->sk
);
2352 struct nlattr
*tb
[RTA_MAX
+1];
2353 struct rt6_info
*rt
;
2354 struct sk_buff
*skb
;
2359 err
= nlmsg_parse(nlh
, sizeof(*rtm
), tb
, RTA_MAX
, rtm_ipv6_policy
);
2364 memset(&fl
, 0, sizeof(fl
));
2367 if (nla_len(tb
[RTA_SRC
]) < sizeof(struct in6_addr
))
2370 ipv6_addr_copy(&fl
.fl6_src
, nla_data(tb
[RTA_SRC
]));
2374 if (nla_len(tb
[RTA_DST
]) < sizeof(struct in6_addr
))
2377 ipv6_addr_copy(&fl
.fl6_dst
, nla_data(tb
[RTA_DST
]));
2381 iif
= nla_get_u32(tb
[RTA_IIF
]);
2384 fl
.oif
= nla_get_u32(tb
[RTA_OIF
]);
2387 struct net_device
*dev
;
2388 dev
= __dev_get_by_index(net
, iif
);
2395 skb
= alloc_skb(NLMSG_GOODSIZE
, GFP_KERNEL
);
2401 /* Reserve room for dummy headers, this skb can pass
2402 through good chunk of routing engine.
2404 skb_reset_mac_header(skb
);
2405 skb_reserve(skb
, MAX_HEADER
+ sizeof(struct ipv6hdr
));
2407 rt
= (struct rt6_info
*) ip6_route_output(net
, NULL
, &fl
);
2408 skb_dst_set(skb
, &rt
->dst
);
2410 err
= rt6_fill_node(net
, skb
, rt
, &fl
.fl6_dst
, &fl
.fl6_src
, iif
,
2411 RTM_NEWROUTE
, NETLINK_CB(in_skb
).pid
,
2412 nlh
->nlmsg_seq
, 0, 0, 0);
2418 err
= rtnl_unicast(skb
, net
, NETLINK_CB(in_skb
).pid
);
2423 void inet6_rt_notify(int event
, struct rt6_info
*rt
, struct nl_info
*info
)
2425 struct sk_buff
*skb
;
2426 struct net
*net
= info
->nl_net
;
2431 seq
= info
->nlh
!= NULL
? info
->nlh
->nlmsg_seq
: 0;
2433 skb
= nlmsg_new(rt6_nlmsg_size(), gfp_any());
2437 err
= rt6_fill_node(net
, skb
, rt
, NULL
, NULL
, 0,
2438 event
, info
->pid
, seq
, 0, 0, 0);
2440 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2441 WARN_ON(err
== -EMSGSIZE
);
2445 rtnl_notify(skb
, net
, info
->pid
, RTNLGRP_IPV6_ROUTE
,
2446 info
->nlh
, gfp_any());
2450 rtnl_set_sk_err(net
, RTNLGRP_IPV6_ROUTE
, err
);
2453 static int ip6_route_dev_notify(struct notifier_block
*this,
2454 unsigned long event
, void *data
)
2456 struct net_device
*dev
= (struct net_device
*)data
;
2457 struct net
*net
= dev_net(dev
);
2459 if (event
== NETDEV_REGISTER
&& (dev
->flags
& IFF_LOOPBACK
)) {
2460 net
->ipv6
.ip6_null_entry
->dst
.dev
= dev
;
2461 net
->ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(dev
);
2462 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2463 net
->ipv6
.ip6_prohibit_entry
->dst
.dev
= dev
;
2464 net
->ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(dev
);
2465 net
->ipv6
.ip6_blk_hole_entry
->dst
.dev
= dev
;
2466 net
->ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(dev
);
2477 #ifdef CONFIG_PROC_FS
2488 static int rt6_info_route(struct rt6_info
*rt
, void *p_arg
)
2490 struct seq_file
*m
= p_arg
;
2492 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_dst
.addr
, rt
->rt6i_dst
.plen
);
2494 #ifdef CONFIG_IPV6_SUBTREES
2495 seq_printf(m
, "%pi6 %02x ", &rt
->rt6i_src
.addr
, rt
->rt6i_src
.plen
);
2497 seq_puts(m
, "00000000000000000000000000000000 00 ");
2500 if (rt
->rt6i_nexthop
) {
2501 seq_printf(m
, "%pi6", rt
->rt6i_nexthop
->primary_key
);
2503 seq_puts(m
, "00000000000000000000000000000000");
2505 seq_printf(m
, " %08x %08x %08x %08x %8s\n",
2506 rt
->rt6i_metric
, atomic_read(&rt
->dst
.__refcnt
),
2507 rt
->dst
.__use
, rt
->rt6i_flags
,
2508 rt
->rt6i_dev
? rt
->rt6i_dev
->name
: "");
2512 static int ipv6_route_show(struct seq_file
*m
, void *v
)
2514 struct net
*net
= (struct net
*)m
->private;
2515 fib6_clean_all(net
, rt6_info_route
, 0, m
);
2519 static int ipv6_route_open(struct inode
*inode
, struct file
*file
)
2521 return single_open_net(inode
, file
, ipv6_route_show
);
2524 static const struct file_operations ipv6_route_proc_fops
= {
2525 .owner
= THIS_MODULE
,
2526 .open
= ipv6_route_open
,
2528 .llseek
= seq_lseek
,
2529 .release
= single_release_net
,
2532 static int rt6_stats_seq_show(struct seq_file
*seq
, void *v
)
2534 struct net
*net
= (struct net
*)seq
->private;
2535 seq_printf(seq
, "%04x %04x %04x %04x %04x %04x %04x\n",
2536 net
->ipv6
.rt6_stats
->fib_nodes
,
2537 net
->ipv6
.rt6_stats
->fib_route_nodes
,
2538 net
->ipv6
.rt6_stats
->fib_rt_alloc
,
2539 net
->ipv6
.rt6_stats
->fib_rt_entries
,
2540 net
->ipv6
.rt6_stats
->fib_rt_cache
,
2541 dst_entries_get_slow(&net
->ipv6
.ip6_dst_ops
),
2542 net
->ipv6
.rt6_stats
->fib_discarded_routes
);
2547 static int rt6_stats_seq_open(struct inode
*inode
, struct file
*file
)
2549 return single_open_net(inode
, file
, rt6_stats_seq_show
);
2552 static const struct file_operations rt6_stats_seq_fops
= {
2553 .owner
= THIS_MODULE
,
2554 .open
= rt6_stats_seq_open
,
2556 .llseek
= seq_lseek
,
2557 .release
= single_release_net
,
2559 #endif /* CONFIG_PROC_FS */
2561 #ifdef CONFIG_SYSCTL
2564 int ipv6_sysctl_rtcache_flush(ctl_table
*ctl
, int write
,
2565 void __user
*buffer
, size_t *lenp
, loff_t
*ppos
)
2567 struct net
*net
= current
->nsproxy
->net_ns
;
2568 int delay
= net
->ipv6
.sysctl
.flush_delay
;
2570 proc_dointvec(ctl
, write
, buffer
, lenp
, ppos
);
2571 fib6_run_gc(delay
<= 0 ? ~0UL : (unsigned long)delay
, net
);
2577 ctl_table ipv6_route_table_template
[] = {
2579 .procname
= "flush",
2580 .data
= &init_net
.ipv6
.sysctl
.flush_delay
,
2581 .maxlen
= sizeof(int),
2583 .proc_handler
= ipv6_sysctl_rtcache_flush
2586 .procname
= "gc_thresh",
2587 .data
= &ip6_dst_ops_template
.gc_thresh
,
2588 .maxlen
= sizeof(int),
2590 .proc_handler
= proc_dointvec
,
2593 .procname
= "max_size",
2594 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_max_size
,
2595 .maxlen
= sizeof(int),
2597 .proc_handler
= proc_dointvec
,
2600 .procname
= "gc_min_interval",
2601 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2602 .maxlen
= sizeof(int),
2604 .proc_handler
= proc_dointvec_jiffies
,
2607 .procname
= "gc_timeout",
2608 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_timeout
,
2609 .maxlen
= sizeof(int),
2611 .proc_handler
= proc_dointvec_jiffies
,
2614 .procname
= "gc_interval",
2615 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_interval
,
2616 .maxlen
= sizeof(int),
2618 .proc_handler
= proc_dointvec_jiffies
,
2621 .procname
= "gc_elasticity",
2622 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_elasticity
,
2623 .maxlen
= sizeof(int),
2625 .proc_handler
= proc_dointvec
,
2628 .procname
= "mtu_expires",
2629 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_mtu_expires
,
2630 .maxlen
= sizeof(int),
2632 .proc_handler
= proc_dointvec_jiffies
,
2635 .procname
= "min_adv_mss",
2636 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_min_advmss
,
2637 .maxlen
= sizeof(int),
2639 .proc_handler
= proc_dointvec
,
2642 .procname
= "gc_min_interval_ms",
2643 .data
= &init_net
.ipv6
.sysctl
.ip6_rt_gc_min_interval
,
2644 .maxlen
= sizeof(int),
2646 .proc_handler
= proc_dointvec_ms_jiffies
,
2651 struct ctl_table
* __net_init
ipv6_route_sysctl_init(struct net
*net
)
2653 struct ctl_table
*table
;
2655 table
= kmemdup(ipv6_route_table_template
,
2656 sizeof(ipv6_route_table_template
),
2660 table
[0].data
= &net
->ipv6
.sysctl
.flush_delay
;
2661 table
[1].data
= &net
->ipv6
.ip6_dst_ops
.gc_thresh
;
2662 table
[2].data
= &net
->ipv6
.sysctl
.ip6_rt_max_size
;
2663 table
[3].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
2664 table
[4].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_timeout
;
2665 table
[5].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_interval
;
2666 table
[6].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
;
2667 table
[7].data
= &net
->ipv6
.sysctl
.ip6_rt_mtu_expires
;
2668 table
[8].data
= &net
->ipv6
.sysctl
.ip6_rt_min_advmss
;
2669 table
[9].data
= &net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
;
2676 static int __net_init
ip6_route_net_init(struct net
*net
)
2680 memcpy(&net
->ipv6
.ip6_dst_ops
, &ip6_dst_ops_template
,
2681 sizeof(net
->ipv6
.ip6_dst_ops
));
2683 if (dst_entries_init(&net
->ipv6
.ip6_dst_ops
) < 0)
2684 goto out_ip6_dst_ops
;
2686 net
->ipv6
.ip6_null_entry
= kmemdup(&ip6_null_entry_template
,
2687 sizeof(*net
->ipv6
.ip6_null_entry
),
2689 if (!net
->ipv6
.ip6_null_entry
)
2690 goto out_ip6_dst_entries
;
2691 net
->ipv6
.ip6_null_entry
->dst
.path
=
2692 (struct dst_entry
*)net
->ipv6
.ip6_null_entry
;
2693 net
->ipv6
.ip6_null_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2694 dst_metric_set(&net
->ipv6
.ip6_null_entry
->dst
, RTAX_HOPLIMIT
, 255);
2696 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2697 net
->ipv6
.ip6_prohibit_entry
= kmemdup(&ip6_prohibit_entry_template
,
2698 sizeof(*net
->ipv6
.ip6_prohibit_entry
),
2700 if (!net
->ipv6
.ip6_prohibit_entry
)
2701 goto out_ip6_null_entry
;
2702 net
->ipv6
.ip6_prohibit_entry
->dst
.path
=
2703 (struct dst_entry
*)net
->ipv6
.ip6_prohibit_entry
;
2704 net
->ipv6
.ip6_prohibit_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2705 dst_metric_set(&net
->ipv6
.ip6_prohibit_entry
->dst
, RTAX_HOPLIMIT
, 255);
2707 net
->ipv6
.ip6_blk_hole_entry
= kmemdup(&ip6_blk_hole_entry_template
,
2708 sizeof(*net
->ipv6
.ip6_blk_hole_entry
),
2710 if (!net
->ipv6
.ip6_blk_hole_entry
)
2711 goto out_ip6_prohibit_entry
;
2712 net
->ipv6
.ip6_blk_hole_entry
->dst
.path
=
2713 (struct dst_entry
*)net
->ipv6
.ip6_blk_hole_entry
;
2714 net
->ipv6
.ip6_blk_hole_entry
->dst
.ops
= &net
->ipv6
.ip6_dst_ops
;
2715 dst_metric_set(&net
->ipv6
.ip6_blk_hole_entry
->dst
, RTAX_HOPLIMIT
, 255);
2718 net
->ipv6
.sysctl
.flush_delay
= 0;
2719 net
->ipv6
.sysctl
.ip6_rt_max_size
= 4096;
2720 net
->ipv6
.sysctl
.ip6_rt_gc_min_interval
= HZ
/ 2;
2721 net
->ipv6
.sysctl
.ip6_rt_gc_timeout
= 60*HZ
;
2722 net
->ipv6
.sysctl
.ip6_rt_gc_interval
= 30*HZ
;
2723 net
->ipv6
.sysctl
.ip6_rt_gc_elasticity
= 9;
2724 net
->ipv6
.sysctl
.ip6_rt_mtu_expires
= 10*60*HZ
;
2725 net
->ipv6
.sysctl
.ip6_rt_min_advmss
= IPV6_MIN_MTU
- 20 - 40;
2727 #ifdef CONFIG_PROC_FS
2728 proc_net_fops_create(net
, "ipv6_route", 0, &ipv6_route_proc_fops
);
2729 proc_net_fops_create(net
, "rt6_stats", S_IRUGO
, &rt6_stats_seq_fops
);
2731 net
->ipv6
.ip6_rt_gc_expire
= 30*HZ
;
2737 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2738 out_ip6_prohibit_entry
:
2739 kfree(net
->ipv6
.ip6_prohibit_entry
);
2741 kfree(net
->ipv6
.ip6_null_entry
);
2743 out_ip6_dst_entries
:
2744 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
2749 static void __net_exit
ip6_route_net_exit(struct net
*net
)
2751 #ifdef CONFIG_PROC_FS
2752 proc_net_remove(net
, "ipv6_route");
2753 proc_net_remove(net
, "rt6_stats");
2755 kfree(net
->ipv6
.ip6_null_entry
);
2756 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2757 kfree(net
->ipv6
.ip6_prohibit_entry
);
2758 kfree(net
->ipv6
.ip6_blk_hole_entry
);
2760 dst_entries_destroy(&net
->ipv6
.ip6_dst_ops
);
2763 static struct pernet_operations ip6_route_net_ops
= {
2764 .init
= ip6_route_net_init
,
2765 .exit
= ip6_route_net_exit
,
2768 static struct notifier_block ip6_route_dev_notifier
= {
2769 .notifier_call
= ip6_route_dev_notify
,
2773 int __init
ip6_route_init(void)
2778 ip6_dst_ops_template
.kmem_cachep
=
2779 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info
), 0,
2780 SLAB_HWCACHE_ALIGN
, NULL
);
2781 if (!ip6_dst_ops_template
.kmem_cachep
)
2784 ret
= dst_entries_init(&ip6_dst_blackhole_ops
);
2786 goto out_kmem_cache
;
2788 ret
= register_pernet_subsys(&ip6_route_net_ops
);
2790 goto out_dst_entries
;
2792 ip6_dst_blackhole_ops
.kmem_cachep
= ip6_dst_ops_template
.kmem_cachep
;
2794 /* Registering of the loopback is done before this portion of code,
2795 * the loopback reference in rt6_info will not be taken, do it
2796 * manually for init_net */
2797 init_net
.ipv6
.ip6_null_entry
->dst
.dev
= init_net
.loopback_dev
;
2798 init_net
.ipv6
.ip6_null_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2799 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2800 init_net
.ipv6
.ip6_prohibit_entry
->dst
.dev
= init_net
.loopback_dev
;
2801 init_net
.ipv6
.ip6_prohibit_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2802 init_net
.ipv6
.ip6_blk_hole_entry
->dst
.dev
= init_net
.loopback_dev
;
2803 init_net
.ipv6
.ip6_blk_hole_entry
->rt6i_idev
= in6_dev_get(init_net
.loopback_dev
);
2807 goto out_register_subsys
;
2813 ret
= fib6_rules_init();
2818 if (__rtnl_register(PF_INET6
, RTM_NEWROUTE
, inet6_rtm_newroute
, NULL
) ||
2819 __rtnl_register(PF_INET6
, RTM_DELROUTE
, inet6_rtm_delroute
, NULL
) ||
2820 __rtnl_register(PF_INET6
, RTM_GETROUTE
, inet6_rtm_getroute
, NULL
))
2821 goto fib6_rules_init
;
2823 ret
= register_netdevice_notifier(&ip6_route_dev_notifier
);
2825 goto fib6_rules_init
;
2831 fib6_rules_cleanup();
2836 out_register_subsys
:
2837 unregister_pernet_subsys(&ip6_route_net_ops
);
2839 dst_entries_destroy(&ip6_dst_blackhole_ops
);
2841 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);
2845 void ip6_route_cleanup(void)
2847 unregister_netdevice_notifier(&ip6_route_dev_notifier
);
2848 fib6_rules_cleanup();
2851 unregister_pernet_subsys(&ip6_route_net_ops
);
2852 dst_entries_destroy(&ip6_dst_blackhole_ops
);
2853 kmem_cache_destroy(ip6_dst_ops_template
.kmem_cachep
);