]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv6/route.c
can: ti_hecc: include linux/io.h
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
/*
 * Debug level for this file.  Raising it to 3 (or above) turns RDBG()
 * and RT6_TRACE() into printk() calls; otherwise both expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#endif
74
21efcfa0
ED
75static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
76 const struct in6_addr *dest);
1da177e4 77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 78static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 79static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
569d3645 84static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
85
86static int ip6_pkt_discard(struct sk_buff *skb);
87static int ip6_pkt_discard_out(struct sk_buff *skb);
88static void ip6_link_failure(struct sk_buff *skb);
89static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90
70ceb4f5 91#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 92static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
93 const struct in6_addr *prefix, int prefixlen,
94 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 95 unsigned pref);
efa2cea0 96static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
97 const struct in6_addr *prefix, int prefixlen,
98 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
99#endif
100
06582540
DM
101static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
102{
103 struct rt6_info *rt = (struct rt6_info *) dst;
104 struct inet_peer *peer;
105 u32 *p = NULL;
106
107 if (!rt->rt6i_peer)
108 rt6_bind_peer(rt, 1);
109
110 peer = rt->rt6i_peer;
111 if (peer) {
112 u32 *old_p = __DST_METRICS_PTR(old);
113 unsigned long prev, new;
114
115 p = peer->metrics;
116 if (inet_metrics_new(peer))
117 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
118
119 new = (unsigned long) p;
120 prev = cmpxchg(&dst->_metrics, old, new);
121
122 if (prev != old) {
123 p = __DST_METRICS_PTR(prev);
124 if (prev & DST_METRICS_READ_ONLY)
125 p = NULL;
126 }
127 }
128 return p;
129}
130
d3aaeb38
DM
131static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
132{
133 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
134}
135
9a7ec3a9 136static struct dst_ops ip6_dst_ops_template = {
1da177e4 137 .family = AF_INET6,
09640e63 138 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
139 .gc = ip6_dst_gc,
140 .gc_thresh = 1024,
141 .check = ip6_dst_check,
0dbaee3b 142 .default_advmss = ip6_default_advmss,
d33e4553 143 .default_mtu = ip6_default_mtu,
06582540 144 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
145 .destroy = ip6_dst_destroy,
146 .ifdown = ip6_dst_ifdown,
147 .negative_advice = ip6_negative_advice,
148 .link_failure = ip6_link_failure,
149 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 150 .local_out = __ip6_local_out,
d3aaeb38 151 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
152};
153
/* Blackhole dst_ops->default_mtu hook: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int no_mtu = 0;

	return no_mtu;
}
158
14e50e57
DM
159static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
160{
161}
162
0972ddb2
HB
163static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
164 unsigned long old)
165{
166 return NULL;
167}
168
14e50e57
DM
169static struct dst_ops ip6_dst_blackhole_ops = {
170 .family = AF_INET6,
09640e63 171 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
172 .destroy = ip6_dst_destroy,
173 .check = ip6_dst_check,
ec831ea7 174 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 175 .default_advmss = ip6_default_advmss,
14e50e57 176 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 177 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 178 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
179};
180
62fa8a84
DM
181static const u32 ip6_template_metrics[RTAX_MAX] = {
182 [RTAX_HOPLIMIT - 1] = 255,
183};
184
bdb3289f 185static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
186 .dst = {
187 .__refcnt = ATOMIC_INIT(1),
188 .__use = 1,
189 .obsolete = -1,
190 .error = -ENETUNREACH,
d8d1f30b
CG
191 .input = ip6_pkt_discard,
192 .output = ip6_pkt_discard_out,
1da177e4
LT
193 },
194 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 195 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
196 .rt6i_metric = ~(u32) 0,
197 .rt6i_ref = ATOMIC_INIT(1),
198};
199
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/* Template for the "prohibit" route: reject entry reporting -EACCES. */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route: reject entry reporting -EINVAL
 * whose input and output handlers are both dst_discard.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
236
1da177e4 237/* allocate dst with ip6_dst_ops */
5c1e6aa3 238static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
239 struct net_device *dev,
240 int flags)
1da177e4 241{
957c665f 242 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662
DM
243
244 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
245
246 return rt;
1da177e4
LT
247}
248
249static void ip6_dst_destroy(struct dst_entry *dst)
250{
251 struct rt6_info *rt = (struct rt6_info *)dst;
252 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 253 struct inet_peer *peer = rt->rt6i_peer;
1da177e4
LT
254
255 if (idev != NULL) {
256 rt->rt6i_idev = NULL;
257 in6_dev_put(idev);
1ab1457c 258 }
b3419363 259 if (peer) {
b3419363
DM
260 rt->rt6i_peer = NULL;
261 inet_putpeer(peer);
262 }
263}
264
6431cbc2
DM
265static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
266
267static u32 rt6_peer_genid(void)
268{
269 return atomic_read(&__rt6_peer_genid);
270}
271
b3419363
DM
272void rt6_bind_peer(struct rt6_info *rt, int create)
273{
274 struct inet_peer *peer;
275
b3419363
DM
276 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
277 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
278 inet_putpeer(peer);
6431cbc2
DM
279 else
280 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
281}
282
283static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
284 int how)
285{
286 struct rt6_info *rt = (struct rt6_info *)dst;
287 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 288 struct net_device *loopback_dev =
c346dca1 289 dev_net(dev)->loopback_dev;
1da177e4 290
5a3e55d6
DL
291 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
292 struct inet6_dev *loopback_idev =
293 in6_dev_get(loopback_dev);
1da177e4
LT
294 if (loopback_idev != NULL) {
295 rt->rt6i_idev = loopback_idev;
296 in6_dev_put(idev);
297 }
298 }
299}
300
301static __inline__ int rt6_check_expired(const struct rt6_info *rt)
302{
a02cec21
ED
303 return (rt->rt6i_flags & RTF_EXPIRES) &&
304 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
305}
306
b71d1d42 307static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 308{
a02cec21
ED
309 return ipv6_addr_type(daddr) &
310 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
311}
312
1da177e4 313/*
c71099ac 314 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
315 */
316
8ed67789
DL
317static inline struct rt6_info *rt6_device_match(struct net *net,
318 struct rt6_info *rt,
b71d1d42 319 const struct in6_addr *saddr,
1da177e4 320 int oif,
d420895e 321 int flags)
1da177e4
LT
322{
323 struct rt6_info *local = NULL;
324 struct rt6_info *sprt;
325
dd3abc4e
YH
326 if (!oif && ipv6_addr_any(saddr))
327 goto out;
328
d8d1f30b 329 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
330 struct net_device *dev = sprt->rt6i_dev;
331
332 if (oif) {
1da177e4
LT
333 if (dev->ifindex == oif)
334 return sprt;
335 if (dev->flags & IFF_LOOPBACK) {
336 if (sprt->rt6i_idev == NULL ||
337 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 338 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 339 continue;
1ab1457c 340 if (local && (!oif ||
1da177e4
LT
341 local->rt6i_idev->dev->ifindex == oif))
342 continue;
343 }
344 local = sprt;
345 }
dd3abc4e
YH
346 } else {
347 if (ipv6_chk_addr(net, saddr, dev,
348 flags & RT6_LOOKUP_F_IFACE))
349 return sprt;
1da177e4 350 }
dd3abc4e 351 }
1da177e4 352
dd3abc4e 353 if (oif) {
1da177e4
LT
354 if (local)
355 return local;
356
d420895e 357 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 358 return net->ipv6.ip6_null_entry;
1da177e4 359 }
dd3abc4e 360out:
1da177e4
LT
361 return rt;
362}
363
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the route's neighbour entry is not in a NUD_VALID state and the
 * per-device rtr_probe_interval has elapsed since it was last updated,
 * send a neighbour solicitation to the solicited-node multicast address
 * of the neighbour.  A NULL @rt, or a route without neighbour, is a
 * no-op.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held - confirm that this is intended.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int send_probe;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	read_lock_bh(&neigh->lock);
	send_probe = !(neigh->nud_state & NUD_VALID) &&
		     time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (send_probe)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (send_probe) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
403
1da177e4 404/*
554cfb7e 405 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 406 */
b6f99a21 407static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
408{
409 struct net_device *dev = rt->rt6i_dev;
161980f4 410 if (!oif || dev->ifindex == oif)
554cfb7e 411 return 2;
161980f4
DM
412 if ((dev->flags & IFF_LOOPBACK) &&
413 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
414 return 1;
415 return 0;
554cfb7e 416}
1da177e4 417
b6f99a21 418static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 419{
f2c31e32 420 struct neighbour *neigh;
398bcbeb 421 int m;
f2c31e32
ED
422
423 rcu_read_lock();
424 neigh = dst_get_neighbour(&rt->dst);
4d0c5911
YH
425 if (rt->rt6i_flags & RTF_NONEXTHOP ||
426 !(rt->rt6i_flags & RTF_GATEWAY))
427 m = 1;
428 else if (neigh) {
554cfb7e
YH
429 read_lock_bh(&neigh->lock);
430 if (neigh->nud_state & NUD_VALID)
4d0c5911 431 m = 2;
398bcbeb
YH
432#ifdef CONFIG_IPV6_ROUTER_PREF
433 else if (neigh->nud_state & NUD_FAILED)
434 m = 0;
435#endif
436 else
ea73ee23 437 m = 1;
554cfb7e 438 read_unlock_bh(&neigh->lock);
398bcbeb
YH
439 } else
440 m = 0;
f2c31e32 441 rcu_read_unlock();
554cfb7e 442 return m;
1da177e4
LT
443}
444
554cfb7e
YH
445static int rt6_score_route(struct rt6_info *rt, int oif,
446 int strict)
1da177e4 447{
4d0c5911 448 int m, n;
1ab1457c 449
4d0c5911 450 m = rt6_check_dev(rt, oif);
77d16f45 451 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 452 return -1;
ebacaaa0
YH
453#ifdef CONFIG_IPV6_ROUTER_PREF
454 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
455#endif
4d0c5911 456 n = rt6_check_neigh(rt);
557e92ef 457 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
458 return -1;
459 return m;
460}
461
f11e6659
DM
462static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
463 int *mpri, struct rt6_info *match)
554cfb7e 464{
f11e6659
DM
465 int m;
466
467 if (rt6_check_expired(rt))
468 goto out;
469
470 m = rt6_score_route(rt, oif, strict);
471 if (m < 0)
472 goto out;
473
474 if (m > *mpri) {
475 if (strict & RT6_LOOKUP_F_REACHABLE)
476 rt6_probe(match);
477 *mpri = m;
478 match = rt;
479 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
480 rt6_probe(rt);
481 }
482
483out:
484 return match;
485}
486
487static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
488 struct rt6_info *rr_head,
489 u32 metric, int oif, int strict)
490{
491 struct rt6_info *rt, *match;
554cfb7e 492 int mpri = -1;
1da177e4 493
f11e6659
DM
494 match = NULL;
495 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 496 rt = rt->dst.rt6_next)
f11e6659
DM
497 match = find_match(rt, oif, strict, &mpri, match);
498 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 499 rt = rt->dst.rt6_next)
f11e6659 500 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 501
f11e6659
DM
502 return match;
503}
1da177e4 504
f11e6659
DM
505static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
506{
507 struct rt6_info *match, *rt0;
8ed67789 508 struct net *net;
1da177e4 509
f11e6659 510 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 511 __func__, fn->leaf, oif);
554cfb7e 512
f11e6659
DM
513 rt0 = fn->rr_ptr;
514 if (!rt0)
515 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 516
f11e6659 517 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 518
554cfb7e 519 if (!match &&
f11e6659 520 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 521 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 522
554cfb7e 523 /* no entries matched; do round-robin */
f11e6659
DM
524 if (!next || next->rt6i_metric != rt0->rt6i_metric)
525 next = fn->leaf;
526
527 if (next != rt0)
528 fn->rr_ptr = next;
1da177e4 529 }
1da177e4 530
f11e6659 531 RT6_TRACE("%s() => %p\n",
0dc47877 532 __func__, match);
1da177e4 533
c346dca1 534 net = dev_net(rt0->rt6i_dev);
a02cec21 535 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
536}
537
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @opt/@len: the option and its length in bytes
 * @gwaddr:   address of the advertising router
 *
 * Validates the option, then adds, refreshes or (for a zero lifetime)
 * deletes the corresponding route.  Returns 0 on success or -EINVAL for
 * a malformed option or an invalid preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime)) {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		} else {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
611
/*
 * Used inside the lookup functions after a selection step.  Expects the
 * locals "rt" and "fn" and the labels "out" and "restart" in the
 * enclosing function.
 *
 * When "rt" is the null entry, climb towards the tree root (descending
 * into a parent's source subtree via fib6_lookup() where one exists)
 * until a node carrying route info is found, then jump to "restart".
 * Reaching the top-level root jumps to "out" instead.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 629
8ed67789
DL
630static struct rt6_info *ip6_pol_route_lookup(struct net *net,
631 struct fib6_table *table,
4c9483b2 632 struct flowi6 *fl6, int flags)
1da177e4
LT
633{
634 struct fib6_node *fn;
635 struct rt6_info *rt;
636
c71099ac 637 read_lock_bh(&table->tb6_lock);
4c9483b2 638 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
639restart:
640 rt = fn->leaf;
4c9483b2
DM
641 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
642 BACKTRACK(net, &fl6->saddr);
c71099ac 643out:
d8d1f30b 644 dst_use(&rt->dst, jiffies);
c71099ac 645 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
646 return rt;
647
648}
649
9acd9f3a
YH
650struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
651 const struct in6_addr *saddr, int oif, int strict)
c71099ac 652{
4c9483b2
DM
653 struct flowi6 fl6 = {
654 .flowi6_oif = oif,
655 .daddr = *daddr,
c71099ac
TG
656 };
657 struct dst_entry *dst;
77d16f45 658 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 659
adaa70bb 660 if (saddr) {
4c9483b2 661 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
662 flags |= RT6_LOOKUP_F_HAS_SADDR;
663 }
664
4c9483b2 665 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
666 if (dst->error == 0)
667 return (struct rt6_info *) dst;
668
669 dst_release(dst);
670
1da177e4
LT
671 return NULL;
672}
673
7159039a
YH
674EXPORT_SYMBOL(rt6_lookup);
675
c71099ac 676/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
677 It takes new route entry, the addition fails by any reason the
678 route is freed. In any case, if caller does not hold it, it may
679 be destroyed.
680 */
681
86872cb5 682static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
683{
684 int err;
c71099ac 685 struct fib6_table *table;
1da177e4 686
c71099ac
TG
687 table = rt->rt6i_table;
688 write_lock_bh(&table->tb6_lock);
86872cb5 689 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 690 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
691
692 return err;
693}
694
40e22e8f
TG
695int ip6_ins_rt(struct rt6_info *rt)
696{
4d1169c1 697 struct nl_info info = {
c346dca1 698 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 699 };
528c4ceb 700 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
701}
702
21efcfa0
ED
703static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
704 const struct in6_addr *daddr,
b71d1d42 705 const struct in6_addr *saddr)
1da177e4 706{
1da177e4
LT
707 struct rt6_info *rt;
708
709 /*
710 * Clone the route.
711 */
712
21efcfa0 713 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
714
715 if (rt) {
14deae41
DM
716 struct neighbour *neigh;
717 int attempts = !in_softirq();
718
58c4fb86
YH
719 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
720 if (rt->rt6i_dst.plen != 128 &&
21efcfa0 721 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 722 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 723 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 724 }
1da177e4
LT
725
726 rt->rt6i_dst.plen = 128;
727 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 728 rt->dst.flags |= DST_HOST;
1da177e4
LT
729
730#ifdef CONFIG_IPV6_SUBTREES
731 if (rt->rt6i_src.plen && saddr) {
732 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
733 rt->rt6i_src.plen = 128;
734 }
735#endif
736
14deae41
DM
737 retry:
738 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
739 if (IS_ERR(neigh)) {
740 struct net *net = dev_net(rt->rt6i_dev);
741 int saved_rt_min_interval =
742 net->ipv6.sysctl.ip6_rt_gc_min_interval;
743 int saved_rt_elasticity =
744 net->ipv6.sysctl.ip6_rt_gc_elasticity;
745
746 if (attempts-- > 0) {
747 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
748 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
749
86393e52 750 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
751
752 net->ipv6.sysctl.ip6_rt_gc_elasticity =
753 saved_rt_elasticity;
754 net->ipv6.sysctl.ip6_rt_gc_min_interval =
755 saved_rt_min_interval;
756 goto retry;
757 }
758
759 if (net_ratelimit())
760 printk(KERN_WARNING
7e1b33e5 761 "ipv6: Neighbour table overflow.\n");
d8d1f30b 762 dst_free(&rt->dst);
14deae41
DM
763 return NULL;
764 }
69cce1d1 765 dst_set_neighbour(&rt->dst, neigh);
1da177e4 766
95a9a5ba 767 }
1da177e4 768
95a9a5ba
YH
769 return rt;
770}
1da177e4 771
21efcfa0
ED
772static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
773 const struct in6_addr *daddr)
299d9939 774{
21efcfa0
ED
775 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
776
299d9939 777 if (rt) {
299d9939
YH
778 rt->rt6i_dst.plen = 128;
779 rt->rt6i_flags |= RTF_CACHE;
d8d1f30b 780 rt->dst.flags |= DST_HOST;
f2c31e32 781 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
299d9939
YH
782 }
783 return rt;
784}
785
8ed67789 786static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 787 struct flowi6 *fl6, int flags)
1da177e4
LT
788{
789 struct fib6_node *fn;
519fbd87 790 struct rt6_info *rt, *nrt;
c71099ac 791 int strict = 0;
1da177e4 792 int attempts = 3;
519fbd87 793 int err;
53b7997f 794 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 795
77d16f45 796 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
797
798relookup:
c71099ac 799 read_lock_bh(&table->tb6_lock);
1da177e4 800
8238dd06 801restart_2:
4c9483b2 802 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
803
804restart:
4acad72d 805 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 806
4c9483b2 807 BACKTRACK(net, &fl6->saddr);
8ed67789 808 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 809 rt->rt6i_flags & RTF_CACHE)
1ddef044 810 goto out;
1da177e4 811
d8d1f30b 812 dst_hold(&rt->dst);
c71099ac 813 read_unlock_bh(&table->tb6_lock);
fb9de91e 814
f2c31e32 815 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 816 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 817 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 818 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
819 else
820 goto out2;
e40cf353 821
d8d1f30b 822 dst_release(&rt->dst);
8ed67789 823 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 824
d8d1f30b 825 dst_hold(&rt->dst);
519fbd87 826 if (nrt) {
40e22e8f 827 err = ip6_ins_rt(nrt);
519fbd87 828 if (!err)
1da177e4 829 goto out2;
1da177e4 830 }
1da177e4 831
519fbd87
YH
832 if (--attempts <= 0)
833 goto out2;
834
835 /*
c71099ac 836 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
837 * released someone could insert this route. Relookup.
838 */
d8d1f30b 839 dst_release(&rt->dst);
519fbd87
YH
840 goto relookup;
841
842out:
8238dd06
YH
843 if (reachable) {
844 reachable = 0;
845 goto restart_2;
846 }
d8d1f30b 847 dst_hold(&rt->dst);
c71099ac 848 read_unlock_bh(&table->tb6_lock);
1da177e4 849out2:
d8d1f30b
CG
850 rt->dst.lastuse = jiffies;
851 rt->dst.__use++;
c71099ac
TG
852
853 return rt;
1da177e4
LT
854}
855
8ed67789 856static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 857 struct flowi6 *fl6, int flags)
4acad72d 858{
4c9483b2 859 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
860}
861
c71099ac
TG
862void ip6_route_input(struct sk_buff *skb)
863{
b71d1d42 864 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 865 struct net *net = dev_net(skb->dev);
adaa70bb 866 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
867 struct flowi6 fl6 = {
868 .flowi6_iif = skb->dev->ifindex,
869 .daddr = iph->daddr,
870 .saddr = iph->saddr,
871 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
872 .flowi6_mark = skb->mark,
873 .flowi6_proto = iph->nexthdr,
c71099ac 874 };
adaa70bb 875
1d6e55f1 876 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 877 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 878
4c9483b2 879 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
880}
881
8ed67789 882static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 883 struct flowi6 *fl6, int flags)
1da177e4 884{
4c9483b2 885 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
886}
887
9c7a4f9c 888struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 889 struct flowi6 *fl6)
c71099ac
TG
890{
891 int flags = 0;
892
4c9483b2 893 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 894 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 895
4c9483b2 896 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 897 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
898 else if (sk)
899 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 900
4c9483b2 901 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
902}
903
7159039a 904EXPORT_SYMBOL(ip6_route_output);
1da177e4 905
2774c131 906struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 907{
5c1e6aa3 908 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
909 struct dst_entry *new = NULL;
910
5c1e6aa3 911 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 912 if (rt) {
cf911662
DM
913 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
914
d8d1f30b 915 new = &rt->dst;
14e50e57 916
14e50e57 917 new->__use = 1;
352e512c
HX
918 new->input = dst_discard;
919 new->output = dst_discard;
14e50e57 920
21efcfa0
ED
921 if (dst_metrics_read_only(&ort->dst))
922 new->_metrics = ort->dst._metrics;
923 else
924 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
925 rt->rt6i_idev = ort->rt6i_idev;
926 if (rt->rt6i_idev)
927 in6_dev_hold(rt->rt6i_idev);
928 rt->rt6i_expires = 0;
929
930 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
931 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
932 rt->rt6i_metric = 0;
933
934 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
935#ifdef CONFIG_IPV6_SUBTREES
936 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
937#endif
938
939 dst_free(new);
940 }
941
69ead7af
DM
942 dst_release(dst_orig);
943 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 944}
14e50e57 945
1da177e4
LT
946/*
947 * Destination cache support functions
948 */
949
950static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
951{
952 struct rt6_info *rt;
953
954 rt = (struct rt6_info *) dst;
955
6431cbc2
DM
956 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
957 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
958 if (!rt->rt6i_peer)
959 rt6_bind_peer(rt, 0);
960 rt->rt6i_peer_genid = rt6_peer_genid();
961 }
1da177e4 962 return dst;
6431cbc2 963 }
1da177e4
LT
964 return NULL;
965}
966
967static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
968{
969 struct rt6_info *rt = (struct rt6_info *) dst;
970
971 if (rt) {
54c1a859
YH
972 if (rt->rt6i_flags & RTF_CACHE) {
973 if (rt6_check_expired(rt)) {
974 ip6_del_rt(rt);
975 dst = NULL;
976 }
977 } else {
1da177e4 978 dst_release(dst);
54c1a859
YH
979 dst = NULL;
980 }
1da177e4 981 }
54c1a859 982 return dst;
1da177e4
LT
983}
984
985static void ip6_link_failure(struct sk_buff *skb)
986{
987 struct rt6_info *rt;
988
3ffe533c 989 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 990
adf30907 991 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
992 if (rt) {
993 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 994 dst_set_expires(&rt->dst, 0);
1da177e4
LT
995 rt->rt6i_flags |= RTF_EXPIRES;
996 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
997 rt->rt6i_node->fn_sernum = -1;
998 }
999}
1000
1001static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1002{
1003 struct rt6_info *rt6 = (struct rt6_info*)dst;
1004
1005 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1006 rt6->rt6i_flags |= RTF_MODIFIED;
1007 if (mtu < IPV6_MIN_MTU) {
defb3519 1008 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1009 mtu = IPV6_MIN_MTU;
defb3519
DM
1010 features |= RTAX_FEATURE_ALLFRAG;
1011 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1012 }
defb3519 1013 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1014 }
1015}
1016
0dbaee3b 1017static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1018{
0dbaee3b
DM
1019 struct net_device *dev = dst->dev;
1020 unsigned int mtu = dst_mtu(dst);
1021 struct net *net = dev_net(dev);
1022
1da177e4
LT
1023 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1024
5578689a
DL
1025 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1026 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1027
1028 /*
1ab1457c
YH
1029 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1030 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1031 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1032 * rely only on pmtu discovery"
1033 */
1034 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1035 mtu = IPV6_MAXPLEN;
1036 return mtu;
1037}
1038
d33e4553
DM
1039static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1040{
1041 unsigned int mtu = IPV6_MIN_MTU;
1042 struct inet6_dev *idev;
1043
1044 rcu_read_lock();
1045 idev = __in6_dev_get(dst->dev);
1046 if (idev)
1047 mtu = idev->cnf.mtu6;
1048 rcu_read_unlock();
1049
1050 return mtu;
1051}
1052
3b00944c
YH
1053static struct dst_entry *icmp6_dst_gc_list;
1054static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1055
3b00944c 1056struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1057 struct neighbour *neigh,
9acd9f3a 1058 const struct in6_addr *addr)
1da177e4
LT
1059{
1060 struct rt6_info *rt;
1061 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1062 struct net *net = dev_net(dev);
1da177e4
LT
1063
1064 if (unlikely(idev == NULL))
1065 return NULL;
1066
957c665f 1067 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1da177e4
LT
1068 if (unlikely(rt == NULL)) {
1069 in6_dev_put(idev);
1070 goto out;
1071 }
1072
1da177e4
LT
1073 if (neigh)
1074 neigh_hold(neigh);
14deae41 1075 else {
1da177e4 1076 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1077 if (IS_ERR(neigh))
1078 neigh = NULL;
1079 }
1da177e4 1080
1da177e4 1081 rt->rt6i_idev = idev;
69cce1d1 1082 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1083 atomic_set(&rt->dst.__refcnt, 1);
21efcfa0 1084 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
defb3519 1085 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
d8d1f30b 1086 rt->dst.output = ip6_output;
1da177e4 1087
3b00944c 1088 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1089 rt->dst.next = icmp6_dst_gc_list;
1090 icmp6_dst_gc_list = &rt->dst;
3b00944c 1091 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1092
5578689a 1093 fib6_force_start_gc(net);
1da177e4
LT
1094
1095out:
d8d1f30b 1096 return &rt->dst;
1da177e4
LT
1097}
1098
3d0f24a7 1099int icmp6_dst_gc(void)
1da177e4 1100{
e9476e95 1101 struct dst_entry *dst, **pprev;
3d0f24a7 1102 int more = 0;
1da177e4 1103
3b00944c
YH
1104 spin_lock_bh(&icmp6_dst_lock);
1105 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1106
1da177e4
LT
1107 while ((dst = *pprev) != NULL) {
1108 if (!atomic_read(&dst->__refcnt)) {
1109 *pprev = dst->next;
1110 dst_free(dst);
1da177e4
LT
1111 } else {
1112 pprev = &dst->next;
3d0f24a7 1113 ++more;
1da177e4
LT
1114 }
1115 }
1116
3b00944c 1117 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1118
3d0f24a7 1119 return more;
1da177e4
LT
1120}
1121
1e493d19
DM
1122static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1123 void *arg)
1124{
1125 struct dst_entry *dst, **pprev;
1126
1127 spin_lock_bh(&icmp6_dst_lock);
1128 pprev = &icmp6_dst_gc_list;
1129 while ((dst = *pprev) != NULL) {
1130 struct rt6_info *rt = (struct rt6_info *) dst;
1131 if (func(rt, arg)) {
1132 *pprev = dst->next;
1133 dst_free(dst);
1134 } else {
1135 pprev = &dst->next;
1136 }
1137 }
1138 spin_unlock_bh(&icmp6_dst_lock);
1139}
1140
569d3645 1141static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1142{
1da177e4 1143 unsigned long now = jiffies;
86393e52 1144 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1145 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1146 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1147 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1148 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1149 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1150 int entries;
7019b78e 1151
fc66f95c 1152 entries = dst_entries_get_fast(ops);
7019b78e 1153 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1154 entries <= rt_max_size)
1da177e4
LT
1155 goto out;
1156
6891a346
BT
1157 net->ipv6.ip6_rt_gc_expire++;
1158 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1159 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1160 entries = dst_entries_get_slow(ops);
1161 if (entries < ops->gc_thresh)
7019b78e 1162 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1163out:
7019b78e 1164 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1165 return entries > rt_max_size;
1da177e4
LT
1166}
1167
1168/* Clean host part of a prefix. Not necessary in radix tree,
1169 but results in cleaner routing tables.
1170
1171 Remove it only when all the things will work!
1172 */
1173
6b75d090 1174int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1175{
5170ae82 1176 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1177 if (hoplimit == 0) {
6b75d090 1178 struct net_device *dev = dst->dev;
c68f24cc
ED
1179 struct inet6_dev *idev;
1180
1181 rcu_read_lock();
1182 idev = __in6_dev_get(dev);
1183 if (idev)
6b75d090 1184 hoplimit = idev->cnf.hop_limit;
c68f24cc 1185 else
53b7997f 1186 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1187 rcu_read_unlock();
1da177e4
LT
1188 }
1189 return hoplimit;
1190}
abbf46ae 1191EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1192
1193/*
1194 *
1195 */
1196
86872cb5 1197int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1198{
1199 int err;
5578689a 1200 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1201 struct rt6_info *rt = NULL;
1202 struct net_device *dev = NULL;
1203 struct inet6_dev *idev = NULL;
c71099ac 1204 struct fib6_table *table;
1da177e4
LT
1205 int addr_type;
1206
86872cb5 1207 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1208 return -EINVAL;
1209#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1210 if (cfg->fc_src_len)
1da177e4
LT
1211 return -EINVAL;
1212#endif
86872cb5 1213 if (cfg->fc_ifindex) {
1da177e4 1214 err = -ENODEV;
5578689a 1215 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1216 if (!dev)
1217 goto out;
1218 idev = in6_dev_get(dev);
1219 if (!idev)
1220 goto out;
1221 }
1222
86872cb5
TG
1223 if (cfg->fc_metric == 0)
1224 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1225
5578689a 1226 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1227 if (table == NULL) {
1228 err = -ENOBUFS;
1229 goto out;
1230 }
1231
957c665f 1232 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4
LT
1233
1234 if (rt == NULL) {
1235 err = -ENOMEM;
1236 goto out;
1237 }
1238
d8d1f30b 1239 rt->dst.obsolete = -1;
6f704992
YH
1240 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1241 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1242 0;
1da177e4 1243
86872cb5
TG
1244 if (cfg->fc_protocol == RTPROT_UNSPEC)
1245 cfg->fc_protocol = RTPROT_BOOT;
1246 rt->rt6i_protocol = cfg->fc_protocol;
1247
1248 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1249
1250 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1251 rt->dst.input = ip6_mc_input;
ab79ad14
1252 else if (cfg->fc_flags & RTF_LOCAL)
1253 rt->dst.input = ip6_input;
1da177e4 1254 else
d8d1f30b 1255 rt->dst.input = ip6_forward;
1da177e4 1256
d8d1f30b 1257 rt->dst.output = ip6_output;
1da177e4 1258
86872cb5
TG
1259 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1260 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1261 if (rt->rt6i_dst.plen == 128)
11d53b49 1262 rt->dst.flags |= DST_HOST;
1da177e4
LT
1263
1264#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1265 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1266 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1267#endif
1268
86872cb5 1269 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1270
1271 /* We cannot add true routes via loopback here,
1272 they would result in kernel looping; promote them to reject routes
1273 */
86872cb5 1274 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1275 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1276 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1277 /* hold loopback dev/idev if we haven't done so. */
5578689a 1278 if (dev != net->loopback_dev) {
1da177e4
LT
1279 if (dev) {
1280 dev_put(dev);
1281 in6_dev_put(idev);
1282 }
5578689a 1283 dev = net->loopback_dev;
1da177e4
LT
1284 dev_hold(dev);
1285 idev = in6_dev_get(dev);
1286 if (!idev) {
1287 err = -ENODEV;
1288 goto out;
1289 }
1290 }
d8d1f30b
CG
1291 rt->dst.output = ip6_pkt_discard_out;
1292 rt->dst.input = ip6_pkt_discard;
1293 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1294 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1295 goto install_route;
1296 }
1297
86872cb5 1298 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1299 const struct in6_addr *gw_addr;
1da177e4
LT
1300 int gwa_type;
1301
86872cb5
TG
1302 gw_addr = &cfg->fc_gateway;
1303 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1304 gwa_type = ipv6_addr_type(gw_addr);
1305
1306 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1307 struct rt6_info *grt;
1308
1309 /* IPv6 strictly inhibits using not link-local
1310 addresses as nexthop address.
1311 Otherwise, router will not able to send redirects.
1312 It is very good, but in some (rare!) circumstances
1313 (SIT, PtP, NBMA NOARP links) it is handy to allow
1314 some exceptions. --ANK
1315 */
1316 err = -EINVAL;
1317 if (!(gwa_type&IPV6_ADDR_UNICAST))
1318 goto out;
1319
5578689a 1320 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1321
1322 err = -EHOSTUNREACH;
1323 if (grt == NULL)
1324 goto out;
1325 if (dev) {
1326 if (dev != grt->rt6i_dev) {
d8d1f30b 1327 dst_release(&grt->dst);
1da177e4
LT
1328 goto out;
1329 }
1330 } else {
1331 dev = grt->rt6i_dev;
1332 idev = grt->rt6i_idev;
1333 dev_hold(dev);
1334 in6_dev_hold(grt->rt6i_idev);
1335 }
1336 if (!(grt->rt6i_flags&RTF_GATEWAY))
1337 err = 0;
d8d1f30b 1338 dst_release(&grt->dst);
1da177e4
LT
1339
1340 if (err)
1341 goto out;
1342 }
1343 err = -EINVAL;
1344 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1345 goto out;
1346 }
1347
1348 err = -ENODEV;
1349 if (dev == NULL)
1350 goto out;
1351
c3968a85
DW
1352 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1353 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1354 err = -EINVAL;
1355 goto out;
1356 }
1357 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1358 rt->rt6i_prefsrc.plen = 128;
1359 } else
1360 rt->rt6i_prefsrc.plen = 0;
1361
86872cb5 1362 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
69cce1d1
DM
1363 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1364 if (IS_ERR(n)) {
1365 err = PTR_ERR(n);
1da177e4
LT
1366 goto out;
1367 }
69cce1d1 1368 dst_set_neighbour(&rt->dst, n);
1da177e4
LT
1369 }
1370
86872cb5 1371 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1372
1373install_route:
86872cb5
TG
1374 if (cfg->fc_mx) {
1375 struct nlattr *nla;
1376 int remaining;
1377
1378 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1379 int type = nla_type(nla);
86872cb5
TG
1380
1381 if (type) {
1382 if (type > RTAX_MAX) {
1da177e4
LT
1383 err = -EINVAL;
1384 goto out;
1385 }
86872cb5 1386
defb3519 1387 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1388 }
1da177e4
LT
1389 }
1390 }
1391
d8d1f30b 1392 rt->dst.dev = dev;
1da177e4 1393 rt->rt6i_idev = idev;
c71099ac 1394 rt->rt6i_table = table;
63152fc0 1395
c346dca1 1396 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1397
86872cb5 1398 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1399
1400out:
1401 if (dev)
1402 dev_put(dev);
1403 if (idev)
1404 in6_dev_put(idev);
1405 if (rt)
d8d1f30b 1406 dst_free(&rt->dst);
1da177e4
LT
1407 return err;
1408}
1409
86872cb5 1410static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1411{
1412 int err;
c71099ac 1413 struct fib6_table *table;
c346dca1 1414 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1415
8ed67789 1416 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1417 return -ENOENT;
1418
c71099ac
TG
1419 table = rt->rt6i_table;
1420 write_lock_bh(&table->tb6_lock);
1da177e4 1421
86872cb5 1422 err = fib6_del(rt, info);
d8d1f30b 1423 dst_release(&rt->dst);
1da177e4 1424
c71099ac 1425 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1426
1427 return err;
1428}
1429
e0a1ad73
TG
1430int ip6_del_rt(struct rt6_info *rt)
1431{
4d1169c1 1432 struct nl_info info = {
c346dca1 1433 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1434 };
528c4ceb 1435 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1436}
1437
86872cb5 1438static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1439{
c71099ac 1440 struct fib6_table *table;
1da177e4
LT
1441 struct fib6_node *fn;
1442 struct rt6_info *rt;
1443 int err = -ESRCH;
1444
5578689a 1445 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1446 if (table == NULL)
1447 return err;
1448
1449 read_lock_bh(&table->tb6_lock);
1da177e4 1450
c71099ac 1451 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1452 &cfg->fc_dst, cfg->fc_dst_len,
1453 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1454
1da177e4 1455 if (fn) {
d8d1f30b 1456 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1457 if (cfg->fc_ifindex &&
1da177e4 1458 (rt->rt6i_dev == NULL ||
86872cb5 1459 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1460 continue;
86872cb5
TG
1461 if (cfg->fc_flags & RTF_GATEWAY &&
1462 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1463 continue;
86872cb5 1464 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1465 continue;
d8d1f30b 1466 dst_hold(&rt->dst);
c71099ac 1467 read_unlock_bh(&table->tb6_lock);
1da177e4 1468
86872cb5 1469 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1470 }
1471 }
c71099ac 1472 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1473
1474 return err;
1475}
1476
1477/*
1478 * Handle redirects
1479 */
a6279458 1480struct ip6rd_flowi {
4c9483b2 1481 struct flowi6 fl6;
a6279458
YH
1482 struct in6_addr gateway;
1483};
1484
8ed67789
DL
1485static struct rt6_info *__ip6_route_redirect(struct net *net,
1486 struct fib6_table *table,
4c9483b2 1487 struct flowi6 *fl6,
a6279458 1488 int flags)
1da177e4 1489{
4c9483b2 1490 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1491 struct rt6_info *rt;
e843b9e1 1492 struct fib6_node *fn;
c71099ac 1493
1da177e4 1494 /*
e843b9e1
YH
1495 * Get the "current" route for this destination and
1496 * check if the redirect has come from approriate router.
1497 *
1498 * RFC 2461 specifies that redirects should only be
1499 * accepted if they come from the nexthop to the target.
1500 * Due to the way the routes are chosen, this notion
1501 * is a bit fuzzy and one might need to check all possible
1502 * routes.
1da177e4 1503 */
1da177e4 1504
c71099ac 1505 read_lock_bh(&table->tb6_lock);
4c9483b2 1506 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1507restart:
d8d1f30b 1508 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1509 /*
1510 * Current route is on-link; redirect is always invalid.
1511 *
1512 * Seems, previous statement is not true. It could
1513 * be node, which looks for us as on-link (f.e. proxy ndisc)
1514 * But then router serving it might decide, that we should
1515 * know truth 8)8) --ANK (980726).
1516 */
1517 if (rt6_check_expired(rt))
1518 continue;
1519 if (!(rt->rt6i_flags & RTF_GATEWAY))
1520 continue;
4c9483b2 1521 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1522 continue;
a6279458 1523 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1524 continue;
1525 break;
1526 }
a6279458 1527
cb15d9c2 1528 if (!rt)
8ed67789 1529 rt = net->ipv6.ip6_null_entry;
4c9483b2 1530 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1531out:
d8d1f30b 1532 dst_hold(&rt->dst);
a6279458 1533
c71099ac 1534 read_unlock_bh(&table->tb6_lock);
e843b9e1 1535
a6279458
YH
1536 return rt;
1537};
1538
b71d1d42
ED
1539static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1540 const struct in6_addr *src,
1541 const struct in6_addr *gateway,
a6279458
YH
1542 struct net_device *dev)
1543{
adaa70bb 1544 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1545 struct net *net = dev_net(dev);
a6279458 1546 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1547 .fl6 = {
1548 .flowi6_oif = dev->ifindex,
1549 .daddr = *dest,
1550 .saddr = *src,
a6279458 1551 },
a6279458 1552 };
adaa70bb 1553
86c36ce4
BH
1554 ipv6_addr_copy(&rdfl.gateway, gateway);
1555
adaa70bb
TG
1556 if (rt6_need_strict(dest))
1557 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1558
4c9483b2 1559 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1560 flags, __ip6_route_redirect);
a6279458
YH
1561}
1562
b71d1d42
ED
1563void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1564 const struct in6_addr *saddr,
a6279458
YH
1565 struct neighbour *neigh, u8 *lladdr, int on_link)
1566{
1567 struct rt6_info *rt, *nrt = NULL;
1568 struct netevent_redirect netevent;
c346dca1 1569 struct net *net = dev_net(neigh->dev);
a6279458
YH
1570
1571 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1572
8ed67789 1573 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1574 if (net_ratelimit())
1575 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1576 "for redirect target\n");
a6279458 1577 goto out;
1da177e4
LT
1578 }
1579
1da177e4
LT
1580 /*
1581 * We have finally decided to accept it.
1582 */
1583
1ab1457c 1584 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1585 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1586 NEIGH_UPDATE_F_OVERRIDE|
1587 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1588 NEIGH_UPDATE_F_ISROUTER))
1589 );
1590
1591 /*
1592 * Redirect received -> path was valid.
1593 * Look, redirects are sent only in response to data packets,
1594 * so that this nexthop apparently is reachable. --ANK
1595 */
d8d1f30b 1596 dst_confirm(&rt->dst);
1da177e4
LT
1597
1598 /* Duplicate redirect: silently ignore. */
f2c31e32 1599 if (neigh == dst_get_neighbour_raw(&rt->dst))
1da177e4
LT
1600 goto out;
1601
21efcfa0 1602 nrt = ip6_rt_copy(rt, dest);
1da177e4
LT
1603 if (nrt == NULL)
1604 goto out;
1605
1606 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1607 if (on_link)
1608 nrt->rt6i_flags &= ~RTF_GATEWAY;
1609
1da177e4 1610 nrt->rt6i_dst.plen = 128;
d8d1f30b 1611 nrt->dst.flags |= DST_HOST;
1da177e4
LT
1612
1613 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
69cce1d1 1614 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1615
40e22e8f 1616 if (ip6_ins_rt(nrt))
1da177e4
LT
1617 goto out;
1618
d8d1f30b
CG
1619 netevent.old = &rt->dst;
1620 netevent.new = &nrt->dst;
8d71740c
TT
1621 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1622
1da177e4 1623 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1624 ip6_del_rt(rt);
1da177e4
LT
1625 return;
1626 }
1627
1628out:
d8d1f30b 1629 dst_release(&rt->dst);
1da177e4
LT
1630}
1631
1632/*
1633 * Handle ICMP "packet too big" messages
1634 * i.e. Path MTU discovery
1635 */
1636
b71d1d42 1637static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1638 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1639{
1640 struct rt6_info *rt, *nrt;
1641 int allfrag = 0;
d3052b55 1642again:
ae878ae2 1643 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1644 if (rt == NULL)
1645 return;
1646
d3052b55
AV
1647 if (rt6_check_expired(rt)) {
1648 ip6_del_rt(rt);
1649 goto again;
1650 }
1651
d8d1f30b 1652 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1653 goto out;
1654
1655 if (pmtu < IPV6_MIN_MTU) {
1656 /*
1ab1457c 1657 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1658 * MTU (1280) and a fragment header should always be included
1659 * after a node receiving Too Big message reporting PMTU is
1660 * less than the IPv6 Minimum Link MTU.
1661 */
1662 pmtu = IPV6_MIN_MTU;
1663 allfrag = 1;
1664 }
1665
1666 /* New mtu received -> path was valid.
1667 They are sent only in response to data packets,
1668 so that this nexthop apparently is reachable. --ANK
1669 */
d8d1f30b 1670 dst_confirm(&rt->dst);
1da177e4
LT
1671
1672 /* Host route. If it is static, it would be better
1673 not to override it, but add new one, so that
1674 when cache entry will expire old pmtu
1675 would return automatically.
1676 */
1677 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1678 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1679 if (allfrag) {
1680 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1681 features |= RTAX_FEATURE_ALLFRAG;
1682 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1683 }
d8d1f30b 1684 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1685 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1686 goto out;
1687 }
1688
1689 /* Network route.
1690 Two cases are possible:
1691 1. It is connected route. Action: COW
1692 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1693 */
f2c31e32 1694 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1695 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1696 else
1697 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1698
d5315b50 1699 if (nrt) {
defb3519
DM
1700 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1701 if (allfrag) {
1702 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1703 features |= RTAX_FEATURE_ALLFRAG;
1704 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1705 }
a1e78363
YH
1706
1707 /* According to RFC 1981, detecting PMTU increase shouldn't be
1708 * happened within 5 mins, the recommended timer is 10 mins.
1709 * Here this route expiration time is set to ip6_rt_mtu_expires
1710 * which is 10 mins. After 10 mins the decreased pmtu is expired
1711 * and detecting PMTU increase will be automatically happened.
1712 */
d8d1f30b 1713 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1714 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1715
40e22e8f 1716 ip6_ins_rt(nrt);
1da177e4 1717 }
1da177e4 1718out:
d8d1f30b 1719 dst_release(&rt->dst);
1da177e4
LT
1720}
1721
b71d1d42 1722void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1723 struct net_device *dev, u32 pmtu)
1724{
1725 struct net *net = dev_net(dev);
1726
1727 /*
1728 * RFC 1981 states that a node "MUST reduce the size of the packets it
1729 * is sending along the path" that caused the Packet Too Big message.
1730 * Since it's not possible in the general case to determine which
1731 * interface was used to send the original packet, we update the MTU
1732 * on the interface that will be used to send future packets. We also
1733 * update the MTU on the interface that received the Packet Too Big in
1734 * case the original packet was forced out that interface with
1735 * SO_BINDTODEVICE or similar. This is the next best thing to the
1736 * correct behaviour, which would be to update the MTU on all
1737 * interfaces.
1738 */
1739 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1740 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1741}
1742
1da177e4
LT
1743/*
1744 * Misc support functions
1745 */
1746
21efcfa0
ED
1747static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1748 const struct in6_addr *dest)
1da177e4 1749{
c346dca1 1750 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3 1751 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1752 ort->dst.dev, 0);
1da177e4
LT
1753
1754 if (rt) {
d8d1f30b
CG
1755 rt->dst.input = ort->dst.input;
1756 rt->dst.output = ort->dst.output;
1757
21efcfa0
ED
1758 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
1759 rt->rt6i_dst.plen = ort->rt6i_dst.plen;
defb3519 1760 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1761 rt->dst.error = ort->dst.error;
1da177e4
LT
1762 rt->rt6i_idev = ort->rt6i_idev;
1763 if (rt->rt6i_idev)
1764 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1765 rt->dst.lastuse = jiffies;
1da177e4
LT
1766 rt->rt6i_expires = 0;
1767
1768 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1769 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1770 rt->rt6i_metric = 0;
1771
1da177e4
LT
1772#ifdef CONFIG_IPV6_SUBTREES
1773 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1774#endif
0f6c6392 1775 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1776 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1777 }
1778 return rt;
1779}
1780
70ceb4f5 1781#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1782static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1783 const struct in6_addr *prefix, int prefixlen,
1784 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1785{
1786 struct fib6_node *fn;
1787 struct rt6_info *rt = NULL;
c71099ac
TG
1788 struct fib6_table *table;
1789
efa2cea0 1790 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1791 if (table == NULL)
1792 return NULL;
70ceb4f5 1793
c71099ac
TG
1794 write_lock_bh(&table->tb6_lock);
1795 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1796 if (!fn)
1797 goto out;
1798
d8d1f30b 1799 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1800 if (rt->rt6i_dev->ifindex != ifindex)
1801 continue;
1802 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1803 continue;
1804 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1805 continue;
d8d1f30b 1806 dst_hold(&rt->dst);
70ceb4f5
YH
1807 break;
1808 }
1809out:
c71099ac 1810 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1811 return rt;
1812}
1813
efa2cea0 1814static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1815 const struct in6_addr *prefix, int prefixlen,
1816 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1817 unsigned pref)
1818{
86872cb5
TG
1819 struct fib6_config cfg = {
1820 .fc_table = RT6_TABLE_INFO,
238fc7ea 1821 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1822 .fc_ifindex = ifindex,
1823 .fc_dst_len = prefixlen,
1824 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1825 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1826 .fc_nlinfo.pid = 0,
1827 .fc_nlinfo.nlh = NULL,
1828 .fc_nlinfo.nl_net = net,
86872cb5
TG
1829 };
1830
1831 ipv6_addr_copy(&cfg.fc_dst, prefix);
1832 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1833
e317da96
YH
1834 /* We should treat it as a default route if prefix length is 0. */
1835 if (!prefixlen)
86872cb5 1836 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1837
86872cb5 1838 ip6_route_add(&cfg);
70ceb4f5 1839
efa2cea0 1840 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1841}
1842#endif
1843
b71d1d42 1844struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1845{
1da177e4 1846 struct rt6_info *rt;
c71099ac 1847 struct fib6_table *table;
1da177e4 1848
c346dca1 1849 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1850 if (table == NULL)
1851 return NULL;
1da177e4 1852
c71099ac 1853 write_lock_bh(&table->tb6_lock);
d8d1f30b 1854 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1855 if (dev == rt->rt6i_dev &&
045927ff 1856 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1857 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1858 break;
1859 }
1860 if (rt)
d8d1f30b 1861 dst_hold(&rt->dst);
c71099ac 1862 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1863 return rt;
1864}
1865
b71d1d42 1866struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1867 struct net_device *dev,
1868 unsigned int pref)
1da177e4 1869{
86872cb5
TG
1870 struct fib6_config cfg = {
1871 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1872 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1873 .fc_ifindex = dev->ifindex,
1874 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1875 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1876 .fc_nlinfo.pid = 0,
1877 .fc_nlinfo.nlh = NULL,
c346dca1 1878 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1879 };
1da177e4 1880
86872cb5 1881 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1882
86872cb5 1883 ip6_route_add(&cfg);
1da177e4 1884
1da177e4
LT
1885 return rt6_get_dflt_router(gwaddr, dev);
1886}
1887
7b4da532 1888void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1889{
1890 struct rt6_info *rt;
c71099ac
TG
1891 struct fib6_table *table;
1892
1893 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1894 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1895 if (table == NULL)
1896 return;
1da177e4
LT
1897
1898restart:
c71099ac 1899 read_lock_bh(&table->tb6_lock);
d8d1f30b 1900 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1901 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1902 dst_hold(&rt->dst);
c71099ac 1903 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1904 ip6_del_rt(rt);
1da177e4
LT
1905 goto restart;
1906 }
1907 }
c71099ac 1908 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1909}
1910
5578689a
DL
1911static void rtmsg_to_fib6_config(struct net *net,
1912 struct in6_rtmsg *rtmsg,
86872cb5
TG
1913 struct fib6_config *cfg)
1914{
1915 memset(cfg, 0, sizeof(*cfg));
1916
1917 cfg->fc_table = RT6_TABLE_MAIN;
1918 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1919 cfg->fc_metric = rtmsg->rtmsg_metric;
1920 cfg->fc_expires = rtmsg->rtmsg_info;
1921 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1922 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1923 cfg->fc_flags = rtmsg->rtmsg_flags;
1924
5578689a 1925 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1926
86872cb5
TG
1927 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1928 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1929 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1930}
1931
5578689a 1932int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1933{
86872cb5 1934 struct fib6_config cfg;
1da177e4
LT
1935 struct in6_rtmsg rtmsg;
1936 int err;
1937
1938 switch(cmd) {
1939 case SIOCADDRT: /* Add a route */
1940 case SIOCDELRT: /* Delete a route */
1941 if (!capable(CAP_NET_ADMIN))
1942 return -EPERM;
1943 err = copy_from_user(&rtmsg, arg,
1944 sizeof(struct in6_rtmsg));
1945 if (err)
1946 return -EFAULT;
86872cb5 1947
5578689a 1948 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1949
1da177e4
LT
1950 rtnl_lock();
1951 switch (cmd) {
1952 case SIOCADDRT:
86872cb5 1953 err = ip6_route_add(&cfg);
1da177e4
LT
1954 break;
1955 case SIOCDELRT:
86872cb5 1956 err = ip6_route_del(&cfg);
1da177e4
LT
1957 break;
1958 default:
1959 err = -EINVAL;
1960 }
1961 rtnl_unlock();
1962
1963 return err;
3ff50b79 1964 }
1da177e4
LT
1965
1966 return -EINVAL;
1967}
1968
1969/*
1970 * Drop the packet on the floor
1971 */
1972
d5fdd6ba 1973static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1974{
612f09e8 1975 int type;
adf30907 1976 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1977 switch (ipstats_mib_noroutes) {
1978 case IPSTATS_MIB_INNOROUTES:
0660e03f 1979 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1980 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1981 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1982 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1983 break;
1984 }
1985 /* FALLTHROUGH */
1986 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1987 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1988 ipstats_mib_noroutes);
612f09e8
YH
1989 break;
1990 }
3ffe533c 1991 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1992 kfree_skb(skb);
1993 return 0;
1994}
1995
9ce8ade0
TG
1996static int ip6_pkt_discard(struct sk_buff *skb)
1997{
612f09e8 1998 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1999}
2000
20380731 2001static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2002{
adf30907 2003 skb->dev = skb_dst(skb)->dev;
612f09e8 2004 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2005}
2006
6723ab54
DM
2007#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2008
9ce8ade0
TG
2009static int ip6_pkt_prohibit(struct sk_buff *skb)
2010{
612f09e8 2011 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2012}
2013
2014static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2015{
adf30907 2016 skb->dev = skb_dst(skb)->dev;
612f09e8 2017 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2018}
2019
6723ab54
DM
2020#endif
2021
1da177e4
LT
2022/*
2023 * Allocate a dst for local (unicast / anycast) address.
2024 */
2025
2026struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2027 const struct in6_addr *addr,
2028 int anycast)
2029{
c346dca1 2030 struct net *net = dev_net(idev->dev);
5c1e6aa3 2031 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2032 net->loopback_dev, 0);
14deae41 2033 struct neighbour *neigh;
1da177e4 2034
40385653
BG
2035 if (rt == NULL) {
2036 if (net_ratelimit())
2037 pr_warning("IPv6: Maximum number of routes reached,"
2038 " consider increasing route/max_size.\n");
1da177e4 2039 return ERR_PTR(-ENOMEM);
40385653 2040 }
1da177e4 2041
1da177e4
LT
2042 in6_dev_hold(idev);
2043
11d53b49 2044 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2045 rt->dst.input = ip6_input;
2046 rt->dst.output = ip6_output;
1da177e4 2047 rt->rt6i_idev = idev;
d8d1f30b 2048 rt->dst.obsolete = -1;
1da177e4
LT
2049
2050 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2051 if (anycast)
2052 rt->rt6i_flags |= RTF_ANYCAST;
2053 else
1da177e4 2054 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2055 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2056 if (IS_ERR(neigh)) {
d8d1f30b 2057 dst_free(&rt->dst);
14deae41 2058
29546a64 2059 return ERR_CAST(neigh);
1da177e4 2060 }
69cce1d1 2061 dst_set_neighbour(&rt->dst, neigh);
1da177e4
LT
2062
2063 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2064 rt->rt6i_dst.plen = 128;
5578689a 2065 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2066
d8d1f30b 2067 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2068
2069 return rt;
2070}
2071
c3968a85
DW
2072int ip6_route_get_saddr(struct net *net,
2073 struct rt6_info *rt,
b71d1d42 2074 const struct in6_addr *daddr,
c3968a85
DW
2075 unsigned int prefs,
2076 struct in6_addr *saddr)
2077{
2078 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2079 int err = 0;
2080 if (rt->rt6i_prefsrc.plen)
2081 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2082 else
2083 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2084 daddr, prefs, saddr);
2085 return err;
2086}
2087
2088/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared with rt6i_prefsrc */
};
2094
2095static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2096{
2097 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2098 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2099 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2100
2101 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2102 rt != net->ipv6.ip6_null_entry &&
2103 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2104 /* remove prefsrc entry */
2105 rt->rt6i_prefsrc.plen = 0;
2106 }
2107 return 0;
2108}
2109
2110void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2111{
2112 struct net *net = dev_net(ifp->idev->dev);
2113 struct arg_dev_net_ip adni = {
2114 .dev = ifp->idev->dev,
2115 .net = net,
2116 .addr = &ifp->addr,
2117 };
2118 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2119}
2120
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2125
1da177e4
LT
2126static int fib6_ifdown(struct rt6_info *rt, void *arg)
2127{
bc3ef660 2128 const struct arg_dev_net *adn = arg;
2129 const struct net_device *dev = adn->dev;
8ed67789 2130
bc3ef660 2131 if ((rt->rt6i_dev == dev || dev == NULL) &&
2132 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2133 RT6_TRACE("deleted by ifdown %p\n", rt);
2134 return -1;
2135 }
2136 return 0;
2137}
2138
f3db4851 2139void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2140{
8ed67789
DL
2141 struct arg_dev_net adn = {
2142 .dev = dev,
2143 .net = net,
2144 };
2145
2146 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2147 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2148}
2149
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2155
2156static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2157{
2158 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2159 struct inet6_dev *idev;
2160
2161 /* In IPv6 pmtu discovery is not optional,
2162 so that RTAX_MTU lock cannot disable it.
2163 We still use this lock to block changes
2164 caused by addrconf/ndisc.
2165 */
2166
2167 idev = __in6_dev_get(arg->dev);
2168 if (idev == NULL)
2169 return 0;
2170
2171 /* For administrative MTU increase, there is no way to discover
2172 IPv6 PMTU increase, so PMTU increase should be updated here.
2173 Since RFC 1981 doesn't include administrative MTU increase
2174 update PMTU increase is a MUST. (i.e. jumbo frame)
2175 */
2176 /*
2177 If new MTU is less than route PMTU, this new MTU will be the
2178 lowest MTU in the path, update the route PMTU to reflect PMTU
2179 decreases; if new MTU is greater than route PMTU, and the
2180 old MTU is the lowest MTU in the path, update the route PMTU
2181 to reflect the increase. In this case if the other nodes' MTU
2182 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2183 PMTU discouvery.
2184 */
2185 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2186 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2187 (dst_mtu(&rt->dst) >= arg->mtu ||
2188 (dst_mtu(&rt->dst) < arg->mtu &&
2189 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2190 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2191 }
1da177e4
LT
2192 return 0;
2193}
2194
2195void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2196{
c71099ac
TG
2197 struct rt6_mtu_change_arg arg = {
2198 .dev = dev,
2199 .mtu = mtu,
2200 };
1da177e4 2201
c346dca1 2202 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2203}
2204
ef7c79ed 2205static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2206 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2207 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2208 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2209 [RTA_PRIORITY] = { .type = NLA_U32 },
2210 [RTA_METRICS] = { .type = NLA_NESTED },
2211};
2212
2213static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2214 struct fib6_config *cfg)
1da177e4 2215{
86872cb5
TG
2216 struct rtmsg *rtm;
2217 struct nlattr *tb[RTA_MAX+1];
2218 int err;
1da177e4 2219
86872cb5
TG
2220 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2221 if (err < 0)
2222 goto errout;
1da177e4 2223
86872cb5
TG
2224 err = -EINVAL;
2225 rtm = nlmsg_data(nlh);
2226 memset(cfg, 0, sizeof(*cfg));
2227
2228 cfg->fc_table = rtm->rtm_table;
2229 cfg->fc_dst_len = rtm->rtm_dst_len;
2230 cfg->fc_src_len = rtm->rtm_src_len;
2231 cfg->fc_flags = RTF_UP;
2232 cfg->fc_protocol = rtm->rtm_protocol;
2233
2234 if (rtm->rtm_type == RTN_UNREACHABLE)
2235 cfg->fc_flags |= RTF_REJECT;
2236
ab79ad14
2237 if (rtm->rtm_type == RTN_LOCAL)
2238 cfg->fc_flags |= RTF_LOCAL;
2239
86872cb5
TG
2240 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2241 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2242 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2243
2244 if (tb[RTA_GATEWAY]) {
2245 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2246 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2247 }
86872cb5
TG
2248
2249 if (tb[RTA_DST]) {
2250 int plen = (rtm->rtm_dst_len + 7) >> 3;
2251
2252 if (nla_len(tb[RTA_DST]) < plen)
2253 goto errout;
2254
2255 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2256 }
86872cb5
TG
2257
2258 if (tb[RTA_SRC]) {
2259 int plen = (rtm->rtm_src_len + 7) >> 3;
2260
2261 if (nla_len(tb[RTA_SRC]) < plen)
2262 goto errout;
2263
2264 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2265 }
86872cb5 2266
c3968a85
DW
2267 if (tb[RTA_PREFSRC])
2268 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2269
86872cb5
TG
2270 if (tb[RTA_OIF])
2271 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2272
2273 if (tb[RTA_PRIORITY])
2274 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2275
2276 if (tb[RTA_METRICS]) {
2277 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2278 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2279 }
86872cb5
TG
2280
2281 if (tb[RTA_TABLE])
2282 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2283
2284 err = 0;
2285errout:
2286 return err;
1da177e4
LT
2287}
2288
c127ea2c 2289static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2290{
86872cb5
TG
2291 struct fib6_config cfg;
2292 int err;
1da177e4 2293
86872cb5
TG
2294 err = rtm_to_fib6_config(skb, nlh, &cfg);
2295 if (err < 0)
2296 return err;
2297
2298 return ip6_route_del(&cfg);
1da177e4
LT
2299}
2300
c127ea2c 2301static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2302{
86872cb5
TG
2303 struct fib6_config cfg;
2304 int err;
1da177e4 2305
86872cb5
TG
2306 err = rtm_to_fib6_config(skb, nlh, &cfg);
2307 if (err < 0)
2308 return err;
2309
2310 return ip6_route_add(&cfg);
1da177e4
LT
2311}
2312
339bf98f
TG
2313static inline size_t rt6_nlmsg_size(void)
2314{
2315 return NLMSG_ALIGN(sizeof(struct rtmsg))
2316 + nla_total_size(16) /* RTA_SRC */
2317 + nla_total_size(16) /* RTA_DST */
2318 + nla_total_size(16) /* RTA_GATEWAY */
2319 + nla_total_size(16) /* RTA_PREFSRC */
2320 + nla_total_size(4) /* RTA_TABLE */
2321 + nla_total_size(4) /* RTA_IIF */
2322 + nla_total_size(4) /* RTA_OIF */
2323 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2324 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2325 + nla_total_size(sizeof(struct rta_cacheinfo));
2326}
2327
191cd582
BH
2328static int rt6_fill_node(struct net *net,
2329 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2330 struct in6_addr *dst, struct in6_addr *src,
2331 int iif, int type, u32 pid, u32 seq,
7bc570c8 2332 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2333{
2334 struct rtmsg *rtm;
2d7202bf 2335 struct nlmsghdr *nlh;
e3703b3d 2336 long expires;
9e762a4a 2337 u32 table;
f2c31e32 2338 struct neighbour *n;
1da177e4
LT
2339
2340 if (prefix) { /* user wants prefix routes only */
2341 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2342 /* success since this is not a prefix route */
2343 return 1;
2344 }
2345 }
2346
2d7202bf
TG
2347 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2348 if (nlh == NULL)
26932566 2349 return -EMSGSIZE;
2d7202bf
TG
2350
2351 rtm = nlmsg_data(nlh);
1da177e4
LT
2352 rtm->rtm_family = AF_INET6;
2353 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2354 rtm->rtm_src_len = rt->rt6i_src.plen;
2355 rtm->rtm_tos = 0;
c71099ac 2356 if (rt->rt6i_table)
9e762a4a 2357 table = rt->rt6i_table->tb6_id;
c71099ac 2358 else
9e762a4a
PM
2359 table = RT6_TABLE_UNSPEC;
2360 rtm->rtm_table = table;
2d7202bf 2361 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2362 if (rt->rt6i_flags&RTF_REJECT)
2363 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2364 else if (rt->rt6i_flags&RTF_LOCAL)
2365 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2366 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2367 rtm->rtm_type = RTN_LOCAL;
2368 else
2369 rtm->rtm_type = RTN_UNICAST;
2370 rtm->rtm_flags = 0;
2371 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2372 rtm->rtm_protocol = rt->rt6i_protocol;
2373 if (rt->rt6i_flags&RTF_DYNAMIC)
2374 rtm->rtm_protocol = RTPROT_REDIRECT;
2375 else if (rt->rt6i_flags & RTF_ADDRCONF)
2376 rtm->rtm_protocol = RTPROT_KERNEL;
2377 else if (rt->rt6i_flags&RTF_DEFAULT)
2378 rtm->rtm_protocol = RTPROT_RA;
2379
2380 if (rt->rt6i_flags&RTF_CACHE)
2381 rtm->rtm_flags |= RTM_F_CLONED;
2382
2383 if (dst) {
2d7202bf 2384 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2385 rtm->rtm_dst_len = 128;
1da177e4 2386 } else if (rtm->rtm_dst_len)
2d7202bf 2387 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2388#ifdef CONFIG_IPV6_SUBTREES
2389 if (src) {
2d7202bf 2390 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2391 rtm->rtm_src_len = 128;
1da177e4 2392 } else if (rtm->rtm_src_len)
2d7202bf 2393 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2394#endif
7bc570c8
YH
2395 if (iif) {
2396#ifdef CONFIG_IPV6_MROUTE
2397 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2398 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2399 if (err <= 0) {
2400 if (!nowait) {
2401 if (err == 0)
2402 return 0;
2403 goto nla_put_failure;
2404 } else {
2405 if (err == -EMSGSIZE)
2406 goto nla_put_failure;
2407 }
2408 }
2409 } else
2410#endif
2411 NLA_PUT_U32(skb, RTA_IIF, iif);
2412 } else if (dst) {
1da177e4 2413 struct in6_addr saddr_buf;
c3968a85 2414 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2415 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2416 }
2d7202bf 2417
c3968a85
DW
2418 if (rt->rt6i_prefsrc.plen) {
2419 struct in6_addr saddr_buf;
2420 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2421 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2422 }
2423
defb3519 2424 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2425 goto nla_put_failure;
2426
f2c31e32
ED
2427 rcu_read_lock();
2428 n = dst_get_neighbour(&rt->dst);
2429 if (n)
2430 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2431 rcu_read_unlock();
2d7202bf 2432
d8d1f30b 2433 if (rt->dst.dev)
2d7202bf
TG
2434 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2435
2436 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2437
36e3deae
YH
2438 if (!(rt->rt6i_flags & RTF_EXPIRES))
2439 expires = 0;
2440 else if (rt->rt6i_expires - jiffies < INT_MAX)
2441 expires = rt->rt6i_expires - jiffies;
2442 else
2443 expires = INT_MAX;
69cdf8f9 2444
d8d1f30b
CG
2445 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2446 expires, rt->dst.error) < 0)
e3703b3d 2447 goto nla_put_failure;
2d7202bf
TG
2448
2449 return nlmsg_end(skb, nlh);
2450
2451nla_put_failure:
26932566
PM
2452 nlmsg_cancel(skb, nlh);
2453 return -EMSGSIZE;
1da177e4
LT
2454}
2455
1b43af54 2456int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2457{
2458 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2459 int prefix;
2460
2d7202bf
TG
2461 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2462 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2463 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2464 } else
2465 prefix = 0;
2466
191cd582
BH
2467 return rt6_fill_node(arg->net,
2468 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2469 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2470 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2471}
2472
c127ea2c 2473static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2474{
3b1e0a65 2475 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2476 struct nlattr *tb[RTA_MAX+1];
2477 struct rt6_info *rt;
1da177e4 2478 struct sk_buff *skb;
ab364a6f 2479 struct rtmsg *rtm;
4c9483b2 2480 struct flowi6 fl6;
ab364a6f 2481 int err, iif = 0;
1da177e4 2482
ab364a6f
TG
2483 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2484 if (err < 0)
2485 goto errout;
1da177e4 2486
ab364a6f 2487 err = -EINVAL;
4c9483b2 2488 memset(&fl6, 0, sizeof(fl6));
1da177e4 2489
ab364a6f
TG
2490 if (tb[RTA_SRC]) {
2491 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2492 goto errout;
2493
4c9483b2 2494 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
ab364a6f
TG
2495 }
2496
2497 if (tb[RTA_DST]) {
2498 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2499 goto errout;
2500
4c9483b2 2501 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
ab364a6f
TG
2502 }
2503
2504 if (tb[RTA_IIF])
2505 iif = nla_get_u32(tb[RTA_IIF]);
2506
2507 if (tb[RTA_OIF])
4c9483b2 2508 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2509
2510 if (iif) {
2511 struct net_device *dev;
5578689a 2512 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2513 if (!dev) {
2514 err = -ENODEV;
ab364a6f 2515 goto errout;
1da177e4
LT
2516 }
2517 }
2518
ab364a6f
TG
2519 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2520 if (skb == NULL) {
2521 err = -ENOBUFS;
2522 goto errout;
2523 }
1da177e4 2524
ab364a6f
TG
2525 /* Reserve room for dummy headers, this skb can pass
2526 through good chunk of routing engine.
2527 */
459a98ed 2528 skb_reset_mac_header(skb);
ab364a6f 2529 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2530
4c9483b2 2531 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2532 skb_dst_set(skb, &rt->dst);
1da177e4 2533
4c9483b2 2534 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2535 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2536 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2537 if (err < 0) {
ab364a6f
TG
2538 kfree_skb(skb);
2539 goto errout;
1da177e4
LT
2540 }
2541
5578689a 2542 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2543errout:
1da177e4 2544 return err;
1da177e4
LT
2545}
2546
86872cb5 2547void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2548{
2549 struct sk_buff *skb;
5578689a 2550 struct net *net = info->nl_net;
528c4ceb
DL
2551 u32 seq;
2552 int err;
2553
2554 err = -ENOBUFS;
2555 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2556
339bf98f 2557 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2558 if (skb == NULL)
2559 goto errout;
2560
191cd582 2561 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2562 event, info->pid, seq, 0, 0, 0);
26932566
PM
2563 if (err < 0) {
2564 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2565 WARN_ON(err == -EMSGSIZE);
2566 kfree_skb(skb);
2567 goto errout;
2568 }
1ce85fe4
PNA
2569 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2570 info->nlh, gfp_any());
2571 return;
21713ebc
TG
2572errout:
2573 if (err < 0)
5578689a 2574 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2575}
2576
8ed67789
DL
2577static int ip6_route_dev_notify(struct notifier_block *this,
2578 unsigned long event, void *data)
2579{
2580 struct net_device *dev = (struct net_device *)data;
c346dca1 2581 struct net *net = dev_net(dev);
8ed67789
DL
2582
2583 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2584 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2585 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2586#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2587 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2588 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2589 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2590 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2591#endif
2592 }
2593
2594 return NOTIFY_OK;
2595}
2596
1da177e4
LT
2597/*
2598 * /proc
2599 */
2600
2601#ifdef CONFIG_PROC_FS
2602
1da177e4
LT
/*
 * Buffer bookkeeping for /proc output.
 * NOTE(review): nothing in the code shown here references this struct; the
 * /proc handlers below use seq_file instead.  Confirm it is still needed.
 */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2611
2612static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2613{
33120b30 2614 struct seq_file *m = p_arg;
69cce1d1 2615 struct neighbour *n;
1da177e4 2616
4b7a4274 2617 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2618
2619#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2620 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2621#else
33120b30 2622 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2623#endif
f2c31e32 2624 rcu_read_lock();
69cce1d1
DM
2625 n = dst_get_neighbour(&rt->dst);
2626 if (n) {
2627 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2628 } else {
33120b30 2629 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2630 }
f2c31e32 2631 rcu_read_unlock();
33120b30 2632 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2633 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2634 rt->dst.__use, rt->rt6i_flags,
33120b30 2635 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2636 return 0;
2637}
2638
33120b30 2639static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2640{
f3db4851
DL
2641 struct net *net = (struct net *)m->private;
2642 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2643 return 0;
2644}
1da177e4 2645
33120b30
AD
2646static int ipv6_route_open(struct inode *inode, struct file *file)
2647{
de05c557 2648 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2649}
2650
33120b30
AD
2651static const struct file_operations ipv6_route_proc_fops = {
2652 .owner = THIS_MODULE,
2653 .open = ipv6_route_open,
2654 .read = seq_read,
2655 .llseek = seq_lseek,
b6fcbdb4 2656 .release = single_release_net,
33120b30
AD
2657};
2658
1da177e4
LT
2659static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2660{
69ddb805 2661 struct net *net = (struct net *)seq->private;
1da177e4 2662 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2663 net->ipv6.rt6_stats->fib_nodes,
2664 net->ipv6.rt6_stats->fib_route_nodes,
2665 net->ipv6.rt6_stats->fib_rt_alloc,
2666 net->ipv6.rt6_stats->fib_rt_entries,
2667 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2668 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2669 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2670
2671 return 0;
2672}
2673
2674static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2675{
de05c557 2676 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2677}
2678
9a32144e 2679static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2680 .owner = THIS_MODULE,
2681 .open = rt6_stats_seq_open,
2682 .read = seq_read,
2683 .llseek = seq_lseek,
b6fcbdb4 2684 .release = single_release_net,
1da177e4
LT
2685};
2686#endif /* CONFIG_PROC_FS */
2687
2688#ifdef CONFIG_SYSCTL
2689
1da177e4 2690static
8d65af78 2691int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2692 void __user *buffer, size_t *lenp, loff_t *ppos)
2693{
c486da34
LAG
2694 struct net *net;
2695 int delay;
2696 if (!write)
1da177e4 2697 return -EINVAL;
c486da34
LAG
2698
2699 net = (struct net *)ctl->extra1;
2700 delay = net->ipv6.sysctl.flush_delay;
2701 proc_dointvec(ctl, write, buffer, lenp, ppos);
2702 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2703 return 0;
1da177e4
LT
2704}
2705
760f2d01 2706ctl_table ipv6_route_table_template[] = {
1ab1457c 2707 {
1da177e4 2708 .procname = "flush",
4990509f 2709 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2710 .maxlen = sizeof(int),
89c8b3a1 2711 .mode = 0200,
6d9f239a 2712 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2713 },
2714 {
1da177e4 2715 .procname = "gc_thresh",
9a7ec3a9 2716 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2717 .maxlen = sizeof(int),
2718 .mode = 0644,
6d9f239a 2719 .proc_handler = proc_dointvec,
1da177e4
LT
2720 },
2721 {
1da177e4 2722 .procname = "max_size",
4990509f 2723 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2724 .maxlen = sizeof(int),
2725 .mode = 0644,
6d9f239a 2726 .proc_handler = proc_dointvec,
1da177e4
LT
2727 },
2728 {
1da177e4 2729 .procname = "gc_min_interval",
4990509f 2730 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2731 .maxlen = sizeof(int),
2732 .mode = 0644,
6d9f239a 2733 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2734 },
2735 {
1da177e4 2736 .procname = "gc_timeout",
4990509f 2737 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2738 .maxlen = sizeof(int),
2739 .mode = 0644,
6d9f239a 2740 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2741 },
2742 {
1da177e4 2743 .procname = "gc_interval",
4990509f 2744 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2745 .maxlen = sizeof(int),
2746 .mode = 0644,
6d9f239a 2747 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2748 },
2749 {
1da177e4 2750 .procname = "gc_elasticity",
4990509f 2751 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2752 .maxlen = sizeof(int),
2753 .mode = 0644,
f3d3f616 2754 .proc_handler = proc_dointvec,
1da177e4
LT
2755 },
2756 {
1da177e4 2757 .procname = "mtu_expires",
4990509f 2758 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2759 .maxlen = sizeof(int),
2760 .mode = 0644,
6d9f239a 2761 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2762 },
2763 {
1da177e4 2764 .procname = "min_adv_mss",
4990509f 2765 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2766 .maxlen = sizeof(int),
2767 .mode = 0644,
f3d3f616 2768 .proc_handler = proc_dointvec,
1da177e4
LT
2769 },
2770 {
1da177e4 2771 .procname = "gc_min_interval_ms",
4990509f 2772 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2773 .maxlen = sizeof(int),
2774 .mode = 0644,
6d9f239a 2775 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2776 },
f8572d8f 2777 { }
1da177e4
LT
2778};
2779
2c8c1e72 2780struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2781{
2782 struct ctl_table *table;
2783
2784 table = kmemdup(ipv6_route_table_template,
2785 sizeof(ipv6_route_table_template),
2786 GFP_KERNEL);
5ee09105
YH
2787
2788 if (table) {
2789 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2790 table[0].extra1 = net;
86393e52 2791 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2792 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2793 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2794 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2795 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2796 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2797 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2798 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2799 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2800 }
2801
760f2d01
DL
2802 return table;
2803}
1da177e4
LT
2804#endif
2805
2c8c1e72 2806static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2807{
633d424b 2808 int ret = -ENOMEM;
8ed67789 2809
86393e52
AD
2810 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2811 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2812
fc66f95c
ED
2813 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2814 goto out_ip6_dst_ops;
2815
8ed67789
DL
2816 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2817 sizeof(*net->ipv6.ip6_null_entry),
2818 GFP_KERNEL);
2819 if (!net->ipv6.ip6_null_entry)
fc66f95c 2820 goto out_ip6_dst_entries;
d8d1f30b 2821 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2822 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2823 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2824 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2825 ip6_template_metrics, true);
8ed67789
DL
2826
2827#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2828 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2829 sizeof(*net->ipv6.ip6_prohibit_entry),
2830 GFP_KERNEL);
68fffc67
PZ
2831 if (!net->ipv6.ip6_prohibit_entry)
2832 goto out_ip6_null_entry;
d8d1f30b 2833 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2834 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2835 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2836 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2837 ip6_template_metrics, true);
8ed67789
DL
2838
2839 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2840 sizeof(*net->ipv6.ip6_blk_hole_entry),
2841 GFP_KERNEL);
68fffc67
PZ
2842 if (!net->ipv6.ip6_blk_hole_entry)
2843 goto out_ip6_prohibit_entry;
d8d1f30b 2844 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2845 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2846 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2847 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2848 ip6_template_metrics, true);
8ed67789
DL
2849#endif
2850
b339a47c
PZ
2851 net->ipv6.sysctl.flush_delay = 0;
2852 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2853 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2854 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2855 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2856 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2857 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2858 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2859
cdb18761
DL
2860#ifdef CONFIG_PROC_FS
2861 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2862 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2863#endif
6891a346
BT
2864 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2865
8ed67789
DL
2866 ret = 0;
2867out:
2868 return ret;
f2fc6a54 2869
68fffc67
PZ
2870#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2871out_ip6_prohibit_entry:
2872 kfree(net->ipv6.ip6_prohibit_entry);
2873out_ip6_null_entry:
2874 kfree(net->ipv6.ip6_null_entry);
2875#endif
fc66f95c
ED
2876out_ip6_dst_entries:
2877 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2878out_ip6_dst_ops:
f2fc6a54 2879 goto out;
cdb18761
DL
2880}
2881
2c8c1e72 2882static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2883{
2884#ifdef CONFIG_PROC_FS
2885 proc_net_remove(net, "ipv6_route");
2886 proc_net_remove(net, "rt6_stats");
2887#endif
8ed67789
DL
2888 kfree(net->ipv6.ip6_null_entry);
2889#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2890 kfree(net->ipv6.ip6_prohibit_entry);
2891 kfree(net->ipv6.ip6_blk_hole_entry);
2892#endif
41bb78b4 2893 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2894}
2895
2896static struct pernet_operations ip6_route_net_ops = {
2897 .init = ip6_route_net_init,
2898 .exit = ip6_route_net_exit,
2899};
2900
8ed67789
DL
2901static struct notifier_block ip6_route_dev_notifier = {
2902 .notifier_call = ip6_route_dev_notify,
2903 .priority = 0,
2904};
2905
433d49c3 2906int __init ip6_route_init(void)
1da177e4 2907{
433d49c3
DL
2908 int ret;
2909
9a7ec3a9
DL
2910 ret = -ENOMEM;
2911 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2912 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2913 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2914 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2915 goto out;
14e50e57 2916
fc66f95c 2917 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2918 if (ret)
bdb3289f 2919 goto out_kmem_cache;
bdb3289f 2920
fc66f95c
ED
2921 ret = register_pernet_subsys(&ip6_route_net_ops);
2922 if (ret)
2923 goto out_dst_entries;
2924
5dc121e9
AE
2925 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2926
8ed67789
DL
2927 /* Registering of the loopback is done before this portion of code,
2928 * the loopback reference in rt6_info will not be taken, do it
2929 * manually for init_net */
d8d1f30b 2930 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2931 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2932 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2933 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2934 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2935 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2936 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2937 #endif
433d49c3
DL
2938 ret = fib6_init();
2939 if (ret)
8ed67789 2940 goto out_register_subsys;
433d49c3 2941
433d49c3
DL
2942 ret = xfrm6_init();
2943 if (ret)
cdb18761 2944 goto out_fib6_init;
c35b7e72 2945
433d49c3
DL
2946 ret = fib6_rules_init();
2947 if (ret)
2948 goto xfrm6_init;
7e5449c2 2949
433d49c3 2950 ret = -ENOBUFS;
c7ac8679
GR
2951 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2952 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2953 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2954 goto fib6_rules_init;
c127ea2c 2955
8ed67789 2956 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2957 if (ret)
2958 goto fib6_rules_init;
8ed67789 2959
433d49c3
DL
2960out:
2961 return ret;
2962
2963fib6_rules_init:
433d49c3
DL
2964 fib6_rules_cleanup();
2965xfrm6_init:
433d49c3 2966 xfrm6_fini();
433d49c3 2967out_fib6_init:
433d49c3 2968 fib6_gc_cleanup();
8ed67789
DL
2969out_register_subsys:
2970 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2971out_dst_entries:
2972 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2973out_kmem_cache:
f2fc6a54 2974 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2975 goto out;
1da177e4
LT
2976}
2977
2978void ip6_route_cleanup(void)
2979{
8ed67789 2980 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2981 fib6_rules_cleanup();
1da177e4 2982 xfrm6_fini();
1da177e4 2983 fib6_gc_cleanup();
8ed67789 2984 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2985 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 2986 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2987}