]> git.proxmox.com Git - mirror_ubuntu-eoan-kernel.git/blame - net/ipv6/route.c
net: Add export.h for EXPORT_SYMBOL/THIS_MODULE to non-modules
[mirror_ubuntu-eoan-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
65/* Set to 3 to get tracing. */
66#define RT6_DEBUG 2
67
68#if RT6_DEBUG >= 3
69#define RDBG(x) printk x
70#define RT6_TRACE(x...) printk(KERN_DEBUG x)
71#else
72#define RDBG(x)
73#define RT6_TRACE(x...) do { ; } while (0)
74#endif
75
21efcfa0
ED
76static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
1da177e4 78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 79static unsigned int ip6_default_advmss(const struct dst_entry *dst);
d33e4553 80static unsigned int ip6_default_mtu(const struct dst_entry *dst);
1da177e4
LT
81static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
569d3645 85static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
86
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
70ceb4f5 92#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 93static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
94 const struct in6_addr *prefix, int prefixlen,
95 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 96 unsigned pref);
efa2cea0 97static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
98 const struct in6_addr *prefix, int prefixlen,
99 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
100#endif
101
06582540
DM
102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
8e2ec639
YZ
108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
06582540
DM
111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
d3aaeb38
DM
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
9a7ec3a9 140static struct dst_ops ip6_dst_ops_template = {
1da177e4 141 .family = AF_INET6,
09640e63 142 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
0dbaee3b 146 .default_advmss = ip6_default_advmss,
d33e4553 147 .default_mtu = ip6_default_mtu,
06582540 148 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 154 .local_out = __ip6_local_out,
d3aaeb38 155 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
156};
157
ec831ea7
RD
/* default_mtu hook for blackhole routes: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	const unsigned int mtu = 0;

	return mtu;
}
162
14e50e57
DM
163static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
164{
165}
166
0972ddb2
HB
167static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
168 unsigned long old)
169{
170 return NULL;
171}
172
14e50e57
DM
173static struct dst_ops ip6_dst_blackhole_ops = {
174 .family = AF_INET6,
09640e63 175 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
176 .destroy = ip6_dst_destroy,
177 .check = ip6_dst_check,
ec831ea7 178 .default_mtu = ip6_blackhole_default_mtu,
214f45c9 179 .default_advmss = ip6_default_advmss,
14e50e57 180 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 181 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 182 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
183};
184
62fa8a84
DM
185static const u32 ip6_template_metrics[RTAX_MAX] = {
186 [RTAX_HOPLIMIT - 1] = 255,
187};
188
bdb3289f 189static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
190 .dst = {
191 .__refcnt = ATOMIC_INIT(1),
192 .__use = 1,
193 .obsolete = -1,
194 .error = -ENETUNREACH,
d8d1f30b
CG
195 .input = ip6_pkt_discard,
196 .output = ip6_pkt_discard_out,
1da177e4
LT
197 },
198 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 199 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
200 .rt6i_metric = ~(u32) 0,
201 .rt6i_ref = ATOMIC_INIT(1),
202};
203
101367c2
TG
204#ifdef CONFIG_IPV6_MULTIPLE_TABLES
205
6723ab54
DM
206static int ip6_pkt_prohibit(struct sk_buff *skb);
207static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 208
280a34c8 209static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
210 .dst = {
211 .__refcnt = ATOMIC_INIT(1),
212 .__use = 1,
213 .obsolete = -1,
214 .error = -EACCES,
d8d1f30b
CG
215 .input = ip6_pkt_prohibit,
216 .output = ip6_pkt_prohibit_out,
101367c2
TG
217 },
218 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 219 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
220 .rt6i_metric = ~(u32) 0,
221 .rt6i_ref = ATOMIC_INIT(1),
222};
223
bdb3289f 224static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
225 .dst = {
226 .__refcnt = ATOMIC_INIT(1),
227 .__use = 1,
228 .obsolete = -1,
229 .error = -EINVAL,
d8d1f30b
CG
230 .input = dst_discard,
231 .output = dst_discard,
101367c2
TG
232 },
233 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 234 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
235 .rt6i_metric = ~(u32) 0,
236 .rt6i_ref = ATOMIC_INIT(1),
237};
238
239#endif
240
1da177e4 241/* allocate dst with ip6_dst_ops */
5c1e6aa3 242static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
243 struct net_device *dev,
244 int flags)
1da177e4 245{
957c665f 246 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 247
fbe58186
MB
248 if (rt != NULL)
249 memset(&rt->rt6i_table, 0,
250 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
251
252 return rt;
1da177e4
LT
253}
254
255static void ip6_dst_destroy(struct dst_entry *dst)
256{
257 struct rt6_info *rt = (struct rt6_info *)dst;
258 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 259 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 260
8e2ec639
YZ
261 if (!(rt->dst.flags & DST_HOST))
262 dst_destroy_metrics_generic(dst);
263
1da177e4
LT
264 if (idev != NULL) {
265 rt->rt6i_idev = NULL;
266 in6_dev_put(idev);
1ab1457c 267 }
b3419363 268 if (peer) {
b3419363
DM
269 rt->rt6i_peer = NULL;
270 inet_putpeer(peer);
271 }
272}
273
6431cbc2
DM
274static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
275
276static u32 rt6_peer_genid(void)
277{
278 return atomic_read(&__rt6_peer_genid);
279}
280
b3419363
DM
281void rt6_bind_peer(struct rt6_info *rt, int create)
282{
283 struct inet_peer *peer;
284
b3419363
DM
285 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
286 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
287 inet_putpeer(peer);
6431cbc2
DM
288 else
289 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
290}
291
292static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
293 int how)
294{
295 struct rt6_info *rt = (struct rt6_info *)dst;
296 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 297 struct net_device *loopback_dev =
c346dca1 298 dev_net(dev)->loopback_dev;
1da177e4 299
5a3e55d6
DL
300 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
301 struct inet6_dev *loopback_idev =
302 in6_dev_get(loopback_dev);
1da177e4
LT
303 if (loopback_idev != NULL) {
304 rt->rt6i_idev = loopback_idev;
305 in6_dev_put(idev);
306 }
307 }
308}
309
310static __inline__ int rt6_check_expired(const struct rt6_info *rt)
311{
a02cec21
ED
312 return (rt->rt6i_flags & RTF_EXPIRES) &&
313 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
314}
315
b71d1d42 316static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 317{
a02cec21
ED
318 return ipv6_addr_type(daddr) &
319 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
320}
321
1da177e4 322/*
c71099ac 323 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
324 */
325
8ed67789
DL
326static inline struct rt6_info *rt6_device_match(struct net *net,
327 struct rt6_info *rt,
b71d1d42 328 const struct in6_addr *saddr,
1da177e4 329 int oif,
d420895e 330 int flags)
1da177e4
LT
331{
332 struct rt6_info *local = NULL;
333 struct rt6_info *sprt;
334
dd3abc4e
YH
335 if (!oif && ipv6_addr_any(saddr))
336 goto out;
337
d8d1f30b 338 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
339 struct net_device *dev = sprt->rt6i_dev;
340
341 if (oif) {
1da177e4
LT
342 if (dev->ifindex == oif)
343 return sprt;
344 if (dev->flags & IFF_LOOPBACK) {
345 if (sprt->rt6i_idev == NULL ||
346 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 347 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 348 continue;
1ab1457c 349 if (local && (!oif ||
1da177e4
LT
350 local->rt6i_idev->dev->ifindex == oif))
351 continue;
352 }
353 local = sprt;
354 }
dd3abc4e
YH
355 } else {
356 if (ipv6_chk_addr(net, saddr, dev,
357 flags & RT6_LOOKUP_F_IFACE))
358 return sprt;
1da177e4 359 }
dd3abc4e 360 }
1da177e4 361
dd3abc4e 362 if (oif) {
1da177e4
LT
363 if (local)
364 return local;
365
d420895e 366 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 367 return net->ipv6.ip6_null_entry;
1da177e4 368 }
dd3abc4e 369out:
1da177e4
LT
370 return rt;
371}
372
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: send a Neighbour Solicitation to the
 * neighbour attached to @rt (which may be NULL) when that neighbour is not
 * in a NUD_VALID state and the last update is older than the interface's
 * rtr_probe_interval.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute; the interval check below is what enforces the limit here.
 *
 * NOTE(review): neigh->updated is written while only the read side of
 * neigh->lock is held - confirm whether a write lock is needed.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	int due;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	read_lock_bh(&neigh->lock);
	due = !(neigh->nud_state & NUD_VALID) &&
	      time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval);
	if (due)
		neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	if (due) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Solicit the neighbour via its solicited-node multicast address. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
412
1da177e4 413/*
554cfb7e 414 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 415 */
b6f99a21 416static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
417{
418 struct net_device *dev = rt->rt6i_dev;
161980f4 419 if (!oif || dev->ifindex == oif)
554cfb7e 420 return 2;
161980f4
DM
421 if ((dev->flags & IFF_LOOPBACK) &&
422 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
423 return 1;
424 return 0;
554cfb7e 425}
1da177e4 426
b6f99a21 427static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 428{
f2c31e32 429 struct neighbour *neigh;
398bcbeb 430 int m;
f2c31e32
ED
431
432 rcu_read_lock();
433 neigh = dst_get_neighbour(&rt->dst);
4d0c5911
YH
434 if (rt->rt6i_flags & RTF_NONEXTHOP ||
435 !(rt->rt6i_flags & RTF_GATEWAY))
436 m = 1;
437 else if (neigh) {
554cfb7e
YH
438 read_lock_bh(&neigh->lock);
439 if (neigh->nud_state & NUD_VALID)
4d0c5911 440 m = 2;
398bcbeb
YH
441#ifdef CONFIG_IPV6_ROUTER_PREF
442 else if (neigh->nud_state & NUD_FAILED)
443 m = 0;
444#endif
445 else
ea73ee23 446 m = 1;
554cfb7e 447 read_unlock_bh(&neigh->lock);
398bcbeb
YH
448 } else
449 m = 0;
f2c31e32 450 rcu_read_unlock();
554cfb7e 451 return m;
1da177e4
LT
452}
453
554cfb7e
YH
454static int rt6_score_route(struct rt6_info *rt, int oif,
455 int strict)
1da177e4 456{
4d0c5911 457 int m, n;
1ab1457c 458
4d0c5911 459 m = rt6_check_dev(rt, oif);
77d16f45 460 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 461 return -1;
ebacaaa0
YH
462#ifdef CONFIG_IPV6_ROUTER_PREF
463 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
464#endif
4d0c5911 465 n = rt6_check_neigh(rt);
557e92ef 466 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
467 return -1;
468 return m;
469}
470
f11e6659
DM
471static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
472 int *mpri, struct rt6_info *match)
554cfb7e 473{
f11e6659
DM
474 int m;
475
476 if (rt6_check_expired(rt))
477 goto out;
478
479 m = rt6_score_route(rt, oif, strict);
480 if (m < 0)
481 goto out;
482
483 if (m > *mpri) {
484 if (strict & RT6_LOOKUP_F_REACHABLE)
485 rt6_probe(match);
486 *mpri = m;
487 match = rt;
488 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
489 rt6_probe(rt);
490 }
491
492out:
493 return match;
494}
495
496static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
497 struct rt6_info *rr_head,
498 u32 metric, int oif, int strict)
499{
500 struct rt6_info *rt, *match;
554cfb7e 501 int mpri = -1;
1da177e4 502
f11e6659
DM
503 match = NULL;
504 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 505 rt = rt->dst.rt6_next)
f11e6659
DM
506 match = find_match(rt, oif, strict, &mpri, match);
507 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 508 rt = rt->dst.rt6_next)
f11e6659 509 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 510
f11e6659
DM
511 return match;
512}
1da177e4 513
f11e6659
DM
514static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
515{
516 struct rt6_info *match, *rt0;
8ed67789 517 struct net *net;
1da177e4 518
f11e6659 519 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 520 __func__, fn->leaf, oif);
554cfb7e 521
f11e6659
DM
522 rt0 = fn->rr_ptr;
523 if (!rt0)
524 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 525
f11e6659 526 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 527
554cfb7e 528 if (!match &&
f11e6659 529 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 530 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 531
554cfb7e 532 /* no entries matched; do round-robin */
f11e6659
DM
533 if (!next || next->rt6i_metric != rt0->rt6i_metric)
534 next = fn->leaf;
535
536 if (next != rt0)
537 fn->rr_ptr = next;
1da177e4 538 }
1da177e4 539
f11e6659 540 RT6_TRACE("%s() => %p\n",
0dc47877 541 __func__, match);
1da177e4 542
c346dca1 543 net = dev_net(rt0->rt6i_dev);
a02cec21 544 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
545}
546
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option received on @dev.
 *
 * @opt:    start of the option (interpreted as struct route_info)
 * @len:    option length in bytes
 * @gwaddr: address of the advertising router
 *
 * Returns -EINVAL for a malformed option or an invalid preference,
 * 0 otherwise.  A zero lifetime deletes an existing route; a non-zero
 * lifetime adds the route if missing or refreshes the preference bits of
 * an existing one, then (re)arms or clears its expiry.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full 128-bit prefix present in the option. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime)) {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		} else {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
620
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed selection.
 *
 * Expects the caller to provide local variables `rt` and `fn` and the
 * labels `out` and `restart`.  When `rt` is the namespace's null entry the
 * macro walks towards the root: it jumps to `out` on reaching a node
 * flagged RTN_TL_ROOT, descends into a parent's source subtree (looked up
 * with @saddr) when there is one other than the current node, and jumps
 * to `restart` as soon as `fn` is a node flagged RTN_RTINFO.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
c71099ac 638
8ed67789
DL
639static struct rt6_info *ip6_pol_route_lookup(struct net *net,
640 struct fib6_table *table,
4c9483b2 641 struct flowi6 *fl6, int flags)
1da177e4
LT
642{
643 struct fib6_node *fn;
644 struct rt6_info *rt;
645
c71099ac 646 read_lock_bh(&table->tb6_lock);
4c9483b2 647 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
648restart:
649 rt = fn->leaf;
4c9483b2
DM
650 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
651 BACKTRACK(net, &fl6->saddr);
c71099ac 652out:
d8d1f30b 653 dst_use(&rt->dst, jiffies);
c71099ac 654 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
655 return rt;
656
657}
658
9acd9f3a
YH
659struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
660 const struct in6_addr *saddr, int oif, int strict)
c71099ac 661{
4c9483b2
DM
662 struct flowi6 fl6 = {
663 .flowi6_oif = oif,
664 .daddr = *daddr,
c71099ac
TG
665 };
666 struct dst_entry *dst;
77d16f45 667 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 668
adaa70bb 669 if (saddr) {
4c9483b2 670 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
671 flags |= RT6_LOOKUP_F_HAS_SADDR;
672 }
673
4c9483b2 674 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
675 if (dst->error == 0)
676 return (struct rt6_info *) dst;
677
678 dst_release(dst);
679
1da177e4
LT
680 return NULL;
681}
682
7159039a
YH
683EXPORT_SYMBOL(rt6_lookup);
684
/*
 * ip6_ins_rt() is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason, the
 * route is freed. In any case, if the caller does not hold it, it may
 * be destroyed.
 */
690
86872cb5 691static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
692{
693 int err;
c71099ac 694 struct fib6_table *table;
1da177e4 695
c71099ac
TG
696 table = rt->rt6i_table;
697 write_lock_bh(&table->tb6_lock);
86872cb5 698 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 699 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
700
701 return err;
702}
703
40e22e8f
TG
704int ip6_ins_rt(struct rt6_info *rt)
705{
4d1169c1 706 struct nl_info info = {
c346dca1 707 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 708 };
528c4ceb 709 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
710}
711
21efcfa0
ED
712static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
713 const struct in6_addr *daddr,
b71d1d42 714 const struct in6_addr *saddr)
1da177e4 715{
1da177e4
LT
716 struct rt6_info *rt;
717
718 /*
719 * Clone the route.
720 */
721
21efcfa0 722 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
723
724 if (rt) {
14deae41
DM
725 struct neighbour *neigh;
726 int attempts = !in_softirq();
727
58c4fb86
YH
728 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
729 if (rt->rt6i_dst.plen != 128 &&
21efcfa0 730 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 731 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 732 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 733 }
1da177e4 734
1da177e4 735 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
736
737#ifdef CONFIG_IPV6_SUBTREES
738 if (rt->rt6i_src.plen && saddr) {
739 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
740 rt->rt6i_src.plen = 128;
741 }
742#endif
743
14deae41
DM
744 retry:
745 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
746 if (IS_ERR(neigh)) {
747 struct net *net = dev_net(rt->rt6i_dev);
748 int saved_rt_min_interval =
749 net->ipv6.sysctl.ip6_rt_gc_min_interval;
750 int saved_rt_elasticity =
751 net->ipv6.sysctl.ip6_rt_gc_elasticity;
752
753 if (attempts-- > 0) {
754 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
755 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
756
86393e52 757 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
758
759 net->ipv6.sysctl.ip6_rt_gc_elasticity =
760 saved_rt_elasticity;
761 net->ipv6.sysctl.ip6_rt_gc_min_interval =
762 saved_rt_min_interval;
763 goto retry;
764 }
765
766 if (net_ratelimit())
767 printk(KERN_WARNING
7e1b33e5 768 "ipv6: Neighbour table overflow.\n");
d8d1f30b 769 dst_free(&rt->dst);
14deae41
DM
770 return NULL;
771 }
69cce1d1 772 dst_set_neighbour(&rt->dst, neigh);
1da177e4 773
95a9a5ba 774 }
1da177e4 775
95a9a5ba
YH
776 return rt;
777}
1da177e4 778
21efcfa0
ED
779static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
780 const struct in6_addr *daddr)
299d9939 781{
21efcfa0
ED
782 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
783
299d9939 784 if (rt) {
299d9939 785 rt->rt6i_flags |= RTF_CACHE;
f2c31e32 786 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_raw(&ort->dst)));
299d9939
YH
787 }
788 return rt;
789}
790
8ed67789 791static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 792 struct flowi6 *fl6, int flags)
1da177e4
LT
793{
794 struct fib6_node *fn;
519fbd87 795 struct rt6_info *rt, *nrt;
c71099ac 796 int strict = 0;
1da177e4 797 int attempts = 3;
519fbd87 798 int err;
53b7997f 799 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 800
77d16f45 801 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
802
803relookup:
c71099ac 804 read_lock_bh(&table->tb6_lock);
1da177e4 805
8238dd06 806restart_2:
4c9483b2 807 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
808
809restart:
4acad72d 810 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 811
4c9483b2 812 BACKTRACK(net, &fl6->saddr);
8ed67789 813 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 814 rt->rt6i_flags & RTF_CACHE)
1ddef044 815 goto out;
1da177e4 816
d8d1f30b 817 dst_hold(&rt->dst);
c71099ac 818 read_unlock_bh(&table->tb6_lock);
fb9de91e 819
f2c31e32 820 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 821 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 822 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 823 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
824 else
825 goto out2;
e40cf353 826
d8d1f30b 827 dst_release(&rt->dst);
8ed67789 828 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 829
d8d1f30b 830 dst_hold(&rt->dst);
519fbd87 831 if (nrt) {
40e22e8f 832 err = ip6_ins_rt(nrt);
519fbd87 833 if (!err)
1da177e4 834 goto out2;
1da177e4 835 }
1da177e4 836
519fbd87
YH
837 if (--attempts <= 0)
838 goto out2;
839
840 /*
c71099ac 841 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
842 * released someone could insert this route. Relookup.
843 */
d8d1f30b 844 dst_release(&rt->dst);
519fbd87
YH
845 goto relookup;
846
847out:
8238dd06
YH
848 if (reachable) {
849 reachable = 0;
850 goto restart_2;
851 }
d8d1f30b 852 dst_hold(&rt->dst);
c71099ac 853 read_unlock_bh(&table->tb6_lock);
1da177e4 854out2:
d8d1f30b
CG
855 rt->dst.lastuse = jiffies;
856 rt->dst.__use++;
c71099ac
TG
857
858 return rt;
1da177e4
LT
859}
860
8ed67789 861static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 862 struct flowi6 *fl6, int flags)
4acad72d 863{
4c9483b2 864 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
865}
866
c71099ac
TG
867void ip6_route_input(struct sk_buff *skb)
868{
b71d1d42 869 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 870 struct net *net = dev_net(skb->dev);
adaa70bb 871 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
872 struct flowi6 fl6 = {
873 .flowi6_iif = skb->dev->ifindex,
874 .daddr = iph->daddr,
875 .saddr = iph->saddr,
876 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
877 .flowi6_mark = skb->mark,
878 .flowi6_proto = iph->nexthdr,
c71099ac 879 };
adaa70bb 880
1d6e55f1 881 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 882 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 883
4c9483b2 884 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
885}
886
8ed67789 887static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 888 struct flowi6 *fl6, int flags)
1da177e4 889{
4c9483b2 890 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
891}
892
9c7a4f9c 893struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 894 struct flowi6 *fl6)
c71099ac
TG
895{
896 int flags = 0;
897
4c9483b2 898 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 899 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 900
4c9483b2 901 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 902 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
903 else if (sk)
904 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 905
4c9483b2 906 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
907}
908
7159039a 909EXPORT_SYMBOL(ip6_route_output);
1da177e4 910
2774c131 911struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 912{
5c1e6aa3 913 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
914 struct dst_entry *new = NULL;
915
5c1e6aa3 916 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 917 if (rt) {
cf911662
DM
918 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
919
d8d1f30b 920 new = &rt->dst;
14e50e57 921
14e50e57 922 new->__use = 1;
352e512c
HX
923 new->input = dst_discard;
924 new->output = dst_discard;
14e50e57 925
21efcfa0
ED
926 if (dst_metrics_read_only(&ort->dst))
927 new->_metrics = ort->dst._metrics;
928 else
929 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
930 rt->rt6i_idev = ort->rt6i_idev;
931 if (rt->rt6i_idev)
932 in6_dev_hold(rt->rt6i_idev);
933 rt->rt6i_expires = 0;
934
935 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
936 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
937 rt->rt6i_metric = 0;
938
939 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
940#ifdef CONFIG_IPV6_SUBTREES
941 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
942#endif
943
944 dst_free(new);
945 }
946
69ead7af
DM
947 dst_release(dst_orig);
948 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 949}
14e50e57 950
1da177e4
LT
951/*
952 * Destination cache support functions
953 */
954
955static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
956{
957 struct rt6_info *rt;
958
959 rt = (struct rt6_info *) dst;
960
6431cbc2
DM
961 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
962 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
963 if (!rt->rt6i_peer)
964 rt6_bind_peer(rt, 0);
965 rt->rt6i_peer_genid = rt6_peer_genid();
966 }
1da177e4 967 return dst;
6431cbc2 968 }
1da177e4
LT
969 return NULL;
970}
971
972static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
973{
974 struct rt6_info *rt = (struct rt6_info *) dst;
975
976 if (rt) {
54c1a859
YH
977 if (rt->rt6i_flags & RTF_CACHE) {
978 if (rt6_check_expired(rt)) {
979 ip6_del_rt(rt);
980 dst = NULL;
981 }
982 } else {
1da177e4 983 dst_release(dst);
54c1a859
YH
984 dst = NULL;
985 }
1da177e4 986 }
54c1a859 987 return dst;
1da177e4
LT
988}
989
990static void ip6_link_failure(struct sk_buff *skb)
991{
992 struct rt6_info *rt;
993
3ffe533c 994 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 995
adf30907 996 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
997 if (rt) {
998 if (rt->rt6i_flags&RTF_CACHE) {
d8d1f30b 999 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1000 rt->rt6i_flags |= RTF_EXPIRES;
1001 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1002 rt->rt6i_node->fn_sernum = -1;
1003 }
1004}
1005
1006static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1007{
1008 struct rt6_info *rt6 = (struct rt6_info*)dst;
1009
1010 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1011 rt6->rt6i_flags |= RTF_MODIFIED;
1012 if (mtu < IPV6_MIN_MTU) {
defb3519 1013 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1014 mtu = IPV6_MIN_MTU;
defb3519
DM
1015 features |= RTAX_FEATURE_ALLFRAG;
1016 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1017 }
defb3519 1018 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1019 }
1020}
1021
0dbaee3b 1022static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1023{
0dbaee3b
DM
1024 struct net_device *dev = dst->dev;
1025 unsigned int mtu = dst_mtu(dst);
1026 struct net *net = dev_net(dev);
1027
1da177e4
LT
1028 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1029
5578689a
DL
1030 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1031 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1032
1033 /*
1ab1457c
YH
1034 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1035 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1036 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1037 * rely only on pmtu discovery"
1038 */
1039 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1040 mtu = IPV6_MAXPLEN;
1041 return mtu;
1042}
1043
d33e4553
DM
1044static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1045{
1046 unsigned int mtu = IPV6_MIN_MTU;
1047 struct inet6_dev *idev;
1048
1049 rcu_read_lock();
1050 idev = __in6_dev_get(dst->dev);
1051 if (idev)
1052 mtu = idev->cnf.mtu6;
1053 rcu_read_unlock();
1054
1055 return mtu;
1056}
1057
3b00944c
YH
1058static struct dst_entry *icmp6_dst_gc_list;
1059static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1060
3b00944c 1061struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1062 struct neighbour *neigh,
9acd9f3a 1063 const struct in6_addr *addr)
1da177e4
LT
1064{
1065 struct rt6_info *rt;
1066 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1067 struct net *net = dev_net(dev);
1da177e4
LT
1068
1069 if (unlikely(idev == NULL))
1070 return NULL;
1071
957c665f 1072 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
1da177e4
LT
1073 if (unlikely(rt == NULL)) {
1074 in6_dev_put(idev);
1075 goto out;
1076 }
1077
1da177e4
LT
1078 if (neigh)
1079 neigh_hold(neigh);
14deae41 1080 else {
1da177e4 1081 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
1082 if (IS_ERR(neigh))
1083 neigh = NULL;
1084 }
1da177e4 1085
8e2ec639
YZ
1086 rt->dst.flags |= DST_HOST;
1087 rt->dst.output = ip6_output;
69cce1d1 1088 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1089 atomic_set(&rt->dst.__refcnt, 1);
defb3519 1090 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
8e2ec639
YZ
1091
1092 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1093 rt->rt6i_dst.plen = 128;
1094 rt->rt6i_idev = idev;
1da177e4 1095
3b00944c 1096 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1097 rt->dst.next = icmp6_dst_gc_list;
1098 icmp6_dst_gc_list = &rt->dst;
3b00944c 1099 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1100
5578689a 1101 fib6_force_start_gc(net);
1da177e4
LT
1102
1103out:
d8d1f30b 1104 return &rt->dst;
1da177e4
LT
1105}
1106
3d0f24a7 1107int icmp6_dst_gc(void)
1da177e4 1108{
e9476e95 1109 struct dst_entry *dst, **pprev;
3d0f24a7 1110 int more = 0;
1da177e4 1111
3b00944c
YH
1112 spin_lock_bh(&icmp6_dst_lock);
1113 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1114
1da177e4
LT
1115 while ((dst = *pprev) != NULL) {
1116 if (!atomic_read(&dst->__refcnt)) {
1117 *pprev = dst->next;
1118 dst_free(dst);
1da177e4
LT
1119 } else {
1120 pprev = &dst->next;
3d0f24a7 1121 ++more;
1da177e4
LT
1122 }
1123 }
1124
3b00944c 1125 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1126
3d0f24a7 1127 return more;
1da177e4
LT
1128}
1129
1e493d19
DM
1130static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1131 void *arg)
1132{
1133 struct dst_entry *dst, **pprev;
1134
1135 spin_lock_bh(&icmp6_dst_lock);
1136 pprev = &icmp6_dst_gc_list;
1137 while ((dst = *pprev) != NULL) {
1138 struct rt6_info *rt = (struct rt6_info *) dst;
1139 if (func(rt, arg)) {
1140 *pprev = dst->next;
1141 dst_free(dst);
1142 } else {
1143 pprev = &dst->next;
1144 }
1145 }
1146 spin_unlock_bh(&icmp6_dst_lock);
1147}
1148
569d3645 1149static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1150{
1da177e4 1151 unsigned long now = jiffies;
86393e52 1152 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1153 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1154 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1155 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1156 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1157 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1158 int entries;
7019b78e 1159
fc66f95c 1160 entries = dst_entries_get_fast(ops);
7019b78e 1161 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1162 entries <= rt_max_size)
1da177e4
LT
1163 goto out;
1164
6891a346
BT
1165 net->ipv6.ip6_rt_gc_expire++;
1166 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1167 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1168 entries = dst_entries_get_slow(ops);
1169 if (entries < ops->gc_thresh)
7019b78e 1170 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1171out:
7019b78e 1172 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1173 return entries > rt_max_size;
1da177e4
LT
1174}
1175
1176/* Clean host part of a prefix. Not necessary in radix tree,
1177 but results in cleaner routing tables.
1178
1179 Remove it only when all the things will work!
1180 */
1181
6b75d090 1182int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1183{
5170ae82 1184 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1185 if (hoplimit == 0) {
6b75d090 1186 struct net_device *dev = dst->dev;
c68f24cc
ED
1187 struct inet6_dev *idev;
1188
1189 rcu_read_lock();
1190 idev = __in6_dev_get(dev);
1191 if (idev)
6b75d090 1192 hoplimit = idev->cnf.hop_limit;
c68f24cc 1193 else
53b7997f 1194 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1195 rcu_read_unlock();
1da177e4
LT
1196 }
1197 return hoplimit;
1198}
abbf46ae 1199EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1200
1201/*
1202 *
1203 */
1204
86872cb5 1205int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1206{
1207 int err;
5578689a 1208 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1209 struct rt6_info *rt = NULL;
1210 struct net_device *dev = NULL;
1211 struct inet6_dev *idev = NULL;
c71099ac 1212 struct fib6_table *table;
1da177e4
LT
1213 int addr_type;
1214
86872cb5 1215 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1216 return -EINVAL;
1217#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1218 if (cfg->fc_src_len)
1da177e4
LT
1219 return -EINVAL;
1220#endif
86872cb5 1221 if (cfg->fc_ifindex) {
1da177e4 1222 err = -ENODEV;
5578689a 1223 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1224 if (!dev)
1225 goto out;
1226 idev = in6_dev_get(dev);
1227 if (!idev)
1228 goto out;
1229 }
1230
86872cb5
TG
1231 if (cfg->fc_metric == 0)
1232 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1233
5578689a 1234 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1235 if (table == NULL) {
1236 err = -ENOBUFS;
1237 goto out;
1238 }
1239
957c665f 1240 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4
LT
1241
1242 if (rt == NULL) {
1243 err = -ENOMEM;
1244 goto out;
1245 }
1246
d8d1f30b 1247 rt->dst.obsolete = -1;
6f704992
YH
1248 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1249 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1250 0;
1da177e4 1251
86872cb5
TG
1252 if (cfg->fc_protocol == RTPROT_UNSPEC)
1253 cfg->fc_protocol = RTPROT_BOOT;
1254 rt->rt6i_protocol = cfg->fc_protocol;
1255
1256 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1257
1258 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1259 rt->dst.input = ip6_mc_input;
ab79ad14
1260 else if (cfg->fc_flags & RTF_LOCAL)
1261 rt->dst.input = ip6_input;
1da177e4 1262 else
d8d1f30b 1263 rt->dst.input = ip6_forward;
1da177e4 1264
d8d1f30b 1265 rt->dst.output = ip6_output;
1da177e4 1266
86872cb5
TG
1267 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1268 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1269 if (rt->rt6i_dst.plen == 128)
11d53b49 1270 rt->dst.flags |= DST_HOST;
1da177e4 1271
8e2ec639
YZ
1272 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1273 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1274 if (!metrics) {
1275 err = -ENOMEM;
1276 goto out;
1277 }
1278 dst_init_metrics(&rt->dst, metrics, 0);
1279 }
1da177e4 1280#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1281 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1282 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1283#endif
1284
86872cb5 1285 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1286
1287 /* We cannot add true routes via loopback here,
1288 they would result in kernel looping; promote them to reject routes
1289 */
86872cb5 1290 if ((cfg->fc_flags & RTF_REJECT) ||
ab79ad14
1291 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1292 && !(cfg->fc_flags&RTF_LOCAL))) {
1da177e4 1293 /* hold loopback dev/idev if we haven't done so. */
5578689a 1294 if (dev != net->loopback_dev) {
1da177e4
LT
1295 if (dev) {
1296 dev_put(dev);
1297 in6_dev_put(idev);
1298 }
5578689a 1299 dev = net->loopback_dev;
1da177e4
LT
1300 dev_hold(dev);
1301 idev = in6_dev_get(dev);
1302 if (!idev) {
1303 err = -ENODEV;
1304 goto out;
1305 }
1306 }
d8d1f30b
CG
1307 rt->dst.output = ip6_pkt_discard_out;
1308 rt->dst.input = ip6_pkt_discard;
1309 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1310 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1311 goto install_route;
1312 }
1313
86872cb5 1314 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1315 const struct in6_addr *gw_addr;
1da177e4
LT
1316 int gwa_type;
1317
86872cb5
TG
1318 gw_addr = &cfg->fc_gateway;
1319 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1320 gwa_type = ipv6_addr_type(gw_addr);
1321
1322 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1323 struct rt6_info *grt;
1324
1325 /* IPv6 strictly inhibits using not link-local
1326 addresses as nexthop address.
1327 Otherwise, router will not able to send redirects.
1328 It is very good, but in some (rare!) circumstances
1329 (SIT, PtP, NBMA NOARP links) it is handy to allow
1330 some exceptions. --ANK
1331 */
1332 err = -EINVAL;
1333 if (!(gwa_type&IPV6_ADDR_UNICAST))
1334 goto out;
1335
5578689a 1336 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1337
1338 err = -EHOSTUNREACH;
1339 if (grt == NULL)
1340 goto out;
1341 if (dev) {
1342 if (dev != grt->rt6i_dev) {
d8d1f30b 1343 dst_release(&grt->dst);
1da177e4
LT
1344 goto out;
1345 }
1346 } else {
1347 dev = grt->rt6i_dev;
1348 idev = grt->rt6i_idev;
1349 dev_hold(dev);
1350 in6_dev_hold(grt->rt6i_idev);
1351 }
1352 if (!(grt->rt6i_flags&RTF_GATEWAY))
1353 err = 0;
d8d1f30b 1354 dst_release(&grt->dst);
1da177e4
LT
1355
1356 if (err)
1357 goto out;
1358 }
1359 err = -EINVAL;
1360 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1361 goto out;
1362 }
1363
1364 err = -ENODEV;
1365 if (dev == NULL)
1366 goto out;
1367
c3968a85
DW
1368 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1369 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1370 err = -EINVAL;
1371 goto out;
1372 }
1373 ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
1374 rt->rt6i_prefsrc.plen = 128;
1375 } else
1376 rt->rt6i_prefsrc.plen = 0;
1377
86872cb5 1378 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
69cce1d1
DM
1379 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1380 if (IS_ERR(n)) {
1381 err = PTR_ERR(n);
1da177e4
LT
1382 goto out;
1383 }
69cce1d1 1384 dst_set_neighbour(&rt->dst, n);
1da177e4
LT
1385 }
1386
86872cb5 1387 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1388
1389install_route:
86872cb5
TG
1390 if (cfg->fc_mx) {
1391 struct nlattr *nla;
1392 int remaining;
1393
1394 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1395 int type = nla_type(nla);
86872cb5
TG
1396
1397 if (type) {
1398 if (type > RTAX_MAX) {
1da177e4
LT
1399 err = -EINVAL;
1400 goto out;
1401 }
86872cb5 1402
defb3519 1403 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1404 }
1da177e4
LT
1405 }
1406 }
1407
d8d1f30b 1408 rt->dst.dev = dev;
1da177e4 1409 rt->rt6i_idev = idev;
c71099ac 1410 rt->rt6i_table = table;
63152fc0 1411
c346dca1 1412 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1413
86872cb5 1414 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1415
1416out:
1417 if (dev)
1418 dev_put(dev);
1419 if (idev)
1420 in6_dev_put(idev);
1421 if (rt)
d8d1f30b 1422 dst_free(&rt->dst);
1da177e4
LT
1423 return err;
1424}
1425
86872cb5 1426static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1427{
1428 int err;
c71099ac 1429 struct fib6_table *table;
c346dca1 1430 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1431
8ed67789 1432 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1433 return -ENOENT;
1434
c71099ac
TG
1435 table = rt->rt6i_table;
1436 write_lock_bh(&table->tb6_lock);
1da177e4 1437
86872cb5 1438 err = fib6_del(rt, info);
d8d1f30b 1439 dst_release(&rt->dst);
1da177e4 1440
c71099ac 1441 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1442
1443 return err;
1444}
1445
e0a1ad73
TG
1446int ip6_del_rt(struct rt6_info *rt)
1447{
4d1169c1 1448 struct nl_info info = {
c346dca1 1449 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1450 };
528c4ceb 1451 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1452}
1453
86872cb5 1454static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1455{
c71099ac 1456 struct fib6_table *table;
1da177e4
LT
1457 struct fib6_node *fn;
1458 struct rt6_info *rt;
1459 int err = -ESRCH;
1460
5578689a 1461 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1462 if (table == NULL)
1463 return err;
1464
1465 read_lock_bh(&table->tb6_lock);
1da177e4 1466
c71099ac 1467 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1468 &cfg->fc_dst, cfg->fc_dst_len,
1469 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1470
1da177e4 1471 if (fn) {
d8d1f30b 1472 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1473 if (cfg->fc_ifindex &&
1da177e4 1474 (rt->rt6i_dev == NULL ||
86872cb5 1475 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1476 continue;
86872cb5
TG
1477 if (cfg->fc_flags & RTF_GATEWAY &&
1478 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1479 continue;
86872cb5 1480 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1481 continue;
d8d1f30b 1482 dst_hold(&rt->dst);
c71099ac 1483 read_unlock_bh(&table->tb6_lock);
1da177e4 1484
86872cb5 1485 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1486 }
1487 }
c71099ac 1488 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1489
1490 return err;
1491}
1492
1493/*
1494 * Handle redirects
1495 */
a6279458 1496struct ip6rd_flowi {
4c9483b2 1497 struct flowi6 fl6;
a6279458
YH
1498 struct in6_addr gateway;
1499};
1500
8ed67789
DL
1501static struct rt6_info *__ip6_route_redirect(struct net *net,
1502 struct fib6_table *table,
4c9483b2 1503 struct flowi6 *fl6,
a6279458 1504 int flags)
1da177e4 1505{
4c9483b2 1506 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1507 struct rt6_info *rt;
e843b9e1 1508 struct fib6_node *fn;
c71099ac 1509
1da177e4 1510 /*
e843b9e1
YH
1511 * Get the "current" route for this destination and
1512 * check if the redirect has come from approriate router.
1513 *
1514 * RFC 2461 specifies that redirects should only be
1515 * accepted if they come from the nexthop to the target.
1516 * Due to the way the routes are chosen, this notion
1517 * is a bit fuzzy and one might need to check all possible
1518 * routes.
1da177e4 1519 */
1da177e4 1520
c71099ac 1521 read_lock_bh(&table->tb6_lock);
4c9483b2 1522 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1523restart:
d8d1f30b 1524 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1525 /*
1526 * Current route is on-link; redirect is always invalid.
1527 *
1528 * Seems, previous statement is not true. It could
1529 * be node, which looks for us as on-link (f.e. proxy ndisc)
1530 * But then router serving it might decide, that we should
1531 * know truth 8)8) --ANK (980726).
1532 */
1533 if (rt6_check_expired(rt))
1534 continue;
1535 if (!(rt->rt6i_flags & RTF_GATEWAY))
1536 continue;
4c9483b2 1537 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1538 continue;
a6279458 1539 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1540 continue;
1541 break;
1542 }
a6279458 1543
cb15d9c2 1544 if (!rt)
8ed67789 1545 rt = net->ipv6.ip6_null_entry;
4c9483b2 1546 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1547out:
d8d1f30b 1548 dst_hold(&rt->dst);
a6279458 1549
c71099ac 1550 read_unlock_bh(&table->tb6_lock);
e843b9e1 1551
a6279458
YH
1552 return rt;
1553};
1554
b71d1d42
ED
1555static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1556 const struct in6_addr *src,
1557 const struct in6_addr *gateway,
a6279458
YH
1558 struct net_device *dev)
1559{
adaa70bb 1560 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1561 struct net *net = dev_net(dev);
a6279458 1562 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1563 .fl6 = {
1564 .flowi6_oif = dev->ifindex,
1565 .daddr = *dest,
1566 .saddr = *src,
a6279458 1567 },
a6279458 1568 };
adaa70bb 1569
86c36ce4
BH
1570 ipv6_addr_copy(&rdfl.gateway, gateway);
1571
adaa70bb
TG
1572 if (rt6_need_strict(dest))
1573 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1574
4c9483b2 1575 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1576 flags, __ip6_route_redirect);
a6279458
YH
1577}
1578
b71d1d42
ED
1579void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1580 const struct in6_addr *saddr,
a6279458
YH
1581 struct neighbour *neigh, u8 *lladdr, int on_link)
1582{
1583 struct rt6_info *rt, *nrt = NULL;
1584 struct netevent_redirect netevent;
c346dca1 1585 struct net *net = dev_net(neigh->dev);
a6279458
YH
1586
1587 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1588
8ed67789 1589 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1590 if (net_ratelimit())
1591 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1592 "for redirect target\n");
a6279458 1593 goto out;
1da177e4
LT
1594 }
1595
1da177e4
LT
1596 /*
1597 * We have finally decided to accept it.
1598 */
1599
1ab1457c 1600 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1601 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1602 NEIGH_UPDATE_F_OVERRIDE|
1603 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1604 NEIGH_UPDATE_F_ISROUTER))
1605 );
1606
1607 /*
1608 * Redirect received -> path was valid.
1609 * Look, redirects are sent only in response to data packets,
1610 * so that this nexthop apparently is reachable. --ANK
1611 */
d8d1f30b 1612 dst_confirm(&rt->dst);
1da177e4
LT
1613
1614 /* Duplicate redirect: silently ignore. */
f2c31e32 1615 if (neigh == dst_get_neighbour_raw(&rt->dst))
1da177e4
LT
1616 goto out;
1617
21efcfa0 1618 nrt = ip6_rt_copy(rt, dest);
1da177e4
LT
1619 if (nrt == NULL)
1620 goto out;
1621
1622 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1623 if (on_link)
1624 nrt->rt6i_flags &= ~RTF_GATEWAY;
1625
1da177e4 1626 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
69cce1d1 1627 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1628
40e22e8f 1629 if (ip6_ins_rt(nrt))
1da177e4
LT
1630 goto out;
1631
d8d1f30b
CG
1632 netevent.old = &rt->dst;
1633 netevent.new = &nrt->dst;
8d71740c
TT
1634 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1635
1da177e4 1636 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1637 ip6_del_rt(rt);
1da177e4
LT
1638 return;
1639 }
1640
1641out:
d8d1f30b 1642 dst_release(&rt->dst);
1da177e4
LT
1643}
1644
1645/*
1646 * Handle ICMP "packet too big" messages
1647 * i.e. Path MTU discovery
1648 */
1649
b71d1d42 1650static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1651 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1652{
1653 struct rt6_info *rt, *nrt;
1654 int allfrag = 0;
d3052b55 1655again:
ae878ae2 1656 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1da177e4
LT
1657 if (rt == NULL)
1658 return;
1659
d3052b55
AV
1660 if (rt6_check_expired(rt)) {
1661 ip6_del_rt(rt);
1662 goto again;
1663 }
1664
d8d1f30b 1665 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1666 goto out;
1667
1668 if (pmtu < IPV6_MIN_MTU) {
1669 /*
1ab1457c 1670 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1671 * MTU (1280) and a fragment header should always be included
1672 * after a node receiving Too Big message reporting PMTU is
1673 * less than the IPv6 Minimum Link MTU.
1674 */
1675 pmtu = IPV6_MIN_MTU;
1676 allfrag = 1;
1677 }
1678
1679 /* New mtu received -> path was valid.
1680 They are sent only in response to data packets,
1681 so that this nexthop apparently is reachable. --ANK
1682 */
d8d1f30b 1683 dst_confirm(&rt->dst);
1da177e4
LT
1684
1685 /* Host route. If it is static, it would be better
1686 not to override it, but add new one, so that
1687 when cache entry will expire old pmtu
1688 would return automatically.
1689 */
1690 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1691 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1692 if (allfrag) {
1693 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1694 features |= RTAX_FEATURE_ALLFRAG;
1695 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1696 }
d8d1f30b 1697 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1698 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1699 goto out;
1700 }
1701
1702 /* Network route.
1703 Two cases are possible:
1704 1. It is connected route. Action: COW
1705 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1706 */
f2c31e32 1707 if (!dst_get_neighbour_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1708 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1709 else
1710 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1711
d5315b50 1712 if (nrt) {
defb3519
DM
1713 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1714 if (allfrag) {
1715 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1716 features |= RTAX_FEATURE_ALLFRAG;
1717 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1718 }
a1e78363
YH
1719
1720 /* According to RFC 1981, detecting PMTU increase shouldn't be
1721 * happened within 5 mins, the recommended timer is 10 mins.
1722 * Here this route expiration time is set to ip6_rt_mtu_expires
1723 * which is 10 mins. After 10 mins the decreased pmtu is expired
1724 * and detecting PMTU increase will be automatically happened.
1725 */
d8d1f30b 1726 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1727 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1728
40e22e8f 1729 ip6_ins_rt(nrt);
1da177e4 1730 }
1da177e4 1731out:
d8d1f30b 1732 dst_release(&rt->dst);
1da177e4
LT
1733}
1734
b71d1d42 1735void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1736 struct net_device *dev, u32 pmtu)
1737{
1738 struct net *net = dev_net(dev);
1739
1740 /*
1741 * RFC 1981 states that a node "MUST reduce the size of the packets it
1742 * is sending along the path" that caused the Packet Too Big message.
1743 * Since it's not possible in the general case to determine which
1744 * interface was used to send the original packet, we update the MTU
1745 * on the interface that will be used to send future packets. We also
1746 * update the MTU on the interface that received the Packet Too Big in
1747 * case the original packet was forced out that interface with
1748 * SO_BINDTODEVICE or similar. This is the next best thing to the
1749 * correct behaviour, which would be to update the MTU on all
1750 * interfaces.
1751 */
1752 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1753 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1754}
1755
1da177e4
LT
1756/*
1757 * Misc support functions
1758 */
1759
21efcfa0
ED
1760static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1761 const struct in6_addr *dest)
1da177e4 1762{
c346dca1 1763 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3 1764 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1765 ort->dst.dev, 0);
1da177e4
LT
1766
1767 if (rt) {
d8d1f30b
CG
1768 rt->dst.input = ort->dst.input;
1769 rt->dst.output = ort->dst.output;
8e2ec639 1770 rt->dst.flags |= DST_HOST;
d8d1f30b 1771
21efcfa0 1772 ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
8e2ec639 1773 rt->rt6i_dst.plen = 128;
defb3519 1774 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1775 rt->dst.error = ort->dst.error;
1da177e4
LT
1776 rt->rt6i_idev = ort->rt6i_idev;
1777 if (rt->rt6i_idev)
1778 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1779 rt->dst.lastuse = jiffies;
1da177e4
LT
1780 rt->rt6i_expires = 0;
1781
1782 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1783 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1784 rt->rt6i_metric = 0;
1785
1da177e4
LT
1786#ifdef CONFIG_IPV6_SUBTREES
1787 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1788#endif
0f6c6392 1789 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1790 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1791 }
1792 return rt;
1793}
1794
70ceb4f5 1795#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1796static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1797 const struct in6_addr *prefix, int prefixlen,
1798 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1799{
1800 struct fib6_node *fn;
1801 struct rt6_info *rt = NULL;
c71099ac
TG
1802 struct fib6_table *table;
1803
efa2cea0 1804 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1805 if (table == NULL)
1806 return NULL;
70ceb4f5 1807
c71099ac
TG
1808 write_lock_bh(&table->tb6_lock);
1809 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1810 if (!fn)
1811 goto out;
1812
d8d1f30b 1813 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1814 if (rt->rt6i_dev->ifindex != ifindex)
1815 continue;
1816 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1817 continue;
1818 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1819 continue;
d8d1f30b 1820 dst_hold(&rt->dst);
70ceb4f5
YH
1821 break;
1822 }
1823out:
c71099ac 1824 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1825 return rt;
1826}
1827
efa2cea0 1828static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1829 const struct in6_addr *prefix, int prefixlen,
1830 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1831 unsigned pref)
1832{
86872cb5
TG
1833 struct fib6_config cfg = {
1834 .fc_table = RT6_TABLE_INFO,
238fc7ea 1835 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1836 .fc_ifindex = ifindex,
1837 .fc_dst_len = prefixlen,
1838 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1839 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1840 .fc_nlinfo.pid = 0,
1841 .fc_nlinfo.nlh = NULL,
1842 .fc_nlinfo.nl_net = net,
86872cb5
TG
1843 };
1844
1845 ipv6_addr_copy(&cfg.fc_dst, prefix);
1846 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1847
e317da96
YH
1848 /* We should treat it as a default route if prefix length is 0. */
1849 if (!prefixlen)
86872cb5 1850 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1851
86872cb5 1852 ip6_route_add(&cfg);
70ceb4f5 1853
efa2cea0 1854 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1855}
1856#endif
1857
b71d1d42 1858struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1859{
1da177e4 1860 struct rt6_info *rt;
c71099ac 1861 struct fib6_table *table;
1da177e4 1862
c346dca1 1863 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1864 if (table == NULL)
1865 return NULL;
1da177e4 1866
c71099ac 1867 write_lock_bh(&table->tb6_lock);
d8d1f30b 1868 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1869 if (dev == rt->rt6i_dev &&
045927ff 1870 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1871 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1872 break;
1873 }
1874 if (rt)
d8d1f30b 1875 dst_hold(&rt->dst);
c71099ac 1876 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1877 return rt;
1878}
1879
b71d1d42 1880struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1881 struct net_device *dev,
1882 unsigned int pref)
1da177e4 1883{
86872cb5
TG
1884 struct fib6_config cfg = {
1885 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1886 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1887 .fc_ifindex = dev->ifindex,
1888 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1889 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1890 .fc_nlinfo.pid = 0,
1891 .fc_nlinfo.nlh = NULL,
c346dca1 1892 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1893 };
1da177e4 1894
86872cb5 1895 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1896
86872cb5 1897 ip6_route_add(&cfg);
1da177e4 1898
1da177e4
LT
1899 return rt6_get_dflt_router(gwaddr, dev);
1900}
1901
7b4da532 1902void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1903{
1904 struct rt6_info *rt;
c71099ac
TG
1905 struct fib6_table *table;
1906
1907 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1908 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1909 if (table == NULL)
1910 return;
1da177e4
LT
1911
1912restart:
c71099ac 1913 read_lock_bh(&table->tb6_lock);
d8d1f30b 1914 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1915 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1916 dst_hold(&rt->dst);
c71099ac 1917 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1918 ip6_del_rt(rt);
1da177e4
LT
1919 goto restart;
1920 }
1921 }
c71099ac 1922 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1923}
1924
5578689a
DL
1925static void rtmsg_to_fib6_config(struct net *net,
1926 struct in6_rtmsg *rtmsg,
86872cb5
TG
1927 struct fib6_config *cfg)
1928{
1929 memset(cfg, 0, sizeof(*cfg));
1930
1931 cfg->fc_table = RT6_TABLE_MAIN;
1932 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1933 cfg->fc_metric = rtmsg->rtmsg_metric;
1934 cfg->fc_expires = rtmsg->rtmsg_info;
1935 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1936 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1937 cfg->fc_flags = rtmsg->rtmsg_flags;
1938
5578689a 1939 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1940
86872cb5
TG
1941 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1942 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1943 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1944}
1945
5578689a 1946int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1947{
86872cb5 1948 struct fib6_config cfg;
1da177e4
LT
1949 struct in6_rtmsg rtmsg;
1950 int err;
1951
1952 switch(cmd) {
1953 case SIOCADDRT: /* Add a route */
1954 case SIOCDELRT: /* Delete a route */
1955 if (!capable(CAP_NET_ADMIN))
1956 return -EPERM;
1957 err = copy_from_user(&rtmsg, arg,
1958 sizeof(struct in6_rtmsg));
1959 if (err)
1960 return -EFAULT;
86872cb5 1961
5578689a 1962 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1963
1da177e4
LT
1964 rtnl_lock();
1965 switch (cmd) {
1966 case SIOCADDRT:
86872cb5 1967 err = ip6_route_add(&cfg);
1da177e4
LT
1968 break;
1969 case SIOCDELRT:
86872cb5 1970 err = ip6_route_del(&cfg);
1da177e4
LT
1971 break;
1972 default:
1973 err = -EINVAL;
1974 }
1975 rtnl_unlock();
1976
1977 return err;
3ff50b79 1978 }
1da177e4
LT
1979
1980 return -EINVAL;
1981}
1982
1983/*
1984 * Drop the packet on the floor
1985 */
1986
d5fdd6ba 1987static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1988{
612f09e8 1989 int type;
adf30907 1990 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1991 switch (ipstats_mib_noroutes) {
1992 case IPSTATS_MIB_INNOROUTES:
0660e03f 1993 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1994 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1995 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1996 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1997 break;
1998 }
1999 /* FALLTHROUGH */
2000 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2001 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2002 ipstats_mib_noroutes);
612f09e8
YH
2003 break;
2004 }
3ffe533c 2005 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2006 kfree_skb(skb);
2007 return 0;
2008}
2009
9ce8ade0
TG
2010static int ip6_pkt_discard(struct sk_buff *skb)
2011{
612f09e8 2012 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2013}
2014
20380731 2015static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2016{
adf30907 2017 skb->dev = skb_dst(skb)->dev;
612f09e8 2018 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2019}
2020
6723ab54
DM
2021#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2022
9ce8ade0
TG
2023static int ip6_pkt_prohibit(struct sk_buff *skb)
2024{
612f09e8 2025 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2026}
2027
2028static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2029{
adf30907 2030 skb->dev = skb_dst(skb)->dev;
612f09e8 2031 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2032}
2033
6723ab54
DM
2034#endif
2035
1da177e4
LT
2036/*
2037 * Allocate a dst for local (unicast / anycast) address.
2038 */
2039
2040struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2041 const struct in6_addr *addr,
2042 int anycast)
2043{
c346dca1 2044 struct net *net = dev_net(idev->dev);
5c1e6aa3 2045 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2046 net->loopback_dev, 0);
14deae41 2047 struct neighbour *neigh;
1da177e4 2048
40385653
BG
2049 if (rt == NULL) {
2050 if (net_ratelimit())
2051 pr_warning("IPv6: Maximum number of routes reached,"
2052 " consider increasing route/max_size.\n");
1da177e4 2053 return ERR_PTR(-ENOMEM);
40385653 2054 }
1da177e4 2055
1da177e4
LT
2056 in6_dev_hold(idev);
2057
11d53b49 2058 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2059 rt->dst.input = ip6_input;
2060 rt->dst.output = ip6_output;
1da177e4 2061 rt->rt6i_idev = idev;
d8d1f30b 2062 rt->dst.obsolete = -1;
1da177e4
LT
2063
2064 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2065 if (anycast)
2066 rt->rt6i_flags |= RTF_ANYCAST;
2067 else
1da177e4 2068 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
2069 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2070 if (IS_ERR(neigh)) {
d8d1f30b 2071 dst_free(&rt->dst);
14deae41 2072
29546a64 2073 return ERR_CAST(neigh);
1da177e4 2074 }
69cce1d1 2075 dst_set_neighbour(&rt->dst, neigh);
1da177e4
LT
2076
2077 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2078 rt->rt6i_dst.plen = 128;
5578689a 2079 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2080
d8d1f30b 2081 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2082
2083 return rt;
2084}
2085
c3968a85
DW
2086int ip6_route_get_saddr(struct net *net,
2087 struct rt6_info *rt,
b71d1d42 2088 const struct in6_addr *daddr,
c3968a85
DW
2089 unsigned int prefs,
2090 struct in6_addr *saddr)
2091{
2092 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2093 int err = 0;
2094 if (rt->rt6i_prefsrc.plen)
2095 ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
2096 else
2097 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2098 daddr, prefs, saddr);
2099 return err;
2100}
2101
2102/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device	*dev;	/* device to match; NULL matches any */
	struct net		*net;	/* namespace owning the null entry */
	struct in6_addr		*addr;	/* address being removed */
};
2108
2109static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2110{
2111 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2112 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2113 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2114
2115 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
2116 rt != net->ipv6.ip6_null_entry &&
2117 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2118 /* remove prefsrc entry */
2119 rt->rt6i_prefsrc.plen = 0;
2120 }
2121 return 0;
2122}
2123
2124void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2125{
2126 struct net *net = dev_net(ifp->idev->dev);
2127 struct arg_dev_net_ip adni = {
2128 .dev = ifp->idev->dev,
2129 .net = net,
2130 .addr = &ifp->addr,
2131 };
2132 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2133}
2134
8ed67789
DL
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches any */
	struct net		*net;	/* namespace owning the null entry */
};
2139
1da177e4
LT
2140static int fib6_ifdown(struct rt6_info *rt, void *arg)
2141{
bc3ef660 2142 const struct arg_dev_net *adn = arg;
2143 const struct net_device *dev = adn->dev;
8ed67789 2144
bc3ef660 2145 if ((rt->rt6i_dev == dev || dev == NULL) &&
2146 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2147 RT6_TRACE("deleted by ifdown %p\n", rt);
2148 return -1;
2149 }
2150 return 0;
2151}
2152
f3db4851 2153void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2154{
8ed67789
DL
2155 struct arg_dev_net adn = {
2156 .dev = dev,
2157 .net = net,
2158 };
2159
2160 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2161 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2162}
2163
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* new MTU value */
};
2169
2170static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2171{
2172 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2173 struct inet6_dev *idev;
2174
2175 /* In IPv6 pmtu discovery is not optional,
2176 so that RTAX_MTU lock cannot disable it.
2177 We still use this lock to block changes
2178 caused by addrconf/ndisc.
2179 */
2180
2181 idev = __in6_dev_get(arg->dev);
2182 if (idev == NULL)
2183 return 0;
2184
2185 /* For administrative MTU increase, there is no way to discover
2186 IPv6 PMTU increase, so PMTU increase should be updated here.
2187 Since RFC 1981 doesn't include administrative MTU increase
2188 update PMTU increase is a MUST. (i.e. jumbo frame)
2189 */
2190 /*
2191 If new MTU is less than route PMTU, this new MTU will be the
2192 lowest MTU in the path, update the route PMTU to reflect PMTU
2193 decreases; if new MTU is greater than route PMTU, and the
2194 old MTU is the lowest MTU in the path, update the route PMTU
2195 to reflect the increase. In this case if the other nodes' MTU
2196 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2197 PMTU discouvery.
2198 */
2199 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2200 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2201 (dst_mtu(&rt->dst) >= arg->mtu ||
2202 (dst_mtu(&rt->dst) < arg->mtu &&
2203 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2204 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2205 }
1da177e4
LT
2206 return 0;
2207}
2208
2209void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2210{
c71099ac
TG
2211 struct rt6_mtu_change_arg arg = {
2212 .dev = dev,
2213 .mtu = mtu,
2214 };
1da177e4 2215
c346dca1 2216 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2217}
2218
ef7c79ed 2219static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2220 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2221 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2222 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2223 [RTA_PRIORITY] = { .type = NLA_U32 },
2224 [RTA_METRICS] = { .type = NLA_NESTED },
2225};
2226
2227static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2228 struct fib6_config *cfg)
1da177e4 2229{
86872cb5
TG
2230 struct rtmsg *rtm;
2231 struct nlattr *tb[RTA_MAX+1];
2232 int err;
1da177e4 2233
86872cb5
TG
2234 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2235 if (err < 0)
2236 goto errout;
1da177e4 2237
86872cb5
TG
2238 err = -EINVAL;
2239 rtm = nlmsg_data(nlh);
2240 memset(cfg, 0, sizeof(*cfg));
2241
2242 cfg->fc_table = rtm->rtm_table;
2243 cfg->fc_dst_len = rtm->rtm_dst_len;
2244 cfg->fc_src_len = rtm->rtm_src_len;
2245 cfg->fc_flags = RTF_UP;
2246 cfg->fc_protocol = rtm->rtm_protocol;
2247
2248 if (rtm->rtm_type == RTN_UNREACHABLE)
2249 cfg->fc_flags |= RTF_REJECT;
2250
ab79ad14
2251 if (rtm->rtm_type == RTN_LOCAL)
2252 cfg->fc_flags |= RTF_LOCAL;
2253
86872cb5
TG
2254 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2255 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2256 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2257
2258 if (tb[RTA_GATEWAY]) {
2259 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2260 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2261 }
86872cb5
TG
2262
2263 if (tb[RTA_DST]) {
2264 int plen = (rtm->rtm_dst_len + 7) >> 3;
2265
2266 if (nla_len(tb[RTA_DST]) < plen)
2267 goto errout;
2268
2269 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2270 }
86872cb5
TG
2271
2272 if (tb[RTA_SRC]) {
2273 int plen = (rtm->rtm_src_len + 7) >> 3;
2274
2275 if (nla_len(tb[RTA_SRC]) < plen)
2276 goto errout;
2277
2278 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2279 }
86872cb5 2280
c3968a85
DW
2281 if (tb[RTA_PREFSRC])
2282 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2283
86872cb5
TG
2284 if (tb[RTA_OIF])
2285 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2286
2287 if (tb[RTA_PRIORITY])
2288 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2289
2290 if (tb[RTA_METRICS]) {
2291 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2292 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2293 }
86872cb5
TG
2294
2295 if (tb[RTA_TABLE])
2296 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2297
2298 err = 0;
2299errout:
2300 return err;
1da177e4
LT
2301}
2302
c127ea2c 2303static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2304{
86872cb5
TG
2305 struct fib6_config cfg;
2306 int err;
1da177e4 2307
86872cb5
TG
2308 err = rtm_to_fib6_config(skb, nlh, &cfg);
2309 if (err < 0)
2310 return err;
2311
2312 return ip6_route_del(&cfg);
1da177e4
LT
2313}
2314
c127ea2c 2315static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2316{
86872cb5
TG
2317 struct fib6_config cfg;
2318 int err;
1da177e4 2319
86872cb5
TG
2320 err = rtm_to_fib6_config(skb, nlh, &cfg);
2321 if (err < 0)
2322 return err;
2323
2324 return ip6_route_add(&cfg);
1da177e4
LT
2325}
2326
339bf98f
TG
2327static inline size_t rt6_nlmsg_size(void)
2328{
2329 return NLMSG_ALIGN(sizeof(struct rtmsg))
2330 + nla_total_size(16) /* RTA_SRC */
2331 + nla_total_size(16) /* RTA_DST */
2332 + nla_total_size(16) /* RTA_GATEWAY */
2333 + nla_total_size(16) /* RTA_PREFSRC */
2334 + nla_total_size(4) /* RTA_TABLE */
2335 + nla_total_size(4) /* RTA_IIF */
2336 + nla_total_size(4) /* RTA_OIF */
2337 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2338 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2339 + nla_total_size(sizeof(struct rta_cacheinfo));
2340}
2341
191cd582
BH
2342static int rt6_fill_node(struct net *net,
2343 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2344 struct in6_addr *dst, struct in6_addr *src,
2345 int iif, int type, u32 pid, u32 seq,
7bc570c8 2346 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2347{
2348 struct rtmsg *rtm;
2d7202bf 2349 struct nlmsghdr *nlh;
e3703b3d 2350 long expires;
9e762a4a 2351 u32 table;
f2c31e32 2352 struct neighbour *n;
1da177e4
LT
2353
2354 if (prefix) { /* user wants prefix routes only */
2355 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2356 /* success since this is not a prefix route */
2357 return 1;
2358 }
2359 }
2360
2d7202bf
TG
2361 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2362 if (nlh == NULL)
26932566 2363 return -EMSGSIZE;
2d7202bf
TG
2364
2365 rtm = nlmsg_data(nlh);
1da177e4
LT
2366 rtm->rtm_family = AF_INET6;
2367 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2368 rtm->rtm_src_len = rt->rt6i_src.plen;
2369 rtm->rtm_tos = 0;
c71099ac 2370 if (rt->rt6i_table)
9e762a4a 2371 table = rt->rt6i_table->tb6_id;
c71099ac 2372 else
9e762a4a
PM
2373 table = RT6_TABLE_UNSPEC;
2374 rtm->rtm_table = table;
2d7202bf 2375 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2376 if (rt->rt6i_flags&RTF_REJECT)
2377 rtm->rtm_type = RTN_UNREACHABLE;
ab79ad14
2378 else if (rt->rt6i_flags&RTF_LOCAL)
2379 rtm->rtm_type = RTN_LOCAL;
1da177e4
LT
2380 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2381 rtm->rtm_type = RTN_LOCAL;
2382 else
2383 rtm->rtm_type = RTN_UNICAST;
2384 rtm->rtm_flags = 0;
2385 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2386 rtm->rtm_protocol = rt->rt6i_protocol;
2387 if (rt->rt6i_flags&RTF_DYNAMIC)
2388 rtm->rtm_protocol = RTPROT_REDIRECT;
2389 else if (rt->rt6i_flags & RTF_ADDRCONF)
2390 rtm->rtm_protocol = RTPROT_KERNEL;
2391 else if (rt->rt6i_flags&RTF_DEFAULT)
2392 rtm->rtm_protocol = RTPROT_RA;
2393
2394 if (rt->rt6i_flags&RTF_CACHE)
2395 rtm->rtm_flags |= RTM_F_CLONED;
2396
2397 if (dst) {
2d7202bf 2398 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2399 rtm->rtm_dst_len = 128;
1da177e4 2400 } else if (rtm->rtm_dst_len)
2d7202bf 2401 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2402#ifdef CONFIG_IPV6_SUBTREES
2403 if (src) {
2d7202bf 2404 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2405 rtm->rtm_src_len = 128;
1da177e4 2406 } else if (rtm->rtm_src_len)
2d7202bf 2407 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2408#endif
7bc570c8
YH
2409 if (iif) {
2410#ifdef CONFIG_IPV6_MROUTE
2411 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2412 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2413 if (err <= 0) {
2414 if (!nowait) {
2415 if (err == 0)
2416 return 0;
2417 goto nla_put_failure;
2418 } else {
2419 if (err == -EMSGSIZE)
2420 goto nla_put_failure;
2421 }
2422 }
2423 } else
2424#endif
2425 NLA_PUT_U32(skb, RTA_IIF, iif);
2426 } else if (dst) {
1da177e4 2427 struct in6_addr saddr_buf;
c3968a85 2428 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2429 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2430 }
2d7202bf 2431
c3968a85
DW
2432 if (rt->rt6i_prefsrc.plen) {
2433 struct in6_addr saddr_buf;
2434 ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
2435 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2436 }
2437
defb3519 2438 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2439 goto nla_put_failure;
2440
f2c31e32
ED
2441 rcu_read_lock();
2442 n = dst_get_neighbour(&rt->dst);
2443 if (n)
2444 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2445 rcu_read_unlock();
2d7202bf 2446
d8d1f30b 2447 if (rt->dst.dev)
2d7202bf
TG
2448 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2449
2450 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2451
36e3deae
YH
2452 if (!(rt->rt6i_flags & RTF_EXPIRES))
2453 expires = 0;
2454 else if (rt->rt6i_expires - jiffies < INT_MAX)
2455 expires = rt->rt6i_expires - jiffies;
2456 else
2457 expires = INT_MAX;
69cdf8f9 2458
d8d1f30b
CG
2459 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2460 expires, rt->dst.error) < 0)
e3703b3d 2461 goto nla_put_failure;
2d7202bf
TG
2462
2463 return nlmsg_end(skb, nlh);
2464
2465nla_put_failure:
26932566
PM
2466 nlmsg_cancel(skb, nlh);
2467 return -EMSGSIZE;
1da177e4
LT
2468}
2469
1b43af54 2470int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2471{
2472 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2473 int prefix;
2474
2d7202bf
TG
2475 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2476 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2477 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2478 } else
2479 prefix = 0;
2480
191cd582
BH
2481 return rt6_fill_node(arg->net,
2482 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2483 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2484 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2485}
2486
c127ea2c 2487static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2488{
3b1e0a65 2489 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2490 struct nlattr *tb[RTA_MAX+1];
2491 struct rt6_info *rt;
1da177e4 2492 struct sk_buff *skb;
ab364a6f 2493 struct rtmsg *rtm;
4c9483b2 2494 struct flowi6 fl6;
ab364a6f 2495 int err, iif = 0;
1da177e4 2496
ab364a6f
TG
2497 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2498 if (err < 0)
2499 goto errout;
1da177e4 2500
ab364a6f 2501 err = -EINVAL;
4c9483b2 2502 memset(&fl6, 0, sizeof(fl6));
1da177e4 2503
ab364a6f
TG
2504 if (tb[RTA_SRC]) {
2505 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2506 goto errout;
2507
4c9483b2 2508 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
ab364a6f
TG
2509 }
2510
2511 if (tb[RTA_DST]) {
2512 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2513 goto errout;
2514
4c9483b2 2515 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
ab364a6f
TG
2516 }
2517
2518 if (tb[RTA_IIF])
2519 iif = nla_get_u32(tb[RTA_IIF]);
2520
2521 if (tb[RTA_OIF])
4c9483b2 2522 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2523
2524 if (iif) {
2525 struct net_device *dev;
5578689a 2526 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2527 if (!dev) {
2528 err = -ENODEV;
ab364a6f 2529 goto errout;
1da177e4
LT
2530 }
2531 }
2532
ab364a6f
TG
2533 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2534 if (skb == NULL) {
2535 err = -ENOBUFS;
2536 goto errout;
2537 }
1da177e4 2538
ab364a6f
TG
2539 /* Reserve room for dummy headers, this skb can pass
2540 through good chunk of routing engine.
2541 */
459a98ed 2542 skb_reset_mac_header(skb);
ab364a6f 2543 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2544
4c9483b2 2545 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2546 skb_dst_set(skb, &rt->dst);
1da177e4 2547
4c9483b2 2548 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2549 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2550 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2551 if (err < 0) {
ab364a6f
TG
2552 kfree_skb(skb);
2553 goto errout;
1da177e4
LT
2554 }
2555
5578689a 2556 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2557errout:
1da177e4 2558 return err;
1da177e4
LT
2559}
2560
86872cb5 2561void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2562{
2563 struct sk_buff *skb;
5578689a 2564 struct net *net = info->nl_net;
528c4ceb
DL
2565 u32 seq;
2566 int err;
2567
2568 err = -ENOBUFS;
2569 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2570
339bf98f 2571 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2572 if (skb == NULL)
2573 goto errout;
2574
191cd582 2575 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2576 event, info->pid, seq, 0, 0, 0);
26932566
PM
2577 if (err < 0) {
2578 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2579 WARN_ON(err == -EMSGSIZE);
2580 kfree_skb(skb);
2581 goto errout;
2582 }
1ce85fe4
PNA
2583 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2584 info->nlh, gfp_any());
2585 return;
21713ebc
TG
2586errout:
2587 if (err < 0)
5578689a 2588 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2589}
2590
8ed67789
DL
2591static int ip6_route_dev_notify(struct notifier_block *this,
2592 unsigned long event, void *data)
2593{
2594 struct net_device *dev = (struct net_device *)data;
c346dca1 2595 struct net *net = dev_net(dev);
8ed67789
DL
2596
2597 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2598 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2599 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2600#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2601 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2602 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2603 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2604 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2605#endif
2606 }
2607
2608 return NOTIFY_OK;
2609}
2610
1da177e4
LT
2611/*
2612 * /proc
2613 */
2614
2615#ifdef CONFIG_PROC_FS
2616
1da177e4
LT
/*
 * Buffer-cursor state for a /proc read.
 * NOTE(review): no code visible in this part of the file references this
 * structure -- confirm whether it is still used anywhere.
 */
struct rt6_proc_arg
{
	char	*buffer;
	int	offset;
	int	length;
	int	skip;
	int	len;
};
2625
2626static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2627{
33120b30 2628 struct seq_file *m = p_arg;
69cce1d1 2629 struct neighbour *n;
1da177e4 2630
4b7a4274 2631 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2632
2633#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2634 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2635#else
33120b30 2636 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2637#endif
f2c31e32 2638 rcu_read_lock();
69cce1d1
DM
2639 n = dst_get_neighbour(&rt->dst);
2640 if (n) {
2641 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2642 } else {
33120b30 2643 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2644 }
f2c31e32 2645 rcu_read_unlock();
33120b30 2646 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2647 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2648 rt->dst.__use, rt->rt6i_flags,
33120b30 2649 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2650 return 0;
2651}
2652
33120b30 2653static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2654{
f3db4851
DL
2655 struct net *net = (struct net *)m->private;
2656 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2657 return 0;
2658}
1da177e4 2659
33120b30
AD
2660static int ipv6_route_open(struct inode *inode, struct file *file)
2661{
de05c557 2662 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2663}
2664
33120b30
AD
2665static const struct file_operations ipv6_route_proc_fops = {
2666 .owner = THIS_MODULE,
2667 .open = ipv6_route_open,
2668 .read = seq_read,
2669 .llseek = seq_lseek,
b6fcbdb4 2670 .release = single_release_net,
33120b30
AD
2671};
2672
1da177e4
LT
2673static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2674{
69ddb805 2675 struct net *net = (struct net *)seq->private;
1da177e4 2676 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2677 net->ipv6.rt6_stats->fib_nodes,
2678 net->ipv6.rt6_stats->fib_route_nodes,
2679 net->ipv6.rt6_stats->fib_rt_alloc,
2680 net->ipv6.rt6_stats->fib_rt_entries,
2681 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2682 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2683 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2684
2685 return 0;
2686}
2687
2688static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2689{
de05c557 2690 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2691}
2692
9a32144e 2693static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2694 .owner = THIS_MODULE,
2695 .open = rt6_stats_seq_open,
2696 .read = seq_read,
2697 .llseek = seq_lseek,
b6fcbdb4 2698 .release = single_release_net,
1da177e4
LT
2699};
2700#endif /* CONFIG_PROC_FS */
2701
2702#ifdef CONFIG_SYSCTL
2703
1da177e4 2704static
8d65af78 2705int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2706 void __user *buffer, size_t *lenp, loff_t *ppos)
2707{
c486da34
LAG
2708 struct net *net;
2709 int delay;
2710 if (!write)
1da177e4 2711 return -EINVAL;
c486da34
LAG
2712
2713 net = (struct net *)ctl->extra1;
2714 delay = net->ipv6.sysctl.flush_delay;
2715 proc_dointvec(ctl, write, buffer, lenp, ppos);
2716 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2717 return 0;
1da177e4
LT
2718}
2719
760f2d01 2720ctl_table ipv6_route_table_template[] = {
1ab1457c 2721 {
1da177e4 2722 .procname = "flush",
4990509f 2723 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2724 .maxlen = sizeof(int),
89c8b3a1 2725 .mode = 0200,
6d9f239a 2726 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2727 },
2728 {
1da177e4 2729 .procname = "gc_thresh",
9a7ec3a9 2730 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2731 .maxlen = sizeof(int),
2732 .mode = 0644,
6d9f239a 2733 .proc_handler = proc_dointvec,
1da177e4
LT
2734 },
2735 {
1da177e4 2736 .procname = "max_size",
4990509f 2737 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2738 .maxlen = sizeof(int),
2739 .mode = 0644,
6d9f239a 2740 .proc_handler = proc_dointvec,
1da177e4
LT
2741 },
2742 {
1da177e4 2743 .procname = "gc_min_interval",
4990509f 2744 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2745 .maxlen = sizeof(int),
2746 .mode = 0644,
6d9f239a 2747 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2748 },
2749 {
1da177e4 2750 .procname = "gc_timeout",
4990509f 2751 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2752 .maxlen = sizeof(int),
2753 .mode = 0644,
6d9f239a 2754 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2755 },
2756 {
1da177e4 2757 .procname = "gc_interval",
4990509f 2758 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2759 .maxlen = sizeof(int),
2760 .mode = 0644,
6d9f239a 2761 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2762 },
2763 {
1da177e4 2764 .procname = "gc_elasticity",
4990509f 2765 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2766 .maxlen = sizeof(int),
2767 .mode = 0644,
f3d3f616 2768 .proc_handler = proc_dointvec,
1da177e4
LT
2769 },
2770 {
1da177e4 2771 .procname = "mtu_expires",
4990509f 2772 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2773 .maxlen = sizeof(int),
2774 .mode = 0644,
6d9f239a 2775 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2776 },
2777 {
1da177e4 2778 .procname = "min_adv_mss",
4990509f 2779 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2780 .maxlen = sizeof(int),
2781 .mode = 0644,
f3d3f616 2782 .proc_handler = proc_dointvec,
1da177e4
LT
2783 },
2784 {
1da177e4 2785 .procname = "gc_min_interval_ms",
4990509f 2786 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2787 .maxlen = sizeof(int),
2788 .mode = 0644,
6d9f239a 2789 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2790 },
f8572d8f 2791 { }
1da177e4
LT
2792};
2793
2c8c1e72 2794struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2795{
2796 struct ctl_table *table;
2797
2798 table = kmemdup(ipv6_route_table_template,
2799 sizeof(ipv6_route_table_template),
2800 GFP_KERNEL);
5ee09105
YH
2801
2802 if (table) {
2803 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2804 table[0].extra1 = net;
86393e52 2805 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2806 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2807 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2808 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2809 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2810 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2811 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2812 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2813 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2814 }
2815
760f2d01
DL
2816 return table;
2817}
1da177e4
LT
2818#endif
2819
2c8c1e72 2820static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2821{
633d424b 2822 int ret = -ENOMEM;
8ed67789 2823
86393e52
AD
2824 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2825 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2826
fc66f95c
ED
2827 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2828 goto out_ip6_dst_ops;
2829
8ed67789
DL
2830 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2831 sizeof(*net->ipv6.ip6_null_entry),
2832 GFP_KERNEL);
2833 if (!net->ipv6.ip6_null_entry)
fc66f95c 2834 goto out_ip6_dst_entries;
d8d1f30b 2835 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2836 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2837 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2838 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2839 ip6_template_metrics, true);
8ed67789
DL
2840
2841#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2842 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2843 sizeof(*net->ipv6.ip6_prohibit_entry),
2844 GFP_KERNEL);
68fffc67
PZ
2845 if (!net->ipv6.ip6_prohibit_entry)
2846 goto out_ip6_null_entry;
d8d1f30b 2847 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2848 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2849 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2850 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2851 ip6_template_metrics, true);
8ed67789
DL
2852
2853 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2854 sizeof(*net->ipv6.ip6_blk_hole_entry),
2855 GFP_KERNEL);
68fffc67
PZ
2856 if (!net->ipv6.ip6_blk_hole_entry)
2857 goto out_ip6_prohibit_entry;
d8d1f30b 2858 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2859 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2860 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2861 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2862 ip6_template_metrics, true);
8ed67789
DL
2863#endif
2864
b339a47c
PZ
2865 net->ipv6.sysctl.flush_delay = 0;
2866 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2867 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2868 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2869 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2870 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2871 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2872 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2873
cdb18761
DL
2874#ifdef CONFIG_PROC_FS
2875 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2876 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2877#endif
6891a346
BT
2878 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2879
8ed67789
DL
2880 ret = 0;
2881out:
2882 return ret;
f2fc6a54 2883
68fffc67
PZ
2884#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2885out_ip6_prohibit_entry:
2886 kfree(net->ipv6.ip6_prohibit_entry);
2887out_ip6_null_entry:
2888 kfree(net->ipv6.ip6_null_entry);
2889#endif
fc66f95c
ED
2890out_ip6_dst_entries:
2891 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2892out_ip6_dst_ops:
f2fc6a54 2893 goto out;
cdb18761
DL
2894}
2895
2c8c1e72 2896static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2897{
2898#ifdef CONFIG_PROC_FS
2899 proc_net_remove(net, "ipv6_route");
2900 proc_net_remove(net, "rt6_stats");
2901#endif
8ed67789
DL
2902 kfree(net->ipv6.ip6_null_entry);
2903#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2904 kfree(net->ipv6.ip6_prohibit_entry);
2905 kfree(net->ipv6.ip6_blk_hole_entry);
2906#endif
41bb78b4 2907 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2908}
2909
2910static struct pernet_operations ip6_route_net_ops = {
2911 .init = ip6_route_net_init,
2912 .exit = ip6_route_net_exit,
2913};
2914
8ed67789
DL
2915static struct notifier_block ip6_route_dev_notifier = {
2916 .notifier_call = ip6_route_dev_notify,
2917 .priority = 0,
2918};
2919
433d49c3 2920int __init ip6_route_init(void)
1da177e4 2921{
433d49c3
DL
2922 int ret;
2923
9a7ec3a9
DL
2924 ret = -ENOMEM;
2925 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2926 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2927 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2928 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2929 goto out;
14e50e57 2930
fc66f95c 2931 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2932 if (ret)
bdb3289f 2933 goto out_kmem_cache;
bdb3289f 2934
fc66f95c
ED
2935 ret = register_pernet_subsys(&ip6_route_net_ops);
2936 if (ret)
2937 goto out_dst_entries;
2938
5dc121e9
AE
2939 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2940
8ed67789
DL
2941 /* Registering of the loopback is done before this portion of code,
2942 * the loopback reference in rt6_info will not be taken, do it
2943 * manually for init_net */
d8d1f30b 2944 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2945 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2946 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2947 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2948 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2949 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2950 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2951 #endif
433d49c3
DL
2952 ret = fib6_init();
2953 if (ret)
8ed67789 2954 goto out_register_subsys;
433d49c3 2955
433d49c3
DL
2956 ret = xfrm6_init();
2957 if (ret)
cdb18761 2958 goto out_fib6_init;
c35b7e72 2959
433d49c3
DL
2960 ret = fib6_rules_init();
2961 if (ret)
2962 goto xfrm6_init;
7e5449c2 2963
433d49c3 2964 ret = -ENOBUFS;
c7ac8679
GR
2965 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2966 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2967 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2968 goto fib6_rules_init;
c127ea2c 2969
8ed67789 2970 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2971 if (ret)
2972 goto fib6_rules_init;
8ed67789 2973
433d49c3
DL
2974out:
2975 return ret;
2976
2977fib6_rules_init:
433d49c3
DL
2978 fib6_rules_cleanup();
2979xfrm6_init:
433d49c3 2980 xfrm6_fini();
433d49c3 2981out_fib6_init:
433d49c3 2982 fib6_gc_cleanup();
8ed67789
DL
2983out_register_subsys:
2984 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
2985out_dst_entries:
2986 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 2987out_kmem_cache:
f2fc6a54 2988 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2989 goto out;
1da177e4
LT
2990}
2991
2992void ip6_route_cleanup(void)
2993{
8ed67789 2994 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2995 fib6_rules_cleanup();
1da177e4 2996 xfrm6_fini();
1da177e4 2997 fib6_gc_cleanup();
8ed67789 2998 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 2999 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3000 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3001}