]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/route.c
Merge branch 'akpm' (aka "Andrew's patch-bomb, take two")
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
21efcfa0
ED
65static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
66 const struct in6_addr *dest);
1da177e4 67static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 68static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 69static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
70static struct dst_entry *ip6_negative_advice(struct dst_entry *);
71static void ip6_dst_destroy(struct dst_entry *);
72static void ip6_dst_ifdown(struct dst_entry *,
73 struct net_device *dev, int how);
569d3645 74static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
75
76static int ip6_pkt_discard(struct sk_buff *skb);
77static int ip6_pkt_discard_out(struct sk_buff *skb);
78static void ip6_link_failure(struct sk_buff *skb);
79static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
80
70ceb4f5 81#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 82static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
83 const struct in6_addr *prefix, int prefixlen,
84 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 85 unsigned pref);
efa2cea0 86static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
87 const struct in6_addr *prefix, int prefixlen,
88 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
89#endif
90
06582540
DM
91static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
92{
93 struct rt6_info *rt = (struct rt6_info *) dst;
94 struct inet_peer *peer;
95 u32 *p = NULL;
96
8e2ec639
YZ
97 if (!(rt->dst.flags & DST_HOST))
98 return NULL;
99
06582540
DM
100 if (!rt->rt6i_peer)
101 rt6_bind_peer(rt, 1);
102
103 peer = rt->rt6i_peer;
104 if (peer) {
105 u32 *old_p = __DST_METRICS_PTR(old);
106 unsigned long prev, new;
107
108 p = peer->metrics;
109 if (inet_metrics_new(peer))
110 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
111
112 new = (unsigned long) p;
113 prev = cmpxchg(&dst->_metrics, old, new);
114
115 if (prev != old) {
116 p = __DST_METRICS_PTR(prev);
117 if (prev & DST_METRICS_READ_ONLY)
118 p = NULL;
119 }
120 }
121 return p;
122}
123
d3aaeb38
DM
124static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
125{
f83c7790
DM
126 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
127 if (n)
128 return n;
129 return neigh_create(&nd_tbl, daddr, dst->dev);
130}
131
8ade06c6 132static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 133{
8ade06c6
DM
134 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
135 if (!n) {
136 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
137 if (IS_ERR(n))
138 return PTR_ERR(n);
139 }
f83c7790
DM
140 dst_set_neighbour(&rt->dst, n);
141
142 return 0;
d3aaeb38
DM
143}
144
9a7ec3a9 145static struct dst_ops ip6_dst_ops_template = {
1da177e4 146 .family = AF_INET6,
09640e63 147 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
148 .gc = ip6_dst_gc,
149 .gc_thresh = 1024,
150 .check = ip6_dst_check,
0dbaee3b 151 .default_advmss = ip6_default_advmss,
ebb762f2 152 .mtu = ip6_mtu,
06582540 153 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
154 .destroy = ip6_dst_destroy,
155 .ifdown = ip6_dst_ifdown,
156 .negative_advice = ip6_negative_advice,
157 .link_failure = ip6_link_failure,
158 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 159 .local_out = __ip6_local_out,
d3aaeb38 160 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
161};
162
ebb762f2 163static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 164{
618f9bc7
SK
165 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
166
167 return mtu ? : dst->dev->mtu;
ec831ea7
RD
168}
169
14e50e57
DM
170static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
171{
172}
173
0972ddb2
HB
174static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
175 unsigned long old)
176{
177 return NULL;
178}
179
14e50e57
DM
180static struct dst_ops ip6_dst_blackhole_ops = {
181 .family = AF_INET6,
09640e63 182 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
183 .destroy = ip6_dst_destroy,
184 .check = ip6_dst_check,
ebb762f2 185 .mtu = ip6_blackhole_mtu,
214f45c9 186 .default_advmss = ip6_default_advmss,
14e50e57 187 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 188 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 189 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
190};
191
62fa8a84
DM
192static const u32 ip6_template_metrics[RTAX_MAX] = {
193 [RTAX_HOPLIMIT - 1] = 255,
194};
195
bdb3289f 196static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
197 .dst = {
198 .__refcnt = ATOMIC_INIT(1),
199 .__use = 1,
200 .obsolete = -1,
201 .error = -ENETUNREACH,
d8d1f30b
CG
202 .input = ip6_pkt_discard,
203 .output = ip6_pkt_discard_out,
1da177e4
LT
204 },
205 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 206 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
207 .rt6i_metric = ~(u32) 0,
208 .rt6i_ref = ATOMIC_INIT(1),
209};
210
101367c2
TG
211#ifdef CONFIG_IPV6_MULTIPLE_TABLES
212
6723ab54
DM
213static int ip6_pkt_prohibit(struct sk_buff *skb);
214static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 215
280a34c8 216static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
217 .dst = {
218 .__refcnt = ATOMIC_INIT(1),
219 .__use = 1,
220 .obsolete = -1,
221 .error = -EACCES,
d8d1f30b
CG
222 .input = ip6_pkt_prohibit,
223 .output = ip6_pkt_prohibit_out,
101367c2
TG
224 },
225 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 226 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
227 .rt6i_metric = ~(u32) 0,
228 .rt6i_ref = ATOMIC_INIT(1),
229};
230
bdb3289f 231static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
232 .dst = {
233 .__refcnt = ATOMIC_INIT(1),
234 .__use = 1,
235 .obsolete = -1,
236 .error = -EINVAL,
d8d1f30b
CG
237 .input = dst_discard,
238 .output = dst_discard,
101367c2
TG
239 },
240 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 241 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
242 .rt6i_metric = ~(u32) 0,
243 .rt6i_ref = ATOMIC_INIT(1),
244};
245
246#endif
247
1da177e4 248/* allocate dst with ip6_dst_ops */
5c1e6aa3 249static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
250 struct net_device *dev,
251 int flags)
1da177e4 252{
957c665f 253 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 254
38308473 255 if (rt)
fbe58186 256 memset(&rt->rt6i_table, 0,
38308473 257 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
258
259 return rt;
1da177e4
LT
260}
261
262static void ip6_dst_destroy(struct dst_entry *dst)
263{
264 struct rt6_info *rt = (struct rt6_info *)dst;
265 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 266 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 267
8e2ec639
YZ
268 if (!(rt->dst.flags & DST_HOST))
269 dst_destroy_metrics_generic(dst);
270
38308473 271 if (idev) {
1da177e4
LT
272 rt->rt6i_idev = NULL;
273 in6_dev_put(idev);
1ab1457c 274 }
b3419363 275 if (peer) {
b3419363
DM
276 rt->rt6i_peer = NULL;
277 inet_putpeer(peer);
278 }
279}
280
6431cbc2
DM
281static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
282
283static u32 rt6_peer_genid(void)
284{
285 return atomic_read(&__rt6_peer_genid);
286}
287
b3419363
DM
288void rt6_bind_peer(struct rt6_info *rt, int create)
289{
290 struct inet_peer *peer;
291
b3419363
DM
292 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
293 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
294 inet_putpeer(peer);
6431cbc2
DM
295 else
296 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
297}
298
299static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
300 int how)
301{
302 struct rt6_info *rt = (struct rt6_info *)dst;
303 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 304 struct net_device *loopback_dev =
c346dca1 305 dev_net(dev)->loopback_dev;
1da177e4 306
38308473 307 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
308 struct inet6_dev *loopback_idev =
309 in6_dev_get(loopback_dev);
38308473 310 if (loopback_idev) {
1da177e4
LT
311 rt->rt6i_idev = loopback_idev;
312 in6_dev_put(idev);
313 }
314 }
315}
316
317static __inline__ int rt6_check_expired(const struct rt6_info *rt)
318{
a02cec21 319 return (rt->rt6i_flags & RTF_EXPIRES) &&
d1918542 320 time_after(jiffies, rt->dst.expires);
1da177e4
LT
321}
322
b71d1d42 323static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 324{
a02cec21
ED
325 return ipv6_addr_type(daddr) &
326 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
327}
328
1da177e4 329/*
c71099ac 330 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
331 */
332
8ed67789
DL
333static inline struct rt6_info *rt6_device_match(struct net *net,
334 struct rt6_info *rt,
b71d1d42 335 const struct in6_addr *saddr,
1da177e4 336 int oif,
d420895e 337 int flags)
1da177e4
LT
338{
339 struct rt6_info *local = NULL;
340 struct rt6_info *sprt;
341
dd3abc4e
YH
342 if (!oif && ipv6_addr_any(saddr))
343 goto out;
344
d8d1f30b 345 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 346 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
347
348 if (oif) {
1da177e4
LT
349 if (dev->ifindex == oif)
350 return sprt;
351 if (dev->flags & IFF_LOOPBACK) {
38308473 352 if (!sprt->rt6i_idev ||
1da177e4 353 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 354 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 355 continue;
1ab1457c 356 if (local && (!oif ||
1da177e4
LT
357 local->rt6i_idev->dev->ifindex == oif))
358 continue;
359 }
360 local = sprt;
361 }
dd3abc4e
YH
362 } else {
363 if (ipv6_chk_addr(net, saddr, dev,
364 flags & RT6_LOOKUP_F_IFACE))
365 return sprt;
1da177e4 366 }
dd3abc4e 367 }
1da177e4 368
dd3abc4e 369 if (oif) {
1da177e4
LT
370 if (local)
371 return local;
372
d420895e 373 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 374 return net->ipv6.ip6_null_entry;
1da177e4 375 }
dd3abc4e 376out:
1da177e4
LT
377 return rt;
378}
379
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour attached to @rt is not in a NUD_VALID state and more
 * than rtr_probe_interval has elapsed since it was last updated, send a
 * neighbour solicitation to its solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute; the rtr_probe_interval check below is what limits it here.
 *
 * A NULL @rt, or a route without a neighbour, is silently ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	read_lock_bh(&neigh->lock);
	/* Re-check the state under the lock and apply the rate limit. */
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		read_unlock_bh(&neigh->lock);
		goto out;
	}

	neigh->updated = jiffies;
	read_unlock_bh(&neigh->lock);

	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
out:
	rcu_read_unlock();
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
419
1da177e4 420/*
554cfb7e 421 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 422 */
b6f99a21 423static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 424{
d1918542 425 struct net_device *dev = rt->dst.dev;
161980f4 426 if (!oif || dev->ifindex == oif)
554cfb7e 427 return 2;
161980f4
DM
428 if ((dev->flags & IFF_LOOPBACK) &&
429 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
430 return 1;
431 return 0;
554cfb7e 432}
1da177e4 433
b6f99a21 434static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 435{
f2c31e32 436 struct neighbour *neigh;
398bcbeb 437 int m;
f2c31e32
ED
438
439 rcu_read_lock();
27217455 440 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
441 if (rt->rt6i_flags & RTF_NONEXTHOP ||
442 !(rt->rt6i_flags & RTF_GATEWAY))
443 m = 1;
444 else if (neigh) {
554cfb7e
YH
445 read_lock_bh(&neigh->lock);
446 if (neigh->nud_state & NUD_VALID)
4d0c5911 447 m = 2;
398bcbeb
YH
448#ifdef CONFIG_IPV6_ROUTER_PREF
449 else if (neigh->nud_state & NUD_FAILED)
450 m = 0;
451#endif
452 else
ea73ee23 453 m = 1;
554cfb7e 454 read_unlock_bh(&neigh->lock);
398bcbeb
YH
455 } else
456 m = 0;
f2c31e32 457 rcu_read_unlock();
554cfb7e 458 return m;
1da177e4
LT
459}
460
554cfb7e
YH
461static int rt6_score_route(struct rt6_info *rt, int oif,
462 int strict)
1da177e4 463{
4d0c5911 464 int m, n;
1ab1457c 465
4d0c5911 466 m = rt6_check_dev(rt, oif);
77d16f45 467 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 468 return -1;
ebacaaa0
YH
469#ifdef CONFIG_IPV6_ROUTER_PREF
470 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
471#endif
4d0c5911 472 n = rt6_check_neigh(rt);
557e92ef 473 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
474 return -1;
475 return m;
476}
477
f11e6659
DM
478static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
479 int *mpri, struct rt6_info *match)
554cfb7e 480{
f11e6659
DM
481 int m;
482
483 if (rt6_check_expired(rt))
484 goto out;
485
486 m = rt6_score_route(rt, oif, strict);
487 if (m < 0)
488 goto out;
489
490 if (m > *mpri) {
491 if (strict & RT6_LOOKUP_F_REACHABLE)
492 rt6_probe(match);
493 *mpri = m;
494 match = rt;
495 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
496 rt6_probe(rt);
497 }
498
499out:
500 return match;
501}
502
503static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
504 struct rt6_info *rr_head,
505 u32 metric, int oif, int strict)
506{
507 struct rt6_info *rt, *match;
554cfb7e 508 int mpri = -1;
1da177e4 509
f11e6659
DM
510 match = NULL;
511 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 512 rt = rt->dst.rt6_next)
f11e6659
DM
513 match = find_match(rt, oif, strict, &mpri, match);
514 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 515 rt = rt->dst.rt6_next)
f11e6659 516 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 517
f11e6659
DM
518 return match;
519}
1da177e4 520
f11e6659
DM
521static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
522{
523 struct rt6_info *match, *rt0;
8ed67789 524 struct net *net;
1da177e4 525
f11e6659
DM
526 rt0 = fn->rr_ptr;
527 if (!rt0)
528 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 529
f11e6659 530 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 531
554cfb7e 532 if (!match &&
f11e6659 533 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 534 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 535
554cfb7e 536 /* no entries matched; do round-robin */
f11e6659
DM
537 if (!next || next->rt6i_metric != rt0->rt6i_metric)
538 next = fn->leaf;
539
540 if (next != rt0)
541 fn->rr_ptr = next;
1da177e4 542 }
1da177e4 543
d1918542 544 net = dev_net(rt0->dst.dev);
a02cec21 545 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
546}
547
70ceb4f5
YH
548#ifdef CONFIG_IPV6_ROUTE_INFO
549int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 550 const struct in6_addr *gwaddr)
70ceb4f5 551{
c346dca1 552 struct net *net = dev_net(dev);
70ceb4f5
YH
553 struct route_info *rinfo = (struct route_info *) opt;
554 struct in6_addr prefix_buf, *prefix;
555 unsigned int pref;
4bed72e4 556 unsigned long lifetime;
70ceb4f5
YH
557 struct rt6_info *rt;
558
559 if (len < sizeof(struct route_info)) {
560 return -EINVAL;
561 }
562
563 /* Sanity check for prefix_len and length */
564 if (rinfo->length > 3) {
565 return -EINVAL;
566 } else if (rinfo->prefix_len > 128) {
567 return -EINVAL;
568 } else if (rinfo->prefix_len > 64) {
569 if (rinfo->length < 2) {
570 return -EINVAL;
571 }
572 } else if (rinfo->prefix_len > 0) {
573 if (rinfo->length < 1) {
574 return -EINVAL;
575 }
576 }
577
578 pref = rinfo->route_pref;
579 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 580 return -EINVAL;
70ceb4f5 581
4bed72e4 582 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
583
584 if (rinfo->length == 3)
585 prefix = (struct in6_addr *)rinfo->prefix;
586 else {
587 /* this function is safe */
588 ipv6_addr_prefix(&prefix_buf,
589 (struct in6_addr *)rinfo->prefix,
590 rinfo->prefix_len);
591 prefix = &prefix_buf;
592 }
593
efa2cea0
DL
594 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
595 dev->ifindex);
70ceb4f5
YH
596
597 if (rt && !lifetime) {
e0a1ad73 598 ip6_del_rt(rt);
70ceb4f5
YH
599 rt = NULL;
600 }
601
602 if (!rt && lifetime)
efa2cea0 603 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
604 pref);
605 else if (rt)
606 rt->rt6i_flags = RTF_ROUTEINFO |
607 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
608
609 if (rt) {
4bed72e4 610 if (!addrconf_finite_timeout(lifetime)) {
70ceb4f5
YH
611 rt->rt6i_flags &= ~RTF_EXPIRES;
612 } else {
d1918542 613 rt->dst.expires = jiffies + HZ * lifetime;
70ceb4f5
YH
614 rt->rt6i_flags |= RTF_EXPIRES;
615 }
d8d1f30b 616 dst_release(&rt->dst);
70ceb4f5
YH
617 }
618 return 0;
619}
620#endif
621
/*
 * BACKTRACK(__net, saddr) - climb the fib6 tree after a failed match.
 *
 * Must be expanded inside a function that has local variables "rt" and
 * "fn" and labels "out" and "restart".  If "rt" is the null entry of
 * __net, walk towards the root: at each step move to the parent, or
 * look @saddr up in the parent's subtree when it has one that is not
 * the node we came from.  Jumps to "restart" at the first node carrying
 * RTN_RTINFO, or to "out" once the top-level root is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 639
8ed67789
DL
640static struct rt6_info *ip6_pol_route_lookup(struct net *net,
641 struct fib6_table *table,
4c9483b2 642 struct flowi6 *fl6, int flags)
1da177e4
LT
643{
644 struct fib6_node *fn;
645 struct rt6_info *rt;
646
c71099ac 647 read_lock_bh(&table->tb6_lock);
4c9483b2 648 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
649restart:
650 rt = fn->leaf;
4c9483b2
DM
651 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
652 BACKTRACK(net, &fl6->saddr);
c71099ac 653out:
d8d1f30b 654 dst_use(&rt->dst, jiffies);
c71099ac 655 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
656 return rt;
657
658}
659
ea6e574e
FW
660struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
661 int flags)
662{
663 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
664}
665EXPORT_SYMBOL_GPL(ip6_route_lookup);
666
9acd9f3a
YH
667struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
668 const struct in6_addr *saddr, int oif, int strict)
c71099ac 669{
4c9483b2
DM
670 struct flowi6 fl6 = {
671 .flowi6_oif = oif,
672 .daddr = *daddr,
c71099ac
TG
673 };
674 struct dst_entry *dst;
77d16f45 675 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 676
adaa70bb 677 if (saddr) {
4c9483b2 678 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
679 flags |= RT6_LOOKUP_F_HAS_SADDR;
680 }
681
4c9483b2 682 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
683 if (dst->error == 0)
684 return (struct rt6_info *) dst;
685
686 dst_release(dst);
687
1da177e4
LT
688 return NULL;
689}
690
7159039a
YH
691EXPORT_SYMBOL(rt6_lookup);
692
/* ip6_ins_rt is called with table->tb6_lock NOT held; the lock is taken
   here.  It takes a new route entry; if the addition fails for any
   reason, the route is freed.  In any case, if the caller does not hold
   the route, it may be destroyed.
 */
698
86872cb5 699static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
700{
701 int err;
c71099ac 702 struct fib6_table *table;
1da177e4 703
c71099ac
TG
704 table = rt->rt6i_table;
705 write_lock_bh(&table->tb6_lock);
86872cb5 706 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 707 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
708
709 return err;
710}
711
40e22e8f
TG
712int ip6_ins_rt(struct rt6_info *rt)
713{
4d1169c1 714 struct nl_info info = {
d1918542 715 .nl_net = dev_net(rt->dst.dev),
4d1169c1 716 };
528c4ceb 717 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
718}
719
21efcfa0
ED
720static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
721 const struct in6_addr *daddr,
b71d1d42 722 const struct in6_addr *saddr)
1da177e4 723{
1da177e4
LT
724 struct rt6_info *rt;
725
726 /*
727 * Clone the route.
728 */
729
21efcfa0 730 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
731
732 if (rt) {
14deae41
DM
733 int attempts = !in_softirq();
734
38308473 735 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 736 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 737 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 738 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 739 rt->rt6i_gateway = *daddr;
58c4fb86 740 }
1da177e4 741
1da177e4 742 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
743
744#ifdef CONFIG_IPV6_SUBTREES
745 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 746 rt->rt6i_src.addr = *saddr;
1da177e4
LT
747 rt->rt6i_src.plen = 128;
748 }
749#endif
750
14deae41 751 retry:
8ade06c6 752 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 753 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
754 int saved_rt_min_interval =
755 net->ipv6.sysctl.ip6_rt_gc_min_interval;
756 int saved_rt_elasticity =
757 net->ipv6.sysctl.ip6_rt_gc_elasticity;
758
759 if (attempts-- > 0) {
760 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
761 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
762
86393e52 763 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
764
765 net->ipv6.sysctl.ip6_rt_gc_elasticity =
766 saved_rt_elasticity;
767 net->ipv6.sysctl.ip6_rt_gc_min_interval =
768 saved_rt_min_interval;
769 goto retry;
770 }
771
772 if (net_ratelimit())
773 printk(KERN_WARNING
7e1b33e5 774 "ipv6: Neighbour table overflow.\n");
d8d1f30b 775 dst_free(&rt->dst);
14deae41
DM
776 return NULL;
777 }
95a9a5ba 778 }
1da177e4 779
95a9a5ba
YH
780 return rt;
781}
1da177e4 782
21efcfa0
ED
783static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
784 const struct in6_addr *daddr)
299d9939 785{
21efcfa0
ED
786 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
787
299d9939 788 if (rt) {
299d9939 789 rt->rt6i_flags |= RTF_CACHE;
27217455 790 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
791 }
792 return rt;
793}
794
8ed67789 795static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 796 struct flowi6 *fl6, int flags)
1da177e4
LT
797{
798 struct fib6_node *fn;
519fbd87 799 struct rt6_info *rt, *nrt;
c71099ac 800 int strict = 0;
1da177e4 801 int attempts = 3;
519fbd87 802 int err;
53b7997f 803 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 804
77d16f45 805 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
806
807relookup:
c71099ac 808 read_lock_bh(&table->tb6_lock);
1da177e4 809
8238dd06 810restart_2:
4c9483b2 811 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
812
813restart:
4acad72d 814 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 815
4c9483b2 816 BACKTRACK(net, &fl6->saddr);
8ed67789 817 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 818 rt->rt6i_flags & RTF_CACHE)
1ddef044 819 goto out;
1da177e4 820
d8d1f30b 821 dst_hold(&rt->dst);
c71099ac 822 read_unlock_bh(&table->tb6_lock);
fb9de91e 823
27217455 824 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 825 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 826 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 827 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
828 else
829 goto out2;
e40cf353 830
d8d1f30b 831 dst_release(&rt->dst);
8ed67789 832 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 833
d8d1f30b 834 dst_hold(&rt->dst);
519fbd87 835 if (nrt) {
40e22e8f 836 err = ip6_ins_rt(nrt);
519fbd87 837 if (!err)
1da177e4 838 goto out2;
1da177e4 839 }
1da177e4 840
519fbd87
YH
841 if (--attempts <= 0)
842 goto out2;
843
844 /*
c71099ac 845 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
846 * released someone could insert this route. Relookup.
847 */
d8d1f30b 848 dst_release(&rt->dst);
519fbd87
YH
849 goto relookup;
850
851out:
8238dd06
YH
852 if (reachable) {
853 reachable = 0;
854 goto restart_2;
855 }
d8d1f30b 856 dst_hold(&rt->dst);
c71099ac 857 read_unlock_bh(&table->tb6_lock);
1da177e4 858out2:
d8d1f30b
CG
859 rt->dst.lastuse = jiffies;
860 rt->dst.__use++;
c71099ac
TG
861
862 return rt;
1da177e4
LT
863}
864
8ed67789 865static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 866 struct flowi6 *fl6, int flags)
4acad72d 867{
4c9483b2 868 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
869}
870
c71099ac
TG
871void ip6_route_input(struct sk_buff *skb)
872{
b71d1d42 873 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 874 struct net *net = dev_net(skb->dev);
adaa70bb 875 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
876 struct flowi6 fl6 = {
877 .flowi6_iif = skb->dev->ifindex,
878 .daddr = iph->daddr,
879 .saddr = iph->saddr,
38308473 880 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
881 .flowi6_mark = skb->mark,
882 .flowi6_proto = iph->nexthdr,
c71099ac 883 };
adaa70bb 884
1d6e55f1 885 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 886 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 887
4c9483b2 888 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
889}
890
8ed67789 891static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 892 struct flowi6 *fl6, int flags)
1da177e4 893{
4c9483b2 894 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
895}
896
9c7a4f9c 897struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 898 struct flowi6 *fl6)
c71099ac
TG
899{
900 int flags = 0;
901
4c9483b2 902 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 903 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 904
4c9483b2 905 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 906 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
907 else if (sk)
908 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 909
4c9483b2 910 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
911}
912
7159039a 913EXPORT_SYMBOL(ip6_route_output);
1da177e4 914
2774c131 915struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 916{
5c1e6aa3 917 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
918 struct dst_entry *new = NULL;
919
5c1e6aa3 920 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 921 if (rt) {
cf911662
DM
922 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
923
d8d1f30b 924 new = &rt->dst;
14e50e57 925
14e50e57 926 new->__use = 1;
352e512c
HX
927 new->input = dst_discard;
928 new->output = dst_discard;
14e50e57 929
21efcfa0
ED
930 if (dst_metrics_read_only(&ort->dst))
931 new->_metrics = ort->dst._metrics;
932 else
933 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
934 rt->rt6i_idev = ort->rt6i_idev;
935 if (rt->rt6i_idev)
936 in6_dev_hold(rt->rt6i_idev);
d1918542 937 rt->dst.expires = 0;
14e50e57 938
4e3fd7a0 939 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
940 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
941 rt->rt6i_metric = 0;
942
943 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
944#ifdef CONFIG_IPV6_SUBTREES
945 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
946#endif
947
948 dst_free(new);
949 }
950
69ead7af
DM
951 dst_release(dst_orig);
952 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 953}
14e50e57 954
1da177e4
LT
955/*
956 * Destination cache support functions
957 */
958
959static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
960{
961 struct rt6_info *rt;
962
963 rt = (struct rt6_info *) dst;
964
6431cbc2
DM
965 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
966 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
967 if (!rt->rt6i_peer)
968 rt6_bind_peer(rt, 0);
969 rt->rt6i_peer_genid = rt6_peer_genid();
970 }
1da177e4 971 return dst;
6431cbc2 972 }
1da177e4
LT
973 return NULL;
974}
975
976static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
977{
978 struct rt6_info *rt = (struct rt6_info *) dst;
979
980 if (rt) {
54c1a859
YH
981 if (rt->rt6i_flags & RTF_CACHE) {
982 if (rt6_check_expired(rt)) {
983 ip6_del_rt(rt);
984 dst = NULL;
985 }
986 } else {
1da177e4 987 dst_release(dst);
54c1a859
YH
988 dst = NULL;
989 }
1da177e4 990 }
54c1a859 991 return dst;
1da177e4
LT
992}
993
994static void ip6_link_failure(struct sk_buff *skb)
995{
996 struct rt6_info *rt;
997
3ffe533c 998 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 999
adf30907 1000 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1001 if (rt) {
38308473 1002 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1003 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1004 rt->rt6i_flags |= RTF_EXPIRES;
1005 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1006 rt->rt6i_node->fn_sernum = -1;
1007 }
1008}
1009
1010static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1011{
1012 struct rt6_info *rt6 = (struct rt6_info*)dst;
1013
1014 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1015 rt6->rt6i_flags |= RTF_MODIFIED;
1016 if (mtu < IPV6_MIN_MTU) {
defb3519 1017 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1018 mtu = IPV6_MIN_MTU;
defb3519
DM
1019 features |= RTAX_FEATURE_ALLFRAG;
1020 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1021 }
defb3519 1022 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1023 }
1024}
1025
0dbaee3b 1026static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1027{
0dbaee3b
DM
1028 struct net_device *dev = dst->dev;
1029 unsigned int mtu = dst_mtu(dst);
1030 struct net *net = dev_net(dev);
1031
1da177e4
LT
1032 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1033
5578689a
DL
1034 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1035 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1036
1037 /*
1ab1457c
YH
1038 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1039 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1040 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1041 * rely only on pmtu discovery"
1042 */
1043 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1044 mtu = IPV6_MAXPLEN;
1045 return mtu;
1046}
1047
ebb762f2 1048static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1049{
d33e4553 1050 struct inet6_dev *idev;
618f9bc7
SK
1051 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1052
1053 if (mtu)
1054 return mtu;
1055
1056 mtu = IPV6_MIN_MTU;
d33e4553
DM
1057
1058 rcu_read_lock();
1059 idev = __in6_dev_get(dst->dev);
1060 if (idev)
1061 mtu = idev->cnf.mtu6;
1062 rcu_read_unlock();
1063
1064 return mtu;
1065}
1066
3b00944c
YH
1067static struct dst_entry *icmp6_dst_gc_list;
1068static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1069
3b00944c 1070struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1071 struct neighbour *neigh,
87a11578 1072 struct flowi6 *fl6)
1da177e4 1073{
87a11578 1074 struct dst_entry *dst;
1da177e4
LT
1075 struct rt6_info *rt;
1076 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1077 struct net *net = dev_net(dev);
1da177e4 1078
38308473 1079 if (unlikely(!idev))
1da177e4
LT
1080 return NULL;
1081
957c665f 1082 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1083 if (unlikely(!rt)) {
1da177e4 1084 in6_dev_put(idev);
87a11578 1085 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1086 goto out;
1087 }
1088
1da177e4
LT
1089 if (neigh)
1090 neigh_hold(neigh);
14deae41 1091 else {
f83c7790 1092 neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
b43faac6
DM
1093 if (IS_ERR(neigh)) {
1094 dst_free(&rt->dst);
1095 return ERR_CAST(neigh);
1096 }
14deae41 1097 }
1da177e4 1098
8e2ec639
YZ
1099 rt->dst.flags |= DST_HOST;
1100 rt->dst.output = ip6_output;
69cce1d1 1101 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1102 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1103 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1104 rt->rt6i_dst.plen = 128;
1105 rt->rt6i_idev = idev;
7011687f 1106 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1107
3b00944c 1108 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1109 rt->dst.next = icmp6_dst_gc_list;
1110 icmp6_dst_gc_list = &rt->dst;
3b00944c 1111 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1112
5578689a 1113 fib6_force_start_gc(net);
1da177e4 1114
87a11578
DM
1115 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1116
1da177e4 1117out:
87a11578 1118 return dst;
1da177e4
LT
1119}
1120
3d0f24a7 1121int icmp6_dst_gc(void)
1da177e4 1122{
e9476e95 1123 struct dst_entry *dst, **pprev;
3d0f24a7 1124 int more = 0;
1da177e4 1125
3b00944c
YH
1126 spin_lock_bh(&icmp6_dst_lock);
1127 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1128
1da177e4
LT
1129 while ((dst = *pprev) != NULL) {
1130 if (!atomic_read(&dst->__refcnt)) {
1131 *pprev = dst->next;
1132 dst_free(dst);
1da177e4
LT
1133 } else {
1134 pprev = &dst->next;
3d0f24a7 1135 ++more;
1da177e4
LT
1136 }
1137 }
1138
3b00944c 1139 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1140
3d0f24a7 1141 return more;
1da177e4
LT
1142}
1143
1e493d19
DM
1144static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1145 void *arg)
1146{
1147 struct dst_entry *dst, **pprev;
1148
1149 spin_lock_bh(&icmp6_dst_lock);
1150 pprev = &icmp6_dst_gc_list;
1151 while ((dst = *pprev) != NULL) {
1152 struct rt6_info *rt = (struct rt6_info *) dst;
1153 if (func(rt, arg)) {
1154 *pprev = dst->next;
1155 dst_free(dst);
1156 } else {
1157 pprev = &dst->next;
1158 }
1159 }
1160 spin_unlock_bh(&icmp6_dst_lock);
1161}
1162
569d3645 1163static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1164{
1da177e4 1165 unsigned long now = jiffies;
86393e52 1166 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1167 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1168 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1169 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1170 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1171 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1172 int entries;
7019b78e 1173
fc66f95c 1174 entries = dst_entries_get_fast(ops);
7019b78e 1175 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1176 entries <= rt_max_size)
1da177e4
LT
1177 goto out;
1178
6891a346
BT
1179 net->ipv6.ip6_rt_gc_expire++;
1180 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1181 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1182 entries = dst_entries_get_slow(ops);
1183 if (entries < ops->gc_thresh)
7019b78e 1184 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1185out:
7019b78e 1186 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1187 return entries > rt_max_size;
1da177e4
LT
1188}
1189
1190/* Clean host part of a prefix. Not necessary in radix tree,
1191 but results in cleaner routing tables.
1192
1193 Remove it only when all the things will work!
1194 */
1195
6b75d090 1196int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1197{
5170ae82 1198 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1199 if (hoplimit == 0) {
6b75d090 1200 struct net_device *dev = dst->dev;
c68f24cc
ED
1201 struct inet6_dev *idev;
1202
1203 rcu_read_lock();
1204 idev = __in6_dev_get(dev);
1205 if (idev)
6b75d090 1206 hoplimit = idev->cnf.hop_limit;
c68f24cc 1207 else
53b7997f 1208 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1209 rcu_read_unlock();
1da177e4
LT
1210 }
1211 return hoplimit;
1212}
abbf46ae 1213EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1214
1215/*
1216 *
1217 */
1218
86872cb5 1219int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1220{
1221 int err;
5578689a 1222 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1223 struct rt6_info *rt = NULL;
1224 struct net_device *dev = NULL;
1225 struct inet6_dev *idev = NULL;
c71099ac 1226 struct fib6_table *table;
1da177e4
LT
1227 int addr_type;
1228
86872cb5 1229 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1230 return -EINVAL;
1231#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1232 if (cfg->fc_src_len)
1da177e4
LT
1233 return -EINVAL;
1234#endif
86872cb5 1235 if (cfg->fc_ifindex) {
1da177e4 1236 err = -ENODEV;
5578689a 1237 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1238 if (!dev)
1239 goto out;
1240 idev = in6_dev_get(dev);
1241 if (!idev)
1242 goto out;
1243 }
1244
86872cb5
TG
1245 if (cfg->fc_metric == 0)
1246 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1247
d71314b4 1248 err = -ENOBUFS;
38308473
DM
1249 if (cfg->fc_nlinfo.nlh &&
1250 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1251 table = fib6_get_table(net, cfg->fc_table);
38308473 1252 if (!table) {
d71314b4
MV
1253 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1254 table = fib6_new_table(net, cfg->fc_table);
1255 }
1256 } else {
1257 table = fib6_new_table(net, cfg->fc_table);
1258 }
38308473
DM
1259
1260 if (!table)
c71099ac 1261 goto out;
c71099ac 1262
957c665f 1263 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1264
38308473 1265 if (!rt) {
1da177e4
LT
1266 err = -ENOMEM;
1267 goto out;
1268 }
1269
d8d1f30b 1270 rt->dst.obsolete = -1;
d1918542 1271 rt->dst.expires = (cfg->fc_flags & RTF_EXPIRES) ?
6f704992
YH
1272 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1273 0;
1da177e4 1274
86872cb5
TG
1275 if (cfg->fc_protocol == RTPROT_UNSPEC)
1276 cfg->fc_protocol = RTPROT_BOOT;
1277 rt->rt6i_protocol = cfg->fc_protocol;
1278
1279 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1280
1281 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1282 rt->dst.input = ip6_mc_input;
ab79ad14
1283 else if (cfg->fc_flags & RTF_LOCAL)
1284 rt->dst.input = ip6_input;
1da177e4 1285 else
d8d1f30b 1286 rt->dst.input = ip6_forward;
1da177e4 1287
d8d1f30b 1288 rt->dst.output = ip6_output;
1da177e4 1289
86872cb5
TG
1290 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1291 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1292 if (rt->rt6i_dst.plen == 128)
11d53b49 1293 rt->dst.flags |= DST_HOST;
1da177e4 1294
8e2ec639
YZ
1295 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1296 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1297 if (!metrics) {
1298 err = -ENOMEM;
1299 goto out;
1300 }
1301 dst_init_metrics(&rt->dst, metrics, 0);
1302 }
1da177e4 1303#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1304 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1305 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1306#endif
1307
86872cb5 1308 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1309
1310 /* We cannot add true routes via loopback here,
1311 they would result in kernel looping; promote them to reject routes
1312 */
86872cb5 1313 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1314 (dev && (dev->flags & IFF_LOOPBACK) &&
1315 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1316 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1317 /* hold loopback dev/idev if we haven't done so. */
5578689a 1318 if (dev != net->loopback_dev) {
1da177e4
LT
1319 if (dev) {
1320 dev_put(dev);
1321 in6_dev_put(idev);
1322 }
5578689a 1323 dev = net->loopback_dev;
1da177e4
LT
1324 dev_hold(dev);
1325 idev = in6_dev_get(dev);
1326 if (!idev) {
1327 err = -ENODEV;
1328 goto out;
1329 }
1330 }
d8d1f30b
CG
1331 rt->dst.output = ip6_pkt_discard_out;
1332 rt->dst.input = ip6_pkt_discard;
1333 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1334 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1335 goto install_route;
1336 }
1337
86872cb5 1338 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1339 const struct in6_addr *gw_addr;
1da177e4
LT
1340 int gwa_type;
1341
86872cb5 1342 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1343 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1344 gwa_type = ipv6_addr_type(gw_addr);
1345
1346 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1347 struct rt6_info *grt;
1348
1349 /* IPv6 strictly inhibits using not link-local
1350 addresses as nexthop address.
1351 Otherwise, router will not able to send redirects.
1352 It is very good, but in some (rare!) circumstances
1353 (SIT, PtP, NBMA NOARP links) it is handy to allow
1354 some exceptions. --ANK
1355 */
1356 err = -EINVAL;
38308473 1357 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1358 goto out;
1359
5578689a 1360 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1361
1362 err = -EHOSTUNREACH;
38308473 1363 if (!grt)
1da177e4
LT
1364 goto out;
1365 if (dev) {
d1918542 1366 if (dev != grt->dst.dev) {
d8d1f30b 1367 dst_release(&grt->dst);
1da177e4
LT
1368 goto out;
1369 }
1370 } else {
d1918542 1371 dev = grt->dst.dev;
1da177e4
LT
1372 idev = grt->rt6i_idev;
1373 dev_hold(dev);
1374 in6_dev_hold(grt->rt6i_idev);
1375 }
38308473 1376 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1377 err = 0;
d8d1f30b 1378 dst_release(&grt->dst);
1da177e4
LT
1379
1380 if (err)
1381 goto out;
1382 }
1383 err = -EINVAL;
38308473 1384 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1385 goto out;
1386 }
1387
1388 err = -ENODEV;
38308473 1389 if (!dev)
1da177e4
LT
1390 goto out;
1391
c3968a85
DW
1392 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1393 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1394 err = -EINVAL;
1395 goto out;
1396 }
4e3fd7a0 1397 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1398 rt->rt6i_prefsrc.plen = 128;
1399 } else
1400 rt->rt6i_prefsrc.plen = 0;
1401
86872cb5 1402 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1403 err = rt6_bind_neighbour(rt, dev);
f83c7790 1404 if (err)
1da177e4 1405 goto out;
1da177e4
LT
1406 }
1407
86872cb5 1408 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1409
1410install_route:
86872cb5
TG
1411 if (cfg->fc_mx) {
1412 struct nlattr *nla;
1413 int remaining;
1414
1415 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1416 int type = nla_type(nla);
86872cb5
TG
1417
1418 if (type) {
1419 if (type > RTAX_MAX) {
1da177e4
LT
1420 err = -EINVAL;
1421 goto out;
1422 }
86872cb5 1423
defb3519 1424 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1425 }
1da177e4
LT
1426 }
1427 }
1428
d8d1f30b 1429 rt->dst.dev = dev;
1da177e4 1430 rt->rt6i_idev = idev;
c71099ac 1431 rt->rt6i_table = table;
63152fc0 1432
c346dca1 1433 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1434
86872cb5 1435 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1436
1437out:
1438 if (dev)
1439 dev_put(dev);
1440 if (idev)
1441 in6_dev_put(idev);
1442 if (rt)
d8d1f30b 1443 dst_free(&rt->dst);
1da177e4
LT
1444 return err;
1445}
1446
86872cb5 1447static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1448{
1449 int err;
c71099ac 1450 struct fib6_table *table;
d1918542 1451 struct net *net = dev_net(rt->dst.dev);
1da177e4 1452
8ed67789 1453 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1454 return -ENOENT;
1455
c71099ac
TG
1456 table = rt->rt6i_table;
1457 write_lock_bh(&table->tb6_lock);
1da177e4 1458
86872cb5 1459 err = fib6_del(rt, info);
d8d1f30b 1460 dst_release(&rt->dst);
1da177e4 1461
c71099ac 1462 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1463
1464 return err;
1465}
1466
e0a1ad73
TG
1467int ip6_del_rt(struct rt6_info *rt)
1468{
4d1169c1 1469 struct nl_info info = {
d1918542 1470 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1471 };
528c4ceb 1472 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1473}
1474
86872cb5 1475static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1476{
c71099ac 1477 struct fib6_table *table;
1da177e4
LT
1478 struct fib6_node *fn;
1479 struct rt6_info *rt;
1480 int err = -ESRCH;
1481
5578689a 1482 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1483 if (!table)
c71099ac
TG
1484 return err;
1485
1486 read_lock_bh(&table->tb6_lock);
1da177e4 1487
c71099ac 1488 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1489 &cfg->fc_dst, cfg->fc_dst_len,
1490 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1491
1da177e4 1492 if (fn) {
d8d1f30b 1493 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1494 if (cfg->fc_ifindex &&
d1918542
DM
1495 (!rt->dst.dev ||
1496 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1497 continue;
86872cb5
TG
1498 if (cfg->fc_flags & RTF_GATEWAY &&
1499 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1500 continue;
86872cb5 1501 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1502 continue;
d8d1f30b 1503 dst_hold(&rt->dst);
c71099ac 1504 read_unlock_bh(&table->tb6_lock);
1da177e4 1505
86872cb5 1506 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1507 }
1508 }
c71099ac 1509 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1510
1511 return err;
1512}
1513
1514/*
1515 * Handle redirects
1516 */
a6279458 1517struct ip6rd_flowi {
4c9483b2 1518 struct flowi6 fl6;
a6279458
YH
1519 struct in6_addr gateway;
1520};
1521
8ed67789
DL
1522static struct rt6_info *__ip6_route_redirect(struct net *net,
1523 struct fib6_table *table,
4c9483b2 1524 struct flowi6 *fl6,
a6279458 1525 int flags)
1da177e4 1526{
4c9483b2 1527 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1528 struct rt6_info *rt;
e843b9e1 1529 struct fib6_node *fn;
c71099ac 1530
1da177e4 1531 /*
e843b9e1
YH
1532 * Get the "current" route for this destination and
1533 * check if the redirect has come from approriate router.
1534 *
1535 * RFC 2461 specifies that redirects should only be
1536 * accepted if they come from the nexthop to the target.
1537 * Due to the way the routes are chosen, this notion
1538 * is a bit fuzzy and one might need to check all possible
1539 * routes.
1da177e4 1540 */
1da177e4 1541
c71099ac 1542 read_lock_bh(&table->tb6_lock);
4c9483b2 1543 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1544restart:
d8d1f30b 1545 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1546 /*
1547 * Current route is on-link; redirect is always invalid.
1548 *
1549 * Seems, previous statement is not true. It could
1550 * be node, which looks for us as on-link (f.e. proxy ndisc)
1551 * But then router serving it might decide, that we should
1552 * know truth 8)8) --ANK (980726).
1553 */
1554 if (rt6_check_expired(rt))
1555 continue;
1556 if (!(rt->rt6i_flags & RTF_GATEWAY))
1557 continue;
d1918542 1558 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1559 continue;
a6279458 1560 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1561 continue;
1562 break;
1563 }
a6279458 1564
cb15d9c2 1565 if (!rt)
8ed67789 1566 rt = net->ipv6.ip6_null_entry;
4c9483b2 1567 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1568out:
d8d1f30b 1569 dst_hold(&rt->dst);
a6279458 1570
c71099ac 1571 read_unlock_bh(&table->tb6_lock);
e843b9e1 1572
a6279458
YH
1573 return rt;
1574};
1575
b71d1d42
ED
1576static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1577 const struct in6_addr *src,
1578 const struct in6_addr *gateway,
a6279458
YH
1579 struct net_device *dev)
1580{
adaa70bb 1581 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1582 struct net *net = dev_net(dev);
a6279458 1583 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1584 .fl6 = {
1585 .flowi6_oif = dev->ifindex,
1586 .daddr = *dest,
1587 .saddr = *src,
a6279458 1588 },
a6279458 1589 };
adaa70bb 1590
4e3fd7a0 1591 rdfl.gateway = *gateway;
86c36ce4 1592
adaa70bb
TG
1593 if (rt6_need_strict(dest))
1594 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1595
4c9483b2 1596 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1597 flags, __ip6_route_redirect);
a6279458
YH
1598}
1599
b71d1d42
ED
1600void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1601 const struct in6_addr *saddr,
a6279458
YH
1602 struct neighbour *neigh, u8 *lladdr, int on_link)
1603{
1604 struct rt6_info *rt, *nrt = NULL;
1605 struct netevent_redirect netevent;
c346dca1 1606 struct net *net = dev_net(neigh->dev);
a6279458
YH
1607
1608 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1609
8ed67789 1610 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1611 if (net_ratelimit())
1612 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1613 "for redirect target\n");
a6279458 1614 goto out;
1da177e4
LT
1615 }
1616
1da177e4
LT
1617 /*
1618 * We have finally decided to accept it.
1619 */
1620
1ab1457c 1621 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1622 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1623 NEIGH_UPDATE_F_OVERRIDE|
1624 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1625 NEIGH_UPDATE_F_ISROUTER))
1626 );
1627
1628 /*
1629 * Redirect received -> path was valid.
1630 * Look, redirects are sent only in response to data packets,
1631 * so that this nexthop apparently is reachable. --ANK
1632 */
d8d1f30b 1633 dst_confirm(&rt->dst);
1da177e4
LT
1634
1635 /* Duplicate redirect: silently ignore. */
27217455 1636 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1637 goto out;
1638
21efcfa0 1639 nrt = ip6_rt_copy(rt, dest);
38308473 1640 if (!nrt)
1da177e4
LT
1641 goto out;
1642
1643 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1644 if (on_link)
1645 nrt->rt6i_flags &= ~RTF_GATEWAY;
1646
4e3fd7a0 1647 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1648 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1649
40e22e8f 1650 if (ip6_ins_rt(nrt))
1da177e4
LT
1651 goto out;
1652
d8d1f30b
CG
1653 netevent.old = &rt->dst;
1654 netevent.new = &nrt->dst;
8d71740c
TT
1655 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1656
38308473 1657 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1658 ip6_del_rt(rt);
1da177e4
LT
1659 return;
1660 }
1661
1662out:
d8d1f30b 1663 dst_release(&rt->dst);
1da177e4
LT
1664}
1665
1666/*
1667 * Handle ICMP "packet too big" messages
1668 * i.e. Path MTU discovery
1669 */
1670
b71d1d42 1671static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1672 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1673{
1674 struct rt6_info *rt, *nrt;
1675 int allfrag = 0;
d3052b55 1676again:
ae878ae2 1677 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1678 if (!rt)
1da177e4
LT
1679 return;
1680
d3052b55
AV
1681 if (rt6_check_expired(rt)) {
1682 ip6_del_rt(rt);
1683 goto again;
1684 }
1685
d8d1f30b 1686 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1687 goto out;
1688
1689 if (pmtu < IPV6_MIN_MTU) {
1690 /*
1ab1457c 1691 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1692 * MTU (1280) and a fragment header should always be included
1693 * after a node receiving Too Big message reporting PMTU is
1694 * less than the IPv6 Minimum Link MTU.
1695 */
1696 pmtu = IPV6_MIN_MTU;
1697 allfrag = 1;
1698 }
1699
1700 /* New mtu received -> path was valid.
1701 They are sent only in response to data packets,
1702 so that this nexthop apparently is reachable. --ANK
1703 */
d8d1f30b 1704 dst_confirm(&rt->dst);
1da177e4
LT
1705
1706 /* Host route. If it is static, it would be better
1707 not to override it, but add new one, so that
1708 when cache entry will expire old pmtu
1709 would return automatically.
1710 */
1711 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1712 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1713 if (allfrag) {
1714 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1715 features |= RTAX_FEATURE_ALLFRAG;
1716 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1717 }
d8d1f30b 1718 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1719 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1720 goto out;
1721 }
1722
1723 /* Network route.
1724 Two cases are possible:
1725 1. It is connected route. Action: COW
1726 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1727 */
27217455 1728 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1729 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1730 else
1731 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1732
d5315b50 1733 if (nrt) {
defb3519
DM
1734 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1735 if (allfrag) {
1736 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1737 features |= RTAX_FEATURE_ALLFRAG;
1738 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1739 }
a1e78363
YH
1740
1741 /* According to RFC 1981, detecting PMTU increase shouldn't be
1742 * happened within 5 mins, the recommended timer is 10 mins.
1743 * Here this route expiration time is set to ip6_rt_mtu_expires
1744 * which is 10 mins. After 10 mins the decreased pmtu is expired
1745 * and detecting PMTU increase will be automatically happened.
1746 */
d8d1f30b 1747 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1748 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1749
40e22e8f 1750 ip6_ins_rt(nrt);
1da177e4 1751 }
1da177e4 1752out:
d8d1f30b 1753 dst_release(&rt->dst);
1da177e4
LT
1754}
1755
b71d1d42 1756void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1757 struct net_device *dev, u32 pmtu)
1758{
1759 struct net *net = dev_net(dev);
1760
1761 /*
1762 * RFC 1981 states that a node "MUST reduce the size of the packets it
1763 * is sending along the path" that caused the Packet Too Big message.
1764 * Since it's not possible in the general case to determine which
1765 * interface was used to send the original packet, we update the MTU
1766 * on the interface that will be used to send future packets. We also
1767 * update the MTU on the interface that received the Packet Too Big in
1768 * case the original packet was forced out that interface with
1769 * SO_BINDTODEVICE or similar. This is the next best thing to the
1770 * correct behaviour, which would be to update the MTU on all
1771 * interfaces.
1772 */
1773 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1774 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1775}
1776
1da177e4
LT
1777/*
1778 * Misc support functions
1779 */
1780
21efcfa0
ED
1781static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1782 const struct in6_addr *dest)
1da177e4 1783{
d1918542 1784 struct net *net = dev_net(ort->dst.dev);
5c1e6aa3 1785 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1786 ort->dst.dev, 0);
1da177e4
LT
1787
1788 if (rt) {
d8d1f30b
CG
1789 rt->dst.input = ort->dst.input;
1790 rt->dst.output = ort->dst.output;
8e2ec639 1791 rt->dst.flags |= DST_HOST;
d8d1f30b 1792
4e3fd7a0 1793 rt->rt6i_dst.addr = *dest;
8e2ec639 1794 rt->rt6i_dst.plen = 128;
defb3519 1795 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1796 rt->dst.error = ort->dst.error;
1da177e4
LT
1797 rt->rt6i_idev = ort->rt6i_idev;
1798 if (rt->rt6i_idev)
1799 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1800 rt->dst.lastuse = jiffies;
d1918542 1801 rt->dst.expires = 0;
1da177e4 1802
4e3fd7a0 1803 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1804 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1805 rt->rt6i_metric = 0;
1806
1da177e4
LT
1807#ifdef CONFIG_IPV6_SUBTREES
1808 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1809#endif
0f6c6392 1810 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1811 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1812 }
1813 return rt;
1814}
1815
70ceb4f5 1816#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1817static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1818 const struct in6_addr *prefix, int prefixlen,
1819 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1820{
1821 struct fib6_node *fn;
1822 struct rt6_info *rt = NULL;
c71099ac
TG
1823 struct fib6_table *table;
1824
efa2cea0 1825 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1826 if (!table)
c71099ac 1827 return NULL;
70ceb4f5 1828
c71099ac
TG
1829 write_lock_bh(&table->tb6_lock);
1830 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1831 if (!fn)
1832 goto out;
1833
d8d1f30b 1834 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1835 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1836 continue;
1837 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1838 continue;
1839 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1840 continue;
d8d1f30b 1841 dst_hold(&rt->dst);
70ceb4f5
YH
1842 break;
1843 }
1844out:
c71099ac 1845 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1846 return rt;
1847}
1848
efa2cea0 1849static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1850 const struct in6_addr *prefix, int prefixlen,
1851 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1852 unsigned pref)
1853{
86872cb5
TG
1854 struct fib6_config cfg = {
1855 .fc_table = RT6_TABLE_INFO,
238fc7ea 1856 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1857 .fc_ifindex = ifindex,
1858 .fc_dst_len = prefixlen,
1859 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1860 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1861 .fc_nlinfo.pid = 0,
1862 .fc_nlinfo.nlh = NULL,
1863 .fc_nlinfo.nl_net = net,
86872cb5
TG
1864 };
1865
4e3fd7a0
AD
1866 cfg.fc_dst = *prefix;
1867 cfg.fc_gateway = *gwaddr;
70ceb4f5 1868
e317da96
YH
1869 /* We should treat it as a default route if prefix length is 0. */
1870 if (!prefixlen)
86872cb5 1871 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1872
86872cb5 1873 ip6_route_add(&cfg);
70ceb4f5 1874
efa2cea0 1875 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1876}
1877#endif
1878
b71d1d42 1879struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1880{
1da177e4 1881 struct rt6_info *rt;
c71099ac 1882 struct fib6_table *table;
1da177e4 1883
c346dca1 1884 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1885 if (!table)
c71099ac 1886 return NULL;
1da177e4 1887
c71099ac 1888 write_lock_bh(&table->tb6_lock);
d8d1f30b 1889 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1890 if (dev == rt->dst.dev &&
045927ff 1891 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1892 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1893 break;
1894 }
1895 if (rt)
d8d1f30b 1896 dst_hold(&rt->dst);
c71099ac 1897 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1898 return rt;
1899}
1900
b71d1d42 1901struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1902 struct net_device *dev,
1903 unsigned int pref)
1da177e4 1904{
86872cb5
TG
1905 struct fib6_config cfg = {
1906 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1907 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1908 .fc_ifindex = dev->ifindex,
1909 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1910 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1911 .fc_nlinfo.pid = 0,
1912 .fc_nlinfo.nlh = NULL,
c346dca1 1913 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1914 };
1da177e4 1915
4e3fd7a0 1916 cfg.fc_gateway = *gwaddr;
1da177e4 1917
86872cb5 1918 ip6_route_add(&cfg);
1da177e4 1919
1da177e4
LT
1920 return rt6_get_dflt_router(gwaddr, dev);
1921}
1922
7b4da532 1923void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1924{
1925 struct rt6_info *rt;
c71099ac
TG
1926 struct fib6_table *table;
1927
1928 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1929 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1930 if (!table)
c71099ac 1931 return;
1da177e4
LT
1932
1933restart:
c71099ac 1934 read_lock_bh(&table->tb6_lock);
d8d1f30b 1935 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1936 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1937 dst_hold(&rt->dst);
c71099ac 1938 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1939 ip6_del_rt(rt);
1da177e4
LT
1940 goto restart;
1941 }
1942 }
c71099ac 1943 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1944}
1945
5578689a
DL
1946static void rtmsg_to_fib6_config(struct net *net,
1947 struct in6_rtmsg *rtmsg,
86872cb5
TG
1948 struct fib6_config *cfg)
1949{
1950 memset(cfg, 0, sizeof(*cfg));
1951
1952 cfg->fc_table = RT6_TABLE_MAIN;
1953 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1954 cfg->fc_metric = rtmsg->rtmsg_metric;
1955 cfg->fc_expires = rtmsg->rtmsg_info;
1956 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1957 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1958 cfg->fc_flags = rtmsg->rtmsg_flags;
1959
5578689a 1960 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1961
4e3fd7a0
AD
1962 cfg->fc_dst = rtmsg->rtmsg_dst;
1963 cfg->fc_src = rtmsg->rtmsg_src;
1964 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1965}
1966
5578689a 1967int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1968{
86872cb5 1969 struct fib6_config cfg;
1da177e4
LT
1970 struct in6_rtmsg rtmsg;
1971 int err;
1972
1973 switch(cmd) {
1974 case SIOCADDRT: /* Add a route */
1975 case SIOCDELRT: /* Delete a route */
1976 if (!capable(CAP_NET_ADMIN))
1977 return -EPERM;
1978 err = copy_from_user(&rtmsg, arg,
1979 sizeof(struct in6_rtmsg));
1980 if (err)
1981 return -EFAULT;
86872cb5 1982
5578689a 1983 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1984
1da177e4
LT
1985 rtnl_lock();
1986 switch (cmd) {
1987 case SIOCADDRT:
86872cb5 1988 err = ip6_route_add(&cfg);
1da177e4
LT
1989 break;
1990 case SIOCDELRT:
86872cb5 1991 err = ip6_route_del(&cfg);
1da177e4
LT
1992 break;
1993 default:
1994 err = -EINVAL;
1995 }
1996 rtnl_unlock();
1997
1998 return err;
3ff50b79 1999 }
1da177e4
LT
2000
2001 return -EINVAL;
2002}
2003
2004/*
2005 * Drop the packet on the floor
2006 */
2007
d5fdd6ba 2008static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2009{
612f09e8 2010 int type;
adf30907 2011 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2012 switch (ipstats_mib_noroutes) {
2013 case IPSTATS_MIB_INNOROUTES:
0660e03f 2014 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2015 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2016 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2017 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2018 break;
2019 }
2020 /* FALLTHROUGH */
2021 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2022 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2023 ipstats_mib_noroutes);
612f09e8
YH
2024 break;
2025 }
3ffe533c 2026 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2027 kfree_skb(skb);
2028 return 0;
2029}
2030
9ce8ade0
TG
2031static int ip6_pkt_discard(struct sk_buff *skb)
2032{
612f09e8 2033 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2034}
2035
20380731 2036static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2037{
adf30907 2038 skb->dev = skb_dst(skb)->dev;
612f09e8 2039 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2040}
2041
6723ab54
DM
2042#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2043
9ce8ade0
TG
2044static int ip6_pkt_prohibit(struct sk_buff *skb)
2045{
612f09e8 2046 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2047}
2048
2049static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2050{
adf30907 2051 skb->dev = skb_dst(skb)->dev;
612f09e8 2052 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2053}
2054
6723ab54
DM
2055#endif
2056
1da177e4
LT
2057/*
2058 * Allocate a dst for local (unicast / anycast) address.
2059 */
2060
2061struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2062 const struct in6_addr *addr,
8f031519 2063 bool anycast)
1da177e4 2064{
c346dca1 2065 struct net *net = dev_net(idev->dev);
5c1e6aa3 2066 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2067 net->loopback_dev, 0);
f83c7790 2068 int err;
1da177e4 2069
38308473 2070 if (!rt) {
40385653
BG
2071 if (net_ratelimit())
2072 pr_warning("IPv6: Maximum number of routes reached,"
2073 " consider increasing route/max_size.\n");
1da177e4 2074 return ERR_PTR(-ENOMEM);
40385653 2075 }
1da177e4 2076
1da177e4
LT
2077 in6_dev_hold(idev);
2078
11d53b49 2079 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2080 rt->dst.input = ip6_input;
2081 rt->dst.output = ip6_output;
1da177e4 2082 rt->rt6i_idev = idev;
d8d1f30b 2083 rt->dst.obsolete = -1;
1da177e4
LT
2084
2085 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2086 if (anycast)
2087 rt->rt6i_flags |= RTF_ANYCAST;
2088 else
1da177e4 2089 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2090 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2091 if (err) {
d8d1f30b 2092 dst_free(&rt->dst);
f83c7790 2093 return ERR_PTR(err);
1da177e4
LT
2094 }
2095
4e3fd7a0 2096 rt->rt6i_dst.addr = *addr;
1da177e4 2097 rt->rt6i_dst.plen = 128;
5578689a 2098 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2099
d8d1f30b 2100 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2101
2102 return rt;
2103}
2104
c3968a85
DW
2105int ip6_route_get_saddr(struct net *net,
2106 struct rt6_info *rt,
b71d1d42 2107 const struct in6_addr *daddr,
c3968a85
DW
2108 unsigned int prefs,
2109 struct in6_addr *saddr)
2110{
2111 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2112 int err = 0;
2113 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2114 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2115 else
2116 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2117 daddr, prefs, saddr);
2118 return err;
2119}
2120
2121/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared with rt6i_prefsrc */
};
2127
2128static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2129{
2130 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2131 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2132 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2133
d1918542 2134 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2135 rt != net->ipv6.ip6_null_entry &&
2136 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2137 /* remove prefsrc entry */
2138 rt->rt6i_prefsrc.plen = 0;
2139 }
2140 return 0;
2141}
2142
2143void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2144{
2145 struct net *net = dev_net(ifp->idev->dev);
2146 struct arg_dev_net_ip adni = {
2147 .dev = ifp->idev->dev,
2148 .net = net,
2149 .addr = &ifp->addr,
2150 };
2151 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2152}
2153
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2158
1da177e4
LT
2159static int fib6_ifdown(struct rt6_info *rt, void *arg)
2160{
bc3ef660 2161 const struct arg_dev_net *adn = arg;
2162 const struct net_device *dev = adn->dev;
8ed67789 2163
d1918542 2164 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2165 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2166 return -1;
c159d30c 2167
1da177e4
LT
2168 return 0;
2169}
2170
f3db4851 2171void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2172{
8ed67789
DL
2173 struct arg_dev_net adn = {
2174 .dev = dev,
2175 .net = net,
2176 };
2177
2178 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2179 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2180}
2181
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;		/* device whose MTU changed */
	unsigned mtu;			/* the new MTU */
};
2187
2188static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2189{
2190 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2191 struct inet6_dev *idev;
2192
2193 /* In IPv6 pmtu discovery is not optional,
2194 so that RTAX_MTU lock cannot disable it.
2195 We still use this lock to block changes
2196 caused by addrconf/ndisc.
2197 */
2198
2199 idev = __in6_dev_get(arg->dev);
38308473 2200 if (!idev)
1da177e4
LT
2201 return 0;
2202
2203 /* For administrative MTU increase, there is no way to discover
2204 IPv6 PMTU increase, so PMTU increase should be updated here.
2205 Since RFC 1981 doesn't include administrative MTU increase
2206 update PMTU increase is a MUST. (i.e. jumbo frame)
2207 */
2208 /*
2209 If new MTU is less than route PMTU, this new MTU will be the
2210 lowest MTU in the path, update the route PMTU to reflect PMTU
2211 decreases; if new MTU is greater than route PMTU, and the
2212 old MTU is the lowest MTU in the path, update the route PMTU
2213 to reflect the increase. In this case if the other nodes' MTU
2214 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2215 PMTU discouvery.
2216 */
d1918542 2217 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2218 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2219 (dst_mtu(&rt->dst) >= arg->mtu ||
2220 (dst_mtu(&rt->dst) < arg->mtu &&
2221 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2222 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2223 }
1da177e4
LT
2224 return 0;
2225}
2226
2227void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2228{
c71099ac
TG
2229 struct rt6_mtu_change_arg arg = {
2230 .dev = dev,
2231 .mtu = mtu,
2232 };
1da177e4 2233
c346dca1 2234 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2235}
2236
ef7c79ed 2237static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2238 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2239 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2240 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2241 [RTA_PRIORITY] = { .type = NLA_U32 },
2242 [RTA_METRICS] = { .type = NLA_NESTED },
2243};
2244
2245static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2246 struct fib6_config *cfg)
1da177e4 2247{
86872cb5
TG
2248 struct rtmsg *rtm;
2249 struct nlattr *tb[RTA_MAX+1];
2250 int err;
1da177e4 2251
86872cb5
TG
2252 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2253 if (err < 0)
2254 goto errout;
1da177e4 2255
86872cb5
TG
2256 err = -EINVAL;
2257 rtm = nlmsg_data(nlh);
2258 memset(cfg, 0, sizeof(*cfg));
2259
2260 cfg->fc_table = rtm->rtm_table;
2261 cfg->fc_dst_len = rtm->rtm_dst_len;
2262 cfg->fc_src_len = rtm->rtm_src_len;
2263 cfg->fc_flags = RTF_UP;
2264 cfg->fc_protocol = rtm->rtm_protocol;
2265
2266 if (rtm->rtm_type == RTN_UNREACHABLE)
2267 cfg->fc_flags |= RTF_REJECT;
2268
ab79ad14
2269 if (rtm->rtm_type == RTN_LOCAL)
2270 cfg->fc_flags |= RTF_LOCAL;
2271
86872cb5
TG
2272 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2273 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2274 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2275
2276 if (tb[RTA_GATEWAY]) {
2277 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2278 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2279 }
86872cb5
TG
2280
2281 if (tb[RTA_DST]) {
2282 int plen = (rtm->rtm_dst_len + 7) >> 3;
2283
2284 if (nla_len(tb[RTA_DST]) < plen)
2285 goto errout;
2286
2287 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2288 }
86872cb5
TG
2289
2290 if (tb[RTA_SRC]) {
2291 int plen = (rtm->rtm_src_len + 7) >> 3;
2292
2293 if (nla_len(tb[RTA_SRC]) < plen)
2294 goto errout;
2295
2296 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2297 }
86872cb5 2298
c3968a85
DW
2299 if (tb[RTA_PREFSRC])
2300 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2301
86872cb5
TG
2302 if (tb[RTA_OIF])
2303 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2304
2305 if (tb[RTA_PRIORITY])
2306 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2307
2308 if (tb[RTA_METRICS]) {
2309 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2310 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2311 }
86872cb5
TG
2312
2313 if (tb[RTA_TABLE])
2314 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2315
2316 err = 0;
2317errout:
2318 return err;
1da177e4
LT
2319}
2320
c127ea2c 2321static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2322{
86872cb5
TG
2323 struct fib6_config cfg;
2324 int err;
1da177e4 2325
86872cb5
TG
2326 err = rtm_to_fib6_config(skb, nlh, &cfg);
2327 if (err < 0)
2328 return err;
2329
2330 return ip6_route_del(&cfg);
1da177e4
LT
2331}
2332
c127ea2c 2333static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2334{
86872cb5
TG
2335 struct fib6_config cfg;
2336 int err;
1da177e4 2337
86872cb5
TG
2338 err = rtm_to_fib6_config(skb, nlh, &cfg);
2339 if (err < 0)
2340 return err;
2341
2342 return ip6_route_add(&cfg);
1da177e4
LT
2343}
2344
339bf98f
TG
2345static inline size_t rt6_nlmsg_size(void)
2346{
2347 return NLMSG_ALIGN(sizeof(struct rtmsg))
2348 + nla_total_size(16) /* RTA_SRC */
2349 + nla_total_size(16) /* RTA_DST */
2350 + nla_total_size(16) /* RTA_GATEWAY */
2351 + nla_total_size(16) /* RTA_PREFSRC */
2352 + nla_total_size(4) /* RTA_TABLE */
2353 + nla_total_size(4) /* RTA_IIF */
2354 + nla_total_size(4) /* RTA_OIF */
2355 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2356 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2357 + nla_total_size(sizeof(struct rta_cacheinfo));
2358}
2359
191cd582
BH
2360static int rt6_fill_node(struct net *net,
2361 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2362 struct in6_addr *dst, struct in6_addr *src,
2363 int iif, int type, u32 pid, u32 seq,
7bc570c8 2364 int prefix, int nowait, unsigned int flags)
1da177e4 2365{
346f870b 2366 const struct inet_peer *peer;
1da177e4 2367 struct rtmsg *rtm;
2d7202bf 2368 struct nlmsghdr *nlh;
e3703b3d 2369 long expires;
9e762a4a 2370 u32 table;
f2c31e32 2371 struct neighbour *n;
346f870b 2372 u32 ts, tsage;
1da177e4
LT
2373
2374 if (prefix) { /* user wants prefix routes only */
2375 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2376 /* success since this is not a prefix route */
2377 return 1;
2378 }
2379 }
2380
2d7202bf 2381 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2382 if (!nlh)
26932566 2383 return -EMSGSIZE;
2d7202bf
TG
2384
2385 rtm = nlmsg_data(nlh);
1da177e4
LT
2386 rtm->rtm_family = AF_INET6;
2387 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2388 rtm->rtm_src_len = rt->rt6i_src.plen;
2389 rtm->rtm_tos = 0;
c71099ac 2390 if (rt->rt6i_table)
9e762a4a 2391 table = rt->rt6i_table->tb6_id;
c71099ac 2392 else
9e762a4a
PM
2393 table = RT6_TABLE_UNSPEC;
2394 rtm->rtm_table = table;
2d7202bf 2395 NLA_PUT_U32(skb, RTA_TABLE, table);
38308473 2396 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2397 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2398 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2399 rtm->rtm_type = RTN_LOCAL;
d1918542 2400 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2401 rtm->rtm_type = RTN_LOCAL;
2402 else
2403 rtm->rtm_type = RTN_UNICAST;
2404 rtm->rtm_flags = 0;
2405 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2406 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2407 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2408 rtm->rtm_protocol = RTPROT_REDIRECT;
2409 else if (rt->rt6i_flags & RTF_ADDRCONF)
2410 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2411 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2412 rtm->rtm_protocol = RTPROT_RA;
2413
38308473 2414 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2415 rtm->rtm_flags |= RTM_F_CLONED;
2416
2417 if (dst) {
2d7202bf 2418 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2419 rtm->rtm_dst_len = 128;
1da177e4 2420 } else if (rtm->rtm_dst_len)
2d7202bf 2421 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2422#ifdef CONFIG_IPV6_SUBTREES
2423 if (src) {
2d7202bf 2424 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2425 rtm->rtm_src_len = 128;
1da177e4 2426 } else if (rtm->rtm_src_len)
2d7202bf 2427 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2428#endif
7bc570c8
YH
2429 if (iif) {
2430#ifdef CONFIG_IPV6_MROUTE
2431 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2432 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2433 if (err <= 0) {
2434 if (!nowait) {
2435 if (err == 0)
2436 return 0;
2437 goto nla_put_failure;
2438 } else {
2439 if (err == -EMSGSIZE)
2440 goto nla_put_failure;
2441 }
2442 }
2443 } else
2444#endif
2445 NLA_PUT_U32(skb, RTA_IIF, iif);
2446 } else if (dst) {
1da177e4 2447 struct in6_addr saddr_buf;
c3968a85 2448 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2449 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2450 }
2d7202bf 2451
c3968a85
DW
2452 if (rt->rt6i_prefsrc.plen) {
2453 struct in6_addr saddr_buf;
4e3fd7a0 2454 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2455 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2456 }
2457
defb3519 2458 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2459 goto nla_put_failure;
2460
f2c31e32 2461 rcu_read_lock();
27217455 2462 n = dst_get_neighbour_noref(&rt->dst);
f2c31e32
ED
2463 if (n)
2464 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2465 rcu_read_unlock();
2d7202bf 2466
d8d1f30b 2467 if (rt->dst.dev)
d1918542 2468 NLA_PUT_U32(skb, RTA_OIF, rt->dst.dev->ifindex);
2d7202bf
TG
2469
2470 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2471
36e3deae
YH
2472 if (!(rt->rt6i_flags & RTF_EXPIRES))
2473 expires = 0;
d1918542
DM
2474 else if (rt->dst.expires - jiffies < INT_MAX)
2475 expires = rt->dst.expires - jiffies;
36e3deae
YH
2476 else
2477 expires = INT_MAX;
69cdf8f9 2478
346f870b
DM
2479 peer = rt->rt6i_peer;
2480 ts = tsage = 0;
2481 if (peer && peer->tcp_ts_stamp) {
2482 ts = peer->tcp_ts;
2483 tsage = get_seconds() - peer->tcp_ts_stamp;
2484 }
2485
2486 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, ts, tsage,
d8d1f30b 2487 expires, rt->dst.error) < 0)
e3703b3d 2488 goto nla_put_failure;
2d7202bf
TG
2489
2490 return nlmsg_end(skb, nlh);
2491
2492nla_put_failure:
26932566
PM
2493 nlmsg_cancel(skb, nlh);
2494 return -EMSGSIZE;
1da177e4
LT
2495}
2496
1b43af54 2497int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2498{
2499 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2500 int prefix;
2501
2d7202bf
TG
2502 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2503 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2504 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2505 } else
2506 prefix = 0;
2507
191cd582
BH
2508 return rt6_fill_node(arg->net,
2509 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2510 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2511 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2512}
2513
c127ea2c 2514static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2515{
3b1e0a65 2516 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2517 struct nlattr *tb[RTA_MAX+1];
2518 struct rt6_info *rt;
1da177e4 2519 struct sk_buff *skb;
ab364a6f 2520 struct rtmsg *rtm;
4c9483b2 2521 struct flowi6 fl6;
ab364a6f 2522 int err, iif = 0;
1da177e4 2523
ab364a6f
TG
2524 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2525 if (err < 0)
2526 goto errout;
1da177e4 2527
ab364a6f 2528 err = -EINVAL;
4c9483b2 2529 memset(&fl6, 0, sizeof(fl6));
1da177e4 2530
ab364a6f
TG
2531 if (tb[RTA_SRC]) {
2532 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2533 goto errout;
2534
4e3fd7a0 2535 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2536 }
2537
2538 if (tb[RTA_DST]) {
2539 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2540 goto errout;
2541
4e3fd7a0 2542 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2543 }
2544
2545 if (tb[RTA_IIF])
2546 iif = nla_get_u32(tb[RTA_IIF]);
2547
2548 if (tb[RTA_OIF])
4c9483b2 2549 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2550
2551 if (iif) {
2552 struct net_device *dev;
5578689a 2553 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2554 if (!dev) {
2555 err = -ENODEV;
ab364a6f 2556 goto errout;
1da177e4
LT
2557 }
2558 }
2559
ab364a6f 2560 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2561 if (!skb) {
ab364a6f
TG
2562 err = -ENOBUFS;
2563 goto errout;
2564 }
1da177e4 2565
ab364a6f
TG
2566 /* Reserve room for dummy headers, this skb can pass
2567 through good chunk of routing engine.
2568 */
459a98ed 2569 skb_reset_mac_header(skb);
ab364a6f 2570 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2571
4c9483b2 2572 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2573 skb_dst_set(skb, &rt->dst);
1da177e4 2574
4c9483b2 2575 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2576 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2577 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2578 if (err < 0) {
ab364a6f
TG
2579 kfree_skb(skb);
2580 goto errout;
1da177e4
LT
2581 }
2582
5578689a 2583 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2584errout:
1da177e4 2585 return err;
1da177e4
LT
2586}
2587
86872cb5 2588void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2589{
2590 struct sk_buff *skb;
5578689a 2591 struct net *net = info->nl_net;
528c4ceb
DL
2592 u32 seq;
2593 int err;
2594
2595 err = -ENOBUFS;
38308473 2596 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2597
339bf98f 2598 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2599 if (!skb)
21713ebc
TG
2600 goto errout;
2601
191cd582 2602 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2603 event, info->pid, seq, 0, 0, 0);
26932566
PM
2604 if (err < 0) {
2605 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2606 WARN_ON(err == -EMSGSIZE);
2607 kfree_skb(skb);
2608 goto errout;
2609 }
1ce85fe4
PNA
2610 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2611 info->nlh, gfp_any());
2612 return;
21713ebc
TG
2613errout:
2614 if (err < 0)
5578689a 2615 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2616}
2617
8ed67789
DL
2618static int ip6_route_dev_notify(struct notifier_block *this,
2619 unsigned long event, void *data)
2620{
2621 struct net_device *dev = (struct net_device *)data;
c346dca1 2622 struct net *net = dev_net(dev);
8ed67789
DL
2623
2624 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2625 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2626 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2627#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2628 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2629 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2630 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2631 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2632#endif
2633 }
2634
2635 return NOTIFY_OK;
2636}
2637
1da177e4
LT
2638/*
2639 * /proc
2640 */
2641
2642#ifdef CONFIG_PROC_FS
2643
1da177e4
LT
/* NOTE(review): no user of this struct is visible in this part of the file. */
struct rt6_proc_arg
{
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2652
2653static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2654{
33120b30 2655 struct seq_file *m = p_arg;
69cce1d1 2656 struct neighbour *n;
1da177e4 2657
4b7a4274 2658 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2659
2660#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2661 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2662#else
33120b30 2663 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2664#endif
f2c31e32 2665 rcu_read_lock();
27217455 2666 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2667 if (n) {
2668 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2669 } else {
33120b30 2670 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2671 }
f2c31e32 2672 rcu_read_unlock();
33120b30 2673 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2674 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2675 rt->dst.__use, rt->rt6i_flags,
d1918542 2676 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2677 return 0;
2678}
2679
33120b30 2680static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2681{
f3db4851 2682 struct net *net = (struct net *)m->private;
32b293a5 2683 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2684 return 0;
2685}
1da177e4 2686
33120b30
AD
2687static int ipv6_route_open(struct inode *inode, struct file *file)
2688{
de05c557 2689 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2690}
2691
33120b30
AD
2692static const struct file_operations ipv6_route_proc_fops = {
2693 .owner = THIS_MODULE,
2694 .open = ipv6_route_open,
2695 .read = seq_read,
2696 .llseek = seq_lseek,
b6fcbdb4 2697 .release = single_release_net,
33120b30
AD
2698};
2699
1da177e4
LT
2700static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2701{
69ddb805 2702 struct net *net = (struct net *)seq->private;
1da177e4 2703 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2704 net->ipv6.rt6_stats->fib_nodes,
2705 net->ipv6.rt6_stats->fib_route_nodes,
2706 net->ipv6.rt6_stats->fib_rt_alloc,
2707 net->ipv6.rt6_stats->fib_rt_entries,
2708 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2709 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2710 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2711
2712 return 0;
2713}
2714
2715static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2716{
de05c557 2717 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2718}
2719
9a32144e 2720static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2721 .owner = THIS_MODULE,
2722 .open = rt6_stats_seq_open,
2723 .read = seq_read,
2724 .llseek = seq_lseek,
b6fcbdb4 2725 .release = single_release_net,
1da177e4
LT
2726};
2727#endif /* CONFIG_PROC_FS */
2728
2729#ifdef CONFIG_SYSCTL
2730
1da177e4 2731static
8d65af78 2732int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2733 void __user *buffer, size_t *lenp, loff_t *ppos)
2734{
c486da34
LAG
2735 struct net *net;
2736 int delay;
2737 if (!write)
1da177e4 2738 return -EINVAL;
c486da34
LAG
2739
2740 net = (struct net *)ctl->extra1;
2741 delay = net->ipv6.sysctl.flush_delay;
2742 proc_dointvec(ctl, write, buffer, lenp, ppos);
2743 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2744 return 0;
1da177e4
LT
2745}
2746
760f2d01 2747ctl_table ipv6_route_table_template[] = {
1ab1457c 2748 {
1da177e4 2749 .procname = "flush",
4990509f 2750 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2751 .maxlen = sizeof(int),
89c8b3a1 2752 .mode = 0200,
6d9f239a 2753 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2754 },
2755 {
1da177e4 2756 .procname = "gc_thresh",
9a7ec3a9 2757 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2758 .maxlen = sizeof(int),
2759 .mode = 0644,
6d9f239a 2760 .proc_handler = proc_dointvec,
1da177e4
LT
2761 },
2762 {
1da177e4 2763 .procname = "max_size",
4990509f 2764 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2765 .maxlen = sizeof(int),
2766 .mode = 0644,
6d9f239a 2767 .proc_handler = proc_dointvec,
1da177e4
LT
2768 },
2769 {
1da177e4 2770 .procname = "gc_min_interval",
4990509f 2771 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2772 .maxlen = sizeof(int),
2773 .mode = 0644,
6d9f239a 2774 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2775 },
2776 {
1da177e4 2777 .procname = "gc_timeout",
4990509f 2778 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2779 .maxlen = sizeof(int),
2780 .mode = 0644,
6d9f239a 2781 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2782 },
2783 {
1da177e4 2784 .procname = "gc_interval",
4990509f 2785 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2786 .maxlen = sizeof(int),
2787 .mode = 0644,
6d9f239a 2788 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2789 },
2790 {
1da177e4 2791 .procname = "gc_elasticity",
4990509f 2792 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2793 .maxlen = sizeof(int),
2794 .mode = 0644,
f3d3f616 2795 .proc_handler = proc_dointvec,
1da177e4
LT
2796 },
2797 {
1da177e4 2798 .procname = "mtu_expires",
4990509f 2799 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2800 .maxlen = sizeof(int),
2801 .mode = 0644,
6d9f239a 2802 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2803 },
2804 {
1da177e4 2805 .procname = "min_adv_mss",
4990509f 2806 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2807 .maxlen = sizeof(int),
2808 .mode = 0644,
f3d3f616 2809 .proc_handler = proc_dointvec,
1da177e4
LT
2810 },
2811 {
1da177e4 2812 .procname = "gc_min_interval_ms",
4990509f 2813 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2814 .maxlen = sizeof(int),
2815 .mode = 0644,
6d9f239a 2816 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2817 },
f8572d8f 2818 { }
1da177e4
LT
2819};
2820
2c8c1e72 2821struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2822{
2823 struct ctl_table *table;
2824
2825 table = kmemdup(ipv6_route_table_template,
2826 sizeof(ipv6_route_table_template),
2827 GFP_KERNEL);
5ee09105
YH
2828
2829 if (table) {
2830 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2831 table[0].extra1 = net;
86393e52 2832 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2833 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2834 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2835 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2836 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2837 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2838 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2839 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2840 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2841 }
2842
760f2d01
DL
2843 return table;
2844}
1da177e4
LT
2845#endif
2846
2c8c1e72 2847static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2848{
633d424b 2849 int ret = -ENOMEM;
8ed67789 2850
86393e52
AD
2851 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2852 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2853
fc66f95c
ED
2854 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2855 goto out_ip6_dst_ops;
2856
8ed67789
DL
2857 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2858 sizeof(*net->ipv6.ip6_null_entry),
2859 GFP_KERNEL);
2860 if (!net->ipv6.ip6_null_entry)
fc66f95c 2861 goto out_ip6_dst_entries;
d8d1f30b 2862 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2863 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2864 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2865 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2866 ip6_template_metrics, true);
8ed67789
DL
2867
2868#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2869 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2870 sizeof(*net->ipv6.ip6_prohibit_entry),
2871 GFP_KERNEL);
68fffc67
PZ
2872 if (!net->ipv6.ip6_prohibit_entry)
2873 goto out_ip6_null_entry;
d8d1f30b 2874 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2875 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2876 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2877 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2878 ip6_template_metrics, true);
8ed67789
DL
2879
2880 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2881 sizeof(*net->ipv6.ip6_blk_hole_entry),
2882 GFP_KERNEL);
68fffc67
PZ
2883 if (!net->ipv6.ip6_blk_hole_entry)
2884 goto out_ip6_prohibit_entry;
d8d1f30b 2885 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2886 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2887 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2888 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2889 ip6_template_metrics, true);
8ed67789
DL
2890#endif
2891
b339a47c
PZ
2892 net->ipv6.sysctl.flush_delay = 0;
2893 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2894 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2895 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2896 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2897 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2898 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2899 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2900
cdb18761
DL
2901#ifdef CONFIG_PROC_FS
2902 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2903 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2904#endif
6891a346
BT
2905 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2906
8ed67789
DL
2907 ret = 0;
2908out:
2909 return ret;
f2fc6a54 2910
68fffc67
PZ
2911#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2912out_ip6_prohibit_entry:
2913 kfree(net->ipv6.ip6_prohibit_entry);
2914out_ip6_null_entry:
2915 kfree(net->ipv6.ip6_null_entry);
2916#endif
fc66f95c
ED
2917out_ip6_dst_entries:
2918 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2919out_ip6_dst_ops:
f2fc6a54 2920 goto out;
cdb18761
DL
2921}
2922
2c8c1e72 2923static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2924{
2925#ifdef CONFIG_PROC_FS
2926 proc_net_remove(net, "ipv6_route");
2927 proc_net_remove(net, "rt6_stats");
2928#endif
8ed67789
DL
2929 kfree(net->ipv6.ip6_null_entry);
2930#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2931 kfree(net->ipv6.ip6_prohibit_entry);
2932 kfree(net->ipv6.ip6_blk_hole_entry);
2933#endif
41bb78b4 2934 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2935}
2936
2937static struct pernet_operations ip6_route_net_ops = {
2938 .init = ip6_route_net_init,
2939 .exit = ip6_route_net_exit,
2940};
2941
8ed67789
DL
2942static struct notifier_block ip6_route_dev_notifier = {
2943 .notifier_call = ip6_route_dev_notify,
2944 .priority = 0,
2945};
2946
433d49c3 2947int __init ip6_route_init(void)
1da177e4 2948{
433d49c3
DL
2949 int ret;
2950
9a7ec3a9
DL
2951 ret = -ENOMEM;
2952 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2953 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2954 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2955 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2956 goto out;
14e50e57 2957
fc66f95c 2958 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2959 if (ret)
bdb3289f 2960 goto out_kmem_cache;
bdb3289f 2961
fc66f95c
ED
2962 ret = register_pernet_subsys(&ip6_route_net_ops);
2963 if (ret)
2964 goto out_dst_entries;
2965
5dc121e9
AE
2966 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2967
8ed67789
DL
2968 /* Registering of the loopback is done before this portion of code,
2969 * the loopback reference in rt6_info will not be taken, do it
2970 * manually for init_net */
d8d1f30b 2971 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2972 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2973 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2974 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2975 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2976 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2977 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2978 #endif
433d49c3
DL
2979 ret = fib6_init();
2980 if (ret)
8ed67789 2981 goto out_register_subsys;
433d49c3 2982
433d49c3
DL
2983 ret = xfrm6_init();
2984 if (ret)
cdb18761 2985 goto out_fib6_init;
c35b7e72 2986
433d49c3
DL
2987 ret = fib6_rules_init();
2988 if (ret)
2989 goto xfrm6_init;
7e5449c2 2990
433d49c3 2991 ret = -ENOBUFS;
c7ac8679
GR
2992 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2993 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2994 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2995 goto fib6_rules_init;
c127ea2c 2996
8ed67789 2997 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2998 if (ret)
2999 goto fib6_rules_init;
8ed67789 3000
433d49c3
DL
3001out:
3002 return ret;
3003
3004fib6_rules_init:
433d49c3
DL
3005 fib6_rules_cleanup();
3006xfrm6_init:
433d49c3 3007 xfrm6_fini();
433d49c3 3008out_fib6_init:
433d49c3 3009 fib6_gc_cleanup();
8ed67789
DL
3010out_register_subsys:
3011 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3012out_dst_entries:
3013 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3014out_kmem_cache:
f2fc6a54 3015 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3016 goto out;
1da177e4
LT
3017}
3018
3019void ip6_route_cleanup(void)
3020{
8ed67789 3021 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3022 fib6_rules_cleanup();
1da177e4 3023 xfrm6_fini();
1da177e4 3024 fib6_gc_cleanup();
8ed67789 3025 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3026 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3027 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3028}