]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv6/route.c
inet: Kill FLOWI_FLAG_PRECOW_METRICS.
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
1da177e4
LT
60
61#include <asm/uaccess.h>
62
63#ifdef CONFIG_SYSCTL
64#include <linux/sysctl.h>
65#endif
66
1716a961 67static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 68 const struct in6_addr *dest);
1da177e4 69static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 70static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 71static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
72static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73static void ip6_dst_destroy(struct dst_entry *);
74static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
569d3645 76static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
77
78static int ip6_pkt_discard(struct sk_buff *skb);
79static int ip6_pkt_discard_out(struct sk_buff *skb);
80static void ip6_link_failure(struct sk_buff *skb);
81static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
70ceb4f5 83#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 84static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
95c96174 87 unsigned int pref);
efa2cea0 88static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
91#endif
92
06582540
DM
93static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94{
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
8e2ec639
YZ
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
fbfe95a4 102 peer = rt6_get_peer_create(rt);
06582540
DM
103 if (peer) {
104 u32 *old_p = __DST_METRICS_PTR(old);
105 unsigned long prev, new;
106
107 p = peer->metrics;
108 if (inet_metrics_new(peer))
109 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111 new = (unsigned long) p;
112 prev = cmpxchg(&dst->_metrics, old, new);
113
114 if (prev != old) {
115 p = __DST_METRICS_PTR(prev);
116 if (prev & DST_METRICS_READ_ONLY)
117 p = NULL;
118 }
119 }
120 return p;
121}
122
f894cbf8
DM
123static inline const void *choose_neigh_daddr(struct rt6_info *rt,
124 struct sk_buff *skb,
125 const void *daddr)
39232973
DM
126{
127 struct in6_addr *p = &rt->rt6i_gateway;
128
a7563f34 129 if (!ipv6_addr_any(p))
39232973 130 return (const void *) p;
f894cbf8
DM
131 else if (skb)
132 return &ipv6_hdr(skb)->daddr;
39232973
DM
133 return daddr;
134}
135
f894cbf8
DM
136static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
137 struct sk_buff *skb,
138 const void *daddr)
d3aaeb38 139{
39232973
DM
140 struct rt6_info *rt = (struct rt6_info *) dst;
141 struct neighbour *n;
142
f894cbf8 143 daddr = choose_neigh_daddr(rt, skb, daddr);
39232973 144 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
f83c7790
DM
145 if (n)
146 return n;
147 return neigh_create(&nd_tbl, daddr, dst->dev);
148}
149
8ade06c6 150static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
f83c7790 151{
8ade06c6
DM
152 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
153 if (!n) {
154 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
155 if (IS_ERR(n))
156 return PTR_ERR(n);
157 }
97cac082 158 rt->n = n;
f83c7790
DM
159
160 return 0;
d3aaeb38
DM
161}
162
9a7ec3a9 163static struct dst_ops ip6_dst_ops_template = {
1da177e4 164 .family = AF_INET6,
09640e63 165 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
166 .gc = ip6_dst_gc,
167 .gc_thresh = 1024,
168 .check = ip6_dst_check,
0dbaee3b 169 .default_advmss = ip6_default_advmss,
ebb762f2 170 .mtu = ip6_mtu,
06582540 171 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
172 .destroy = ip6_dst_destroy,
173 .ifdown = ip6_dst_ifdown,
174 .negative_advice = ip6_negative_advice,
175 .link_failure = ip6_link_failure,
176 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 177 .local_out = __ip6_local_out,
d3aaeb38 178 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
179};
180
ebb762f2 181static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 182{
618f9bc7
SK
183 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
184
185 return mtu ? : dst->dev->mtu;
ec831ea7
RD
186}
187
14e50e57
DM
188static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
189{
190}
191
0972ddb2
HB
192static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
193 unsigned long old)
194{
195 return NULL;
196}
197
14e50e57
DM
198static struct dst_ops ip6_dst_blackhole_ops = {
199 .family = AF_INET6,
09640e63 200 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
201 .destroy = ip6_dst_destroy,
202 .check = ip6_dst_check,
ebb762f2 203 .mtu = ip6_blackhole_mtu,
214f45c9 204 .default_advmss = ip6_default_advmss,
14e50e57 205 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 206 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 207 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
208};
209
62fa8a84
DM
210static const u32 ip6_template_metrics[RTAX_MAX] = {
211 [RTAX_HOPLIMIT - 1] = 255,
212};
213
bdb3289f 214static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
215 .dst = {
216 .__refcnt = ATOMIC_INIT(1),
217 .__use = 1,
218 .obsolete = -1,
219 .error = -ENETUNREACH,
d8d1f30b
CG
220 .input = ip6_pkt_discard,
221 .output = ip6_pkt_discard_out,
1da177e4
LT
222 },
223 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 224 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
225 .rt6i_metric = ~(u32) 0,
226 .rt6i_ref = ATOMIC_INIT(1),
227};
228
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);

/*
 * Template for the "prohibit" route entry: a reject route handled by the
 * ip6_pkt_prohibit*() handlers, with error -EACCES.
 */
static struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" route entry: a reject route whose input and
 * output handlers are dst_discard, with error -EINVAL.
 */
static struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= -1,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
265
1da177e4 266/* allocate dst with ip6_dst_ops */
97bab73f 267static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 268 struct net_device *dev,
8b96d22d
DM
269 int flags,
270 struct fib6_table *table)
1da177e4 271{
97bab73f
DM
272 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
273 0, 0, flags);
cf911662 274
97bab73f 275 if (rt) {
a2de86f6 276 memset(&rt->n, 0,
38308473 277 sizeof(*rt) - sizeof(struct dst_entry));
8b96d22d 278 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
97bab73f 279 }
cf911662 280 return rt;
1da177e4
LT
281}
282
283static void ip6_dst_destroy(struct dst_entry *dst)
284{
285 struct rt6_info *rt = (struct rt6_info *)dst;
286 struct inet6_dev *idev = rt->rt6i_idev;
287
97cac082
DM
288 if (rt->n)
289 neigh_release(rt->n);
290
8e2ec639
YZ
291 if (!(rt->dst.flags & DST_HOST))
292 dst_destroy_metrics_generic(dst);
293
38308473 294 if (idev) {
1da177e4
LT
295 rt->rt6i_idev = NULL;
296 in6_dev_put(idev);
1ab1457c 297 }
1716a961
G
298
299 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
300 dst_release(dst->from);
301
97bab73f
DM
302 if (rt6_has_peer(rt)) {
303 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
304 inet_putpeer(peer);
305 }
306}
307
6431cbc2
DM
308static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
309
310static u32 rt6_peer_genid(void)
311{
312 return atomic_read(&__rt6_peer_genid);
313}
314
b3419363
DM
315void rt6_bind_peer(struct rt6_info *rt, int create)
316{
97bab73f 317 struct inet_peer_base *base;
b3419363
DM
318 struct inet_peer *peer;
319
97bab73f
DM
320 base = inetpeer_base_ptr(rt->_rt6i_peer);
321 if (!base)
322 return;
323
324 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
7b34ca2a
DM
325 if (peer) {
326 if (!rt6_set_peer(rt, peer))
327 inet_putpeer(peer);
328 else
329 rt->rt6i_peer_genid = rt6_peer_genid();
330 }
1da177e4
LT
331}
332
333static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
334 int how)
335{
336 struct rt6_info *rt = (struct rt6_info *)dst;
337 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 338 struct net_device *loopback_dev =
c346dca1 339 dev_net(dev)->loopback_dev;
1da177e4 340
97cac082
DM
341 if (dev != loopback_dev) {
342 if (idev && idev->dev == dev) {
343 struct inet6_dev *loopback_idev =
344 in6_dev_get(loopback_dev);
345 if (loopback_idev) {
346 rt->rt6i_idev = loopback_idev;
347 in6_dev_put(idev);
348 }
349 }
350 if (rt->n && rt->n->dev == dev) {
351 rt->n->dev = loopback_dev;
352 dev_hold(loopback_dev);
353 dev_put(dev);
1da177e4
LT
354 }
355 }
356}
357
a50feda5 358static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 359{
1716a961
G
360 struct rt6_info *ort = NULL;
361
362 if (rt->rt6i_flags & RTF_EXPIRES) {
363 if (time_after(jiffies, rt->dst.expires))
a50feda5 364 return true;
1716a961
G
365 } else if (rt->dst.from) {
366 ort = (struct rt6_info *) rt->dst.from;
367 return (ort->rt6i_flags & RTF_EXPIRES) &&
368 time_after(jiffies, ort->dst.expires);
369 }
a50feda5 370 return false;
1da177e4
LT
371}
372
a50feda5 373static bool rt6_need_strict(const struct in6_addr *daddr)
c71099ac 374{
a02cec21
ED
375 return ipv6_addr_type(daddr) &
376 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
377}
378
1da177e4 379/*
c71099ac 380 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
381 */
382
8ed67789
DL
383static inline struct rt6_info *rt6_device_match(struct net *net,
384 struct rt6_info *rt,
b71d1d42 385 const struct in6_addr *saddr,
1da177e4 386 int oif,
d420895e 387 int flags)
1da177e4
LT
388{
389 struct rt6_info *local = NULL;
390 struct rt6_info *sprt;
391
dd3abc4e
YH
392 if (!oif && ipv6_addr_any(saddr))
393 goto out;
394
d8d1f30b 395 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 396 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
397
398 if (oif) {
1da177e4
LT
399 if (dev->ifindex == oif)
400 return sprt;
401 if (dev->flags & IFF_LOOPBACK) {
38308473 402 if (!sprt->rt6i_idev ||
1da177e4 403 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 404 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 405 continue;
1ab1457c 406 if (local && (!oif ||
1da177e4
LT
407 local->rt6i_idev->dev->ifindex == oif))
408 continue;
409 }
410 local = sprt;
411 }
dd3abc4e
YH
412 } else {
413 if (ipv6_chk_addr(net, saddr, dev,
414 flags & RT6_LOOKUP_F_IFACE))
415 return sprt;
1da177e4 416 }
dd3abc4e 417 }
1da177e4 418
dd3abc4e 419 if (oif) {
1da177e4
LT
420 if (local)
421 return local;
422
d420895e 423 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 424 return net->ipv6.ip6_null_entry;
1da177e4 425 }
dd3abc4e 426out:
1da177e4
LT
427 return rt;
428}
429
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: when the neighbour bound to @rt is not in a
 * NUD_VALID state and was last updated more than rtr_probe_interval ago,
 * send a neighbour solicitation to its solicited-node multicast address.
 *
 * @rt may be NULL, in which case nothing is done.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	rcu_read_lock();
	neigh = rt ? rt->n : NULL;
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;
	/*
	 * neigh->updated is modified below, so the write side of the
	 * neighbour lock is required; the read side does not exclude
	 * other holders of the read lock from racing on the timestamp.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		/* Drop the lock before transmitting the solicitation. */
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support disabled: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
469
1da177e4 470/*
554cfb7e 471 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 472 */
b6f99a21 473static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 474{
d1918542 475 struct net_device *dev = rt->dst.dev;
161980f4 476 if (!oif || dev->ifindex == oif)
554cfb7e 477 return 2;
161980f4
DM
478 if ((dev->flags & IFF_LOOPBACK) &&
479 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
480 return 1;
481 return 0;
554cfb7e 482}
1da177e4 483
b6f99a21 484static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 485{
f2c31e32 486 struct neighbour *neigh;
398bcbeb 487 int m;
f2c31e32
ED
488
489 rcu_read_lock();
97cac082 490 neigh = rt->n;
4d0c5911
YH
491 if (rt->rt6i_flags & RTF_NONEXTHOP ||
492 !(rt->rt6i_flags & RTF_GATEWAY))
493 m = 1;
494 else if (neigh) {
554cfb7e
YH
495 read_lock_bh(&neigh->lock);
496 if (neigh->nud_state & NUD_VALID)
4d0c5911 497 m = 2;
398bcbeb
YH
498#ifdef CONFIG_IPV6_ROUTER_PREF
499 else if (neigh->nud_state & NUD_FAILED)
500 m = 0;
501#endif
502 else
ea73ee23 503 m = 1;
554cfb7e 504 read_unlock_bh(&neigh->lock);
398bcbeb
YH
505 } else
506 m = 0;
f2c31e32 507 rcu_read_unlock();
554cfb7e 508 return m;
1da177e4
LT
509}
510
554cfb7e
YH
511static int rt6_score_route(struct rt6_info *rt, int oif,
512 int strict)
1da177e4 513{
4d0c5911 514 int m, n;
1ab1457c 515
4d0c5911 516 m = rt6_check_dev(rt, oif);
77d16f45 517 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 518 return -1;
ebacaaa0
YH
519#ifdef CONFIG_IPV6_ROUTER_PREF
520 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
521#endif
4d0c5911 522 n = rt6_check_neigh(rt);
557e92ef 523 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
524 return -1;
525 return m;
526}
527
f11e6659
DM
528static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
529 int *mpri, struct rt6_info *match)
554cfb7e 530{
f11e6659
DM
531 int m;
532
533 if (rt6_check_expired(rt))
534 goto out;
535
536 m = rt6_score_route(rt, oif, strict);
537 if (m < 0)
538 goto out;
539
540 if (m > *mpri) {
541 if (strict & RT6_LOOKUP_F_REACHABLE)
542 rt6_probe(match);
543 *mpri = m;
544 match = rt;
545 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
546 rt6_probe(rt);
547 }
548
549out:
550 return match;
551}
552
553static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
554 struct rt6_info *rr_head,
555 u32 metric, int oif, int strict)
556{
557 struct rt6_info *rt, *match;
554cfb7e 558 int mpri = -1;
1da177e4 559
f11e6659
DM
560 match = NULL;
561 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 562 rt = rt->dst.rt6_next)
f11e6659
DM
563 match = find_match(rt, oif, strict, &mpri, match);
564 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 565 rt = rt->dst.rt6_next)
f11e6659 566 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 567
f11e6659
DM
568 return match;
569}
1da177e4 570
f11e6659
DM
571static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
572{
573 struct rt6_info *match, *rt0;
8ed67789 574 struct net *net;
1da177e4 575
f11e6659
DM
576 rt0 = fn->rr_ptr;
577 if (!rt0)
578 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 579
f11e6659 580 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 581
554cfb7e 582 if (!match &&
f11e6659 583 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 584 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 585
554cfb7e 586 /* no entries matched; do round-robin */
f11e6659
DM
587 if (!next || next->rt6i_metric != rt0->rt6i_metric)
588 next = fn->leaf;
589
590 if (next != rt0)
591 fn->rr_ptr = next;
1da177e4 592 }
1da177e4 593
d1918542 594 net = dev_net(rt0->dst.dev);
a02cec21 595 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
596}
597
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option of @len bytes at @opt, received on
 * @dev from the router @gwaddr.
 *
 * Depending on the advertised lifetime the matching route is deleted,
 * added, or has its preference and expiry refreshed.
 *
 * Returns 0 on success, -EINVAL when the option is malformed or carries
 * an invalid preference.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) |
				 RTF_PREF(pref);
	}

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	dst_release(&rt->dst);
	return 0;
}
#endif
670
/*
 * BACKTRACK - climb the FIB tree after a failed lookup.
 *
 * Must be expanded inside a function that has local variables 'rt'
 * (struct rt6_info *) and 'fn' (struct fib6_node *) and the labels 'out'
 * and 'restart'.  If 'rt' is the namespace's ip6_null_entry, walk towards
 * the root: for each parent, descend into its source subtree (looked up
 * with @saddr) when it has one that is not the node we came from,
 * otherwise step to the parent itself.  Jumps to 'restart' at the first
 * node carrying route info, or to 'out' once the top-level root is hit.
 *
 * Both macro arguments are parenthesized so that arbitrary expressions
 * can be passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 688
8ed67789
DL
689static struct rt6_info *ip6_pol_route_lookup(struct net *net,
690 struct fib6_table *table,
4c9483b2 691 struct flowi6 *fl6, int flags)
1da177e4
LT
692{
693 struct fib6_node *fn;
694 struct rt6_info *rt;
695
c71099ac 696 read_lock_bh(&table->tb6_lock);
4c9483b2 697 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
698restart:
699 rt = fn->leaf;
4c9483b2
DM
700 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
701 BACKTRACK(net, &fl6->saddr);
c71099ac 702out:
d8d1f30b 703 dst_use(&rt->dst, jiffies);
c71099ac 704 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
705 return rt;
706
707}
708
ea6e574e
FW
709struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
710 int flags)
711{
712 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
713}
714EXPORT_SYMBOL_GPL(ip6_route_lookup);
715
9acd9f3a
YH
716struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
717 const struct in6_addr *saddr, int oif, int strict)
c71099ac 718{
4c9483b2
DM
719 struct flowi6 fl6 = {
720 .flowi6_oif = oif,
721 .daddr = *daddr,
c71099ac
TG
722 };
723 struct dst_entry *dst;
77d16f45 724 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 725
adaa70bb 726 if (saddr) {
4c9483b2 727 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
728 flags |= RT6_LOOKUP_F_HAS_SADDR;
729 }
730
4c9483b2 731 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
732 if (dst->error == 0)
733 return (struct rt6_info *) dst;
734
735 dst_release(dst);
736
1da177e4
LT
737 return NULL;
738}
739
7159039a
YH
740EXPORT_SYMBOL(rt6_lookup);
741
c71099ac 742/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
743 It takes new route entry, the addition fails by any reason the
744 route is freed. In any case, if caller does not hold it, it may
745 be destroyed.
746 */
747
86872cb5 748static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
749{
750 int err;
c71099ac 751 struct fib6_table *table;
1da177e4 752
c71099ac
TG
753 table = rt->rt6i_table;
754 write_lock_bh(&table->tb6_lock);
86872cb5 755 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 756 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
757
758 return err;
759}
760
40e22e8f
TG
761int ip6_ins_rt(struct rt6_info *rt)
762{
4d1169c1 763 struct nl_info info = {
d1918542 764 .nl_net = dev_net(rt->dst.dev),
4d1169c1 765 };
528c4ceb 766 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
767}
768
1716a961 769static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 770 const struct in6_addr *daddr,
b71d1d42 771 const struct in6_addr *saddr)
1da177e4 772{
1da177e4
LT
773 struct rt6_info *rt;
774
775 /*
776 * Clone the route.
777 */
778
21efcfa0 779 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
780
781 if (rt) {
14deae41
DM
782 int attempts = !in_softirq();
783
38308473 784 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
bb3c3686 785 if (ort->rt6i_dst.plen != 128 &&
21efcfa0 786 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 787 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 788 rt->rt6i_gateway = *daddr;
58c4fb86 789 }
1da177e4 790
1da177e4 791 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
792
793#ifdef CONFIG_IPV6_SUBTREES
794 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 795 rt->rt6i_src.addr = *saddr;
1da177e4
LT
796 rt->rt6i_src.plen = 128;
797 }
798#endif
799
14deae41 800 retry:
8ade06c6 801 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
d1918542 802 struct net *net = dev_net(rt->dst.dev);
14deae41
DM
803 int saved_rt_min_interval =
804 net->ipv6.sysctl.ip6_rt_gc_min_interval;
805 int saved_rt_elasticity =
806 net->ipv6.sysctl.ip6_rt_gc_elasticity;
807
808 if (attempts-- > 0) {
809 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
810 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
811
86393e52 812 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
813
814 net->ipv6.sysctl.ip6_rt_gc_elasticity =
815 saved_rt_elasticity;
816 net->ipv6.sysctl.ip6_rt_gc_min_interval =
817 saved_rt_min_interval;
818 goto retry;
819 }
820
f3213831 821 net_warn_ratelimited("Neighbour table overflow\n");
d8d1f30b 822 dst_free(&rt->dst);
14deae41
DM
823 return NULL;
824 }
95a9a5ba 825 }
1da177e4 826
95a9a5ba
YH
827 return rt;
828}
1da177e4 829
21efcfa0
ED
830static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
831 const struct in6_addr *daddr)
299d9939 832{
21efcfa0
ED
833 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
834
299d9939 835 if (rt) {
299d9939 836 rt->rt6i_flags |= RTF_CACHE;
97cac082 837 rt->n = neigh_clone(ort->n);
299d9939
YH
838 }
839 return rt;
840}
841
8ed67789 842static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 843 struct flowi6 *fl6, int flags)
1da177e4
LT
844{
845 struct fib6_node *fn;
519fbd87 846 struct rt6_info *rt, *nrt;
c71099ac 847 int strict = 0;
1da177e4 848 int attempts = 3;
519fbd87 849 int err;
53b7997f 850 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 851
77d16f45 852 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
853
854relookup:
c71099ac 855 read_lock_bh(&table->tb6_lock);
1da177e4 856
8238dd06 857restart_2:
4c9483b2 858 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
859
860restart:
4acad72d 861 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 862
4c9483b2 863 BACKTRACK(net, &fl6->saddr);
8ed67789 864 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 865 rt->rt6i_flags & RTF_CACHE)
1ddef044 866 goto out;
1da177e4 867
d8d1f30b 868 dst_hold(&rt->dst);
c71099ac 869 read_unlock_bh(&table->tb6_lock);
fb9de91e 870
97cac082 871 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 872 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 873 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 874 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
875 else
876 goto out2;
e40cf353 877
d8d1f30b 878 dst_release(&rt->dst);
8ed67789 879 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 880
d8d1f30b 881 dst_hold(&rt->dst);
519fbd87 882 if (nrt) {
40e22e8f 883 err = ip6_ins_rt(nrt);
519fbd87 884 if (!err)
1da177e4 885 goto out2;
1da177e4 886 }
1da177e4 887
519fbd87
YH
888 if (--attempts <= 0)
889 goto out2;
890
891 /*
c71099ac 892 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
893 * released someone could insert this route. Relookup.
894 */
d8d1f30b 895 dst_release(&rt->dst);
519fbd87
YH
896 goto relookup;
897
898out:
8238dd06
YH
899 if (reachable) {
900 reachable = 0;
901 goto restart_2;
902 }
d8d1f30b 903 dst_hold(&rt->dst);
c71099ac 904 read_unlock_bh(&table->tb6_lock);
1da177e4 905out2:
d8d1f30b
CG
906 rt->dst.lastuse = jiffies;
907 rt->dst.__use++;
c71099ac
TG
908
909 return rt;
1da177e4
LT
910}
911
8ed67789 912static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 913 struct flowi6 *fl6, int flags)
4acad72d 914{
4c9483b2 915 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
916}
917
72331bc0
SL
918static struct dst_entry *ip6_route_input_lookup(struct net *net,
919 struct net_device *dev,
920 struct flowi6 *fl6, int flags)
921{
922 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
923 flags |= RT6_LOOKUP_F_IFACE;
924
925 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
926}
927
c71099ac
TG
928void ip6_route_input(struct sk_buff *skb)
929{
b71d1d42 930 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 931 struct net *net = dev_net(skb->dev);
adaa70bb 932 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
933 struct flowi6 fl6 = {
934 .flowi6_iif = skb->dev->ifindex,
935 .daddr = iph->daddr,
936 .saddr = iph->saddr,
38308473 937 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
938 .flowi6_mark = skb->mark,
939 .flowi6_proto = iph->nexthdr,
c71099ac 940 };
adaa70bb 941
72331bc0 942 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
943}
944
8ed67789 945static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 946 struct flowi6 *fl6, int flags)
1da177e4 947{
4c9483b2 948 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
949}
950
9c7a4f9c 951struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 952 struct flowi6 *fl6)
c71099ac
TG
953{
954 int flags = 0;
955
4dc27d1c
DM
956 fl6->flowi6_iif = net->loopback_dev->ifindex;
957
4c9483b2 958 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 959 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 960
4c9483b2 961 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 962 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
963 else if (sk)
964 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 965
4c9483b2 966 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
967}
968
7159039a 969EXPORT_SYMBOL(ip6_route_output);
1da177e4 970
2774c131 971struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 972{
5c1e6aa3 973 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
974 struct dst_entry *new = NULL;
975
5c1e6aa3 976 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 977 if (rt) {
cf911662 978 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
97bab73f 979 rt6_init_peer(rt, net->ipv6.peers);
cf911662 980
d8d1f30b 981 new = &rt->dst;
14e50e57 982
14e50e57 983 new->__use = 1;
352e512c
HX
984 new->input = dst_discard;
985 new->output = dst_discard;
14e50e57 986
21efcfa0
ED
987 if (dst_metrics_read_only(&ort->dst))
988 new->_metrics = ort->dst._metrics;
989 else
990 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
991 rt->rt6i_idev = ort->rt6i_idev;
992 if (rt->rt6i_idev)
993 in6_dev_hold(rt->rt6i_idev);
14e50e57 994
4e3fd7a0 995 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
996 rt->rt6i_flags = ort->rt6i_flags;
997 rt6_clean_expires(rt);
14e50e57
DM
998 rt->rt6i_metric = 0;
999
1000 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1001#ifdef CONFIG_IPV6_SUBTREES
1002 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1003#endif
1004
1005 dst_free(new);
1006 }
1007
69ead7af
DM
1008 dst_release(dst_orig);
1009 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1010}
14e50e57 1011
1da177e4
LT
1012/*
1013 * Destination cache support functions
1014 */
1015
1016static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1017{
1018 struct rt6_info *rt;
1019
1020 rt = (struct rt6_info *) dst;
1021
6431cbc2
DM
1022 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1023 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
97bab73f 1024 if (!rt6_has_peer(rt))
6431cbc2
DM
1025 rt6_bind_peer(rt, 0);
1026 rt->rt6i_peer_genid = rt6_peer_genid();
1027 }
1da177e4 1028 return dst;
6431cbc2 1029 }
1da177e4
LT
1030 return NULL;
1031}
1032
1033static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1034{
1035 struct rt6_info *rt = (struct rt6_info *) dst;
1036
1037 if (rt) {
54c1a859
YH
1038 if (rt->rt6i_flags & RTF_CACHE) {
1039 if (rt6_check_expired(rt)) {
1040 ip6_del_rt(rt);
1041 dst = NULL;
1042 }
1043 } else {
1da177e4 1044 dst_release(dst);
54c1a859
YH
1045 dst = NULL;
1046 }
1da177e4 1047 }
54c1a859 1048 return dst;
1da177e4
LT
1049}
1050
1051static void ip6_link_failure(struct sk_buff *skb)
1052{
1053 struct rt6_info *rt;
1054
3ffe533c 1055 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1056
adf30907 1057 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1058 if (rt) {
1716a961
G
1059 if (rt->rt6i_flags & RTF_CACHE)
1060 rt6_update_expires(rt, 0);
1061 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1da177e4
LT
1062 rt->rt6i_node->fn_sernum = -1;
1063 }
1064}
1065
1066static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1067{
1068 struct rt6_info *rt6 = (struct rt6_info*)dst;
1069
81aded24 1070 dst_confirm(dst);
1da177e4 1071 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
81aded24
DM
1072 struct net *net = dev_net(dst->dev);
1073
1da177e4
LT
1074 rt6->rt6i_flags |= RTF_MODIFIED;
1075 if (mtu < IPV6_MIN_MTU) {
defb3519 1076 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1077 mtu = IPV6_MIN_MTU;
defb3519
DM
1078 features |= RTAX_FEATURE_ALLFRAG;
1079 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1080 }
defb3519 1081 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1082 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1083 }
1084}
1085
42ae66c8
DM
1086void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1087 int oif, u32 mark)
81aded24
DM
1088{
1089 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1090 struct dst_entry *dst;
1091 struct flowi6 fl6;
1092
1093 memset(&fl6, 0, sizeof(fl6));
1094 fl6.flowi6_oif = oif;
1095 fl6.flowi6_mark = mark;
3e12939a 1096 fl6.flowi6_flags = 0;
81aded24
DM
1097 fl6.daddr = iph->daddr;
1098 fl6.saddr = iph->saddr;
1099 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1100
1101 dst = ip6_route_output(net, NULL, &fl6);
1102 if (!dst->error)
1103 ip6_rt_update_pmtu(dst, ntohl(mtu));
1104 dst_release(dst);
1105}
1106EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1107
1108void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1109{
1110 ip6_update_pmtu(skb, sock_net(sk), mtu,
1111 sk->sk_bound_dev_if, sk->sk_mark);
1112}
1113EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1114
0dbaee3b 1115static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1116{
0dbaee3b
DM
1117 struct net_device *dev = dst->dev;
1118 unsigned int mtu = dst_mtu(dst);
1119 struct net *net = dev_net(dev);
1120
1da177e4
LT
1121 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1122
5578689a
DL
1123 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1124 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1125
1126 /*
1ab1457c
YH
1127 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1128 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1129 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1130 * rely only on pmtu discovery"
1131 */
1132 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1133 mtu = IPV6_MAXPLEN;
1134 return mtu;
1135}
1136
ebb762f2 1137static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1138{
d33e4553 1139 struct inet6_dev *idev;
618f9bc7
SK
1140 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1141
1142 if (mtu)
1143 return mtu;
1144
1145 mtu = IPV6_MIN_MTU;
d33e4553
DM
1146
1147 rcu_read_lock();
1148 idev = __in6_dev_get(dst->dev);
1149 if (idev)
1150 mtu = idev->cnf.mtu6;
1151 rcu_read_unlock();
1152
1153 return mtu;
1154}
1155
3b00944c
YH
/*
 * Head of a singly linked list (chained through dst.next) of the dst
 * entries created by icmp6_dst_alloc(); icmp6_dst_gc() and
 * icmp6_clean_all() unlink and free entries from it.
 */
1156static struct dst_entry *icmp6_dst_gc_list;
/* Taken (with BHs disabled) around every access to icmp6_dst_gc_list. */
1157static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1158
3b00944c 1159struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1160 struct neighbour *neigh,
87a11578 1161 struct flowi6 *fl6)
1da177e4 1162{
87a11578 1163 struct dst_entry *dst;
1da177e4
LT
1164 struct rt6_info *rt;
1165 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1166 struct net *net = dev_net(dev);
1da177e4 1167
38308473 1168 if (unlikely(!idev))
122bdf67 1169 return ERR_PTR(-ENODEV);
1da177e4 1170
8b96d22d 1171 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1172 if (unlikely(!rt)) {
1da177e4 1173 in6_dev_put(idev);
87a11578 1174 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1175 goto out;
1176 }
1177
1da177e4
LT
1178 if (neigh)
1179 neigh_hold(neigh);
14deae41 1180 else {
f894cbf8 1181 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
b43faac6 1182 if (IS_ERR(neigh)) {
252c3d84 1183 in6_dev_put(idev);
b43faac6
DM
1184 dst_free(&rt->dst);
1185 return ERR_CAST(neigh);
1186 }
14deae41 1187 }
1da177e4 1188
8e2ec639
YZ
1189 rt->dst.flags |= DST_HOST;
1190 rt->dst.output = ip6_output;
97cac082 1191 rt->n = neigh;
d8d1f30b 1192 atomic_set(&rt->dst.__refcnt, 1);
87a11578 1193 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1194 rt->rt6i_dst.plen = 128;
1195 rt->rt6i_idev = idev;
7011687f 1196 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1197
3b00944c 1198 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1199 rt->dst.next = icmp6_dst_gc_list;
1200 icmp6_dst_gc_list = &rt->dst;
3b00944c 1201 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1202
5578689a 1203 fib6_force_start_gc(net);
1da177e4 1204
87a11578
DM
1205 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1206
1da177e4 1207out:
87a11578 1208 return dst;
1da177e4
LT
1209}
1210
3d0f24a7 1211int icmp6_dst_gc(void)
1da177e4 1212{
e9476e95 1213 struct dst_entry *dst, **pprev;
3d0f24a7 1214 int more = 0;
1da177e4 1215
3b00944c
YH
1216 spin_lock_bh(&icmp6_dst_lock);
1217 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1218
1da177e4
LT
1219 while ((dst = *pprev) != NULL) {
1220 if (!atomic_read(&dst->__refcnt)) {
1221 *pprev = dst->next;
1222 dst_free(dst);
1da177e4
LT
1223 } else {
1224 pprev = &dst->next;
3d0f24a7 1225 ++more;
1da177e4
LT
1226 }
1227 }
1228
3b00944c 1229 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1230
3d0f24a7 1231 return more;
1da177e4
LT
1232}
1233
1e493d19
DM
1234static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1235 void *arg)
1236{
1237 struct dst_entry *dst, **pprev;
1238
1239 spin_lock_bh(&icmp6_dst_lock);
1240 pprev = &icmp6_dst_gc_list;
1241 while ((dst = *pprev) != NULL) {
1242 struct rt6_info *rt = (struct rt6_info *) dst;
1243 if (func(rt, arg)) {
1244 *pprev = dst->next;
1245 dst_free(dst);
1246 } else {
1247 pprev = &dst->next;
1248 }
1249 }
1250 spin_unlock_bh(&icmp6_dst_lock);
1251}
1252
569d3645 1253static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1254{
1da177e4 1255 unsigned long now = jiffies;
86393e52 1256 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1257 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1258 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1259 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1260 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1261 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1262 int entries;
7019b78e 1263
fc66f95c 1264 entries = dst_entries_get_fast(ops);
7019b78e 1265 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1266 entries <= rt_max_size)
1da177e4
LT
1267 goto out;
1268
6891a346
BT
1269 net->ipv6.ip6_rt_gc_expire++;
1270 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1271 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1272 entries = dst_entries_get_slow(ops);
1273 if (entries < ops->gc_thresh)
7019b78e 1274 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1275out:
7019b78e 1276 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1277 return entries > rt_max_size;
1da177e4
LT
1278}
1279
1280/* Clean host part of a prefix. Not necessary in radix tree,
1281 but results in cleaner routing tables.
1282
1283 Remove it only when all the things will work!
1284 */
1285
6b75d090 1286int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1287{
5170ae82 1288 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1289 if (hoplimit == 0) {
6b75d090 1290 struct net_device *dev = dst->dev;
c68f24cc
ED
1291 struct inet6_dev *idev;
1292
1293 rcu_read_lock();
1294 idev = __in6_dev_get(dev);
1295 if (idev)
6b75d090 1296 hoplimit = idev->cnf.hop_limit;
c68f24cc 1297 else
53b7997f 1298 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1299 rcu_read_unlock();
1da177e4
LT
1300 }
1301 return hoplimit;
1302}
abbf46ae 1303EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1304
1305/*
1306 *
1307 */
1308
86872cb5 1309int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1310{
1311 int err;
5578689a 1312 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1313 struct rt6_info *rt = NULL;
1314 struct net_device *dev = NULL;
1315 struct inet6_dev *idev = NULL;
c71099ac 1316 struct fib6_table *table;
1da177e4
LT
1317 int addr_type;
1318
86872cb5 1319 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1320 return -EINVAL;
1321#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1322 if (cfg->fc_src_len)
1da177e4
LT
1323 return -EINVAL;
1324#endif
86872cb5 1325 if (cfg->fc_ifindex) {
1da177e4 1326 err = -ENODEV;
5578689a 1327 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1328 if (!dev)
1329 goto out;
1330 idev = in6_dev_get(dev);
1331 if (!idev)
1332 goto out;
1333 }
1334
86872cb5
TG
1335 if (cfg->fc_metric == 0)
1336 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1337
d71314b4 1338 err = -ENOBUFS;
38308473
DM
1339 if (cfg->fc_nlinfo.nlh &&
1340 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1341 table = fib6_get_table(net, cfg->fc_table);
38308473 1342 if (!table) {
f3213831 1343 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1344 table = fib6_new_table(net, cfg->fc_table);
1345 }
1346 } else {
1347 table = fib6_new_table(net, cfg->fc_table);
1348 }
38308473
DM
1349
1350 if (!table)
c71099ac 1351 goto out;
c71099ac 1352
8b96d22d 1353 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1da177e4 1354
38308473 1355 if (!rt) {
1da177e4
LT
1356 err = -ENOMEM;
1357 goto out;
1358 }
1359
d8d1f30b 1360 rt->dst.obsolete = -1;
1716a961
G
1361
1362 if (cfg->fc_flags & RTF_EXPIRES)
1363 rt6_set_expires(rt, jiffies +
1364 clock_t_to_jiffies(cfg->fc_expires));
1365 else
1366 rt6_clean_expires(rt);
1da177e4 1367
86872cb5
TG
1368 if (cfg->fc_protocol == RTPROT_UNSPEC)
1369 cfg->fc_protocol = RTPROT_BOOT;
1370 rt->rt6i_protocol = cfg->fc_protocol;
1371
1372 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1373
1374 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1375 rt->dst.input = ip6_mc_input;
ab79ad14
1376 else if (cfg->fc_flags & RTF_LOCAL)
1377 rt->dst.input = ip6_input;
1da177e4 1378 else
d8d1f30b 1379 rt->dst.input = ip6_forward;
1da177e4 1380
d8d1f30b 1381 rt->dst.output = ip6_output;
1da177e4 1382
86872cb5
TG
1383 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1384 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1385 if (rt->rt6i_dst.plen == 128)
11d53b49 1386 rt->dst.flags |= DST_HOST;
1da177e4 1387
8e2ec639
YZ
1388 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1389 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1390 if (!metrics) {
1391 err = -ENOMEM;
1392 goto out;
1393 }
1394 dst_init_metrics(&rt->dst, metrics, 0);
1395 }
1da177e4 1396#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1397 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1398 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1399#endif
1400
86872cb5 1401 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1402
1403 /* We cannot add true routes via loopback here,
1404 they would result in kernel looping; promote them to reject routes
1405 */
86872cb5 1406 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1407 (dev && (dev->flags & IFF_LOOPBACK) &&
1408 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1409 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1410 /* hold loopback dev/idev if we haven't done so. */
5578689a 1411 if (dev != net->loopback_dev) {
1da177e4
LT
1412 if (dev) {
1413 dev_put(dev);
1414 in6_dev_put(idev);
1415 }
5578689a 1416 dev = net->loopback_dev;
1da177e4
LT
1417 dev_hold(dev);
1418 idev = in6_dev_get(dev);
1419 if (!idev) {
1420 err = -ENODEV;
1421 goto out;
1422 }
1423 }
d8d1f30b
CG
1424 rt->dst.output = ip6_pkt_discard_out;
1425 rt->dst.input = ip6_pkt_discard;
1426 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1427 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1428 goto install_route;
1429 }
1430
86872cb5 1431 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1432 const struct in6_addr *gw_addr;
1da177e4
LT
1433 int gwa_type;
1434
86872cb5 1435 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1436 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1437 gwa_type = ipv6_addr_type(gw_addr);
1438
1439 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1440 struct rt6_info *grt;
1441
1442 /* IPv6 strictly inhibits using not link-local
1443 addresses as nexthop address.
1444 Otherwise, router will not able to send redirects.
1445 It is very good, but in some (rare!) circumstances
1446 (SIT, PtP, NBMA NOARP links) it is handy to allow
1447 some exceptions. --ANK
1448 */
1449 err = -EINVAL;
38308473 1450 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1451 goto out;
1452
5578689a 1453 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1454
1455 err = -EHOSTUNREACH;
38308473 1456 if (!grt)
1da177e4
LT
1457 goto out;
1458 if (dev) {
d1918542 1459 if (dev != grt->dst.dev) {
d8d1f30b 1460 dst_release(&grt->dst);
1da177e4
LT
1461 goto out;
1462 }
1463 } else {
d1918542 1464 dev = grt->dst.dev;
1da177e4
LT
1465 idev = grt->rt6i_idev;
1466 dev_hold(dev);
1467 in6_dev_hold(grt->rt6i_idev);
1468 }
38308473 1469 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1470 err = 0;
d8d1f30b 1471 dst_release(&grt->dst);
1da177e4
LT
1472
1473 if (err)
1474 goto out;
1475 }
1476 err = -EINVAL;
38308473 1477 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1478 goto out;
1479 }
1480
1481 err = -ENODEV;
38308473 1482 if (!dev)
1da177e4
LT
1483 goto out;
1484
c3968a85
DW
1485 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1486 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1487 err = -EINVAL;
1488 goto out;
1489 }
4e3fd7a0 1490 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1491 rt->rt6i_prefsrc.plen = 128;
1492 } else
1493 rt->rt6i_prefsrc.plen = 0;
1494
86872cb5 1495 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
8ade06c6 1496 err = rt6_bind_neighbour(rt, dev);
f83c7790 1497 if (err)
1da177e4 1498 goto out;
1da177e4
LT
1499 }
1500
86872cb5 1501 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1502
1503install_route:
86872cb5
TG
1504 if (cfg->fc_mx) {
1505 struct nlattr *nla;
1506 int remaining;
1507
1508 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1509 int type = nla_type(nla);
86872cb5
TG
1510
1511 if (type) {
1512 if (type > RTAX_MAX) {
1da177e4
LT
1513 err = -EINVAL;
1514 goto out;
1515 }
86872cb5 1516
defb3519 1517 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1518 }
1da177e4
LT
1519 }
1520 }
1521
d8d1f30b 1522 rt->dst.dev = dev;
1da177e4 1523 rt->rt6i_idev = idev;
c71099ac 1524 rt->rt6i_table = table;
63152fc0 1525
c346dca1 1526 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1527
86872cb5 1528 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1529
1530out:
1531 if (dev)
1532 dev_put(dev);
1533 if (idev)
1534 in6_dev_put(idev);
1535 if (rt)
d8d1f30b 1536 dst_free(&rt->dst);
1da177e4
LT
1537 return err;
1538}
1539
86872cb5 1540static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1541{
1542 int err;
c71099ac 1543 struct fib6_table *table;
d1918542 1544 struct net *net = dev_net(rt->dst.dev);
1da177e4 1545
8ed67789 1546 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1547 return -ENOENT;
1548
c71099ac
TG
1549 table = rt->rt6i_table;
1550 write_lock_bh(&table->tb6_lock);
1da177e4 1551
86872cb5 1552 err = fib6_del(rt, info);
d8d1f30b 1553 dst_release(&rt->dst);
1da177e4 1554
c71099ac 1555 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1556
1557 return err;
1558}
1559
e0a1ad73
TG
1560int ip6_del_rt(struct rt6_info *rt)
1561{
4d1169c1 1562 struct nl_info info = {
d1918542 1563 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1564 };
528c4ceb 1565 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1566}
1567
86872cb5 1568static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1569{
c71099ac 1570 struct fib6_table *table;
1da177e4
LT
1571 struct fib6_node *fn;
1572 struct rt6_info *rt;
1573 int err = -ESRCH;
1574
5578689a 1575 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1576 if (!table)
c71099ac
TG
1577 return err;
1578
1579 read_lock_bh(&table->tb6_lock);
1da177e4 1580
c71099ac 1581 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1582 &cfg->fc_dst, cfg->fc_dst_len,
1583 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1584
1da177e4 1585 if (fn) {
d8d1f30b 1586 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1587 if (cfg->fc_ifindex &&
d1918542
DM
1588 (!rt->dst.dev ||
1589 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1590 continue;
86872cb5
TG
1591 if (cfg->fc_flags & RTF_GATEWAY &&
1592 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1593 continue;
86872cb5 1594 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1595 continue;
d8d1f30b 1596 dst_hold(&rt->dst);
c71099ac 1597 read_unlock_bh(&table->tb6_lock);
1da177e4 1598
86872cb5 1599 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1600 }
1601 }
c71099ac 1602 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1603
1604 return err;
1605}
1606
1607/*
1608 * Handle redirects
1609 */
/*
 * Lookup key for redirect validation: a flowi6 with the redirecting
 * gateway appended.  __ip6_route_redirect() receives a pointer to fl6
 * and casts it back to this struct, so fl6 must stay the first member.
 */
a6279458 1610struct ip6rd_flowi {
4c9483b2 1611 struct flowi6 fl6;
a6279458
YH
/* address compared against rt6i_gateway of candidate routes */
1612 struct in6_addr gateway;
1613};
1614
8ed67789
DL
1615static struct rt6_info *__ip6_route_redirect(struct net *net,
1616 struct fib6_table *table,
4c9483b2 1617 struct flowi6 *fl6,
a6279458 1618 int flags)
1da177e4 1619{
4c9483b2 1620 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1621 struct rt6_info *rt;
e843b9e1 1622 struct fib6_node *fn;
c71099ac 1623
1da177e4 1624 /*
e843b9e1
YH
1625 * Get the "current" route for this destination and
1626 * check if the redirect has come from approriate router.
1627 *
1628 * RFC 2461 specifies that redirects should only be
1629 * accepted if they come from the nexthop to the target.
1630 * Due to the way the routes are chosen, this notion
1631 * is a bit fuzzy and one might need to check all possible
1632 * routes.
1da177e4 1633 */
1da177e4 1634
c71099ac 1635 read_lock_bh(&table->tb6_lock);
4c9483b2 1636 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1637restart:
d8d1f30b 1638 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1639 /*
1640 * Current route is on-link; redirect is always invalid.
1641 *
1642 * Seems, previous statement is not true. It could
1643 * be node, which looks for us as on-link (f.e. proxy ndisc)
1644 * But then router serving it might decide, that we should
1645 * know truth 8)8) --ANK (980726).
1646 */
1647 if (rt6_check_expired(rt))
1648 continue;
1649 if (!(rt->rt6i_flags & RTF_GATEWAY))
1650 continue;
d1918542 1651 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
e843b9e1 1652 continue;
a6279458 1653 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1654 continue;
1655 break;
1656 }
a6279458 1657
cb15d9c2 1658 if (!rt)
8ed67789 1659 rt = net->ipv6.ip6_null_entry;
4c9483b2 1660 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1661out:
d8d1f30b 1662 dst_hold(&rt->dst);
a6279458 1663
c71099ac 1664 read_unlock_bh(&table->tb6_lock);
e843b9e1 1665
a6279458
YH
1666 return rt;
1667};
1668
b71d1d42
ED
1669static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1670 const struct in6_addr *src,
1671 const struct in6_addr *gateway,
a6279458
YH
1672 struct net_device *dev)
1673{
adaa70bb 1674 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1675 struct net *net = dev_net(dev);
a6279458 1676 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1677 .fl6 = {
1678 .flowi6_oif = dev->ifindex,
1679 .daddr = *dest,
1680 .saddr = *src,
a6279458 1681 },
a6279458 1682 };
adaa70bb 1683
4e3fd7a0 1684 rdfl.gateway = *gateway;
86c36ce4 1685
adaa70bb
TG
1686 if (rt6_need_strict(dest))
1687 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1688
4c9483b2 1689 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1690 flags, __ip6_route_redirect);
a6279458
YH
1691}
1692
b71d1d42
ED
1693void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1694 const struct in6_addr *saddr,
a6279458
YH
1695 struct neighbour *neigh, u8 *lladdr, int on_link)
1696{
1697 struct rt6_info *rt, *nrt = NULL;
1698 struct netevent_redirect netevent;
c346dca1 1699 struct net *net = dev_net(neigh->dev);
1d248b1c 1700 struct neighbour *old_neigh;
a6279458
YH
1701
1702 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1703
8ed67789 1704 if (rt == net->ipv6.ip6_null_entry) {
e87cc472 1705 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
a6279458 1706 goto out;
1da177e4
LT
1707 }
1708
1da177e4
LT
1709 /*
1710 * We have finally decided to accept it.
1711 */
1712
1ab1457c 1713 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1714 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1715 NEIGH_UPDATE_F_OVERRIDE|
1716 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1717 NEIGH_UPDATE_F_ISROUTER))
1718 );
1719
1720 /*
1721 * Redirect received -> path was valid.
1722 * Look, redirects are sent only in response to data packets,
1723 * so that this nexthop apparently is reachable. --ANK
1724 */
d8d1f30b 1725 dst_confirm(&rt->dst);
1da177e4
LT
1726
1727 /* Duplicate redirect: silently ignore. */
97cac082 1728 old_neigh = rt->n;
1d248b1c 1729 if (neigh == old_neigh)
1da177e4
LT
1730 goto out;
1731
21efcfa0 1732 nrt = ip6_rt_copy(rt, dest);
38308473 1733 if (!nrt)
1da177e4
LT
1734 goto out;
1735
1736 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1737 if (on_link)
1738 nrt->rt6i_flags &= ~RTF_GATEWAY;
1739
4e3fd7a0 1740 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
97cac082 1741 nrt->n = neigh_clone(neigh);
1da177e4 1742
40e22e8f 1743 if (ip6_ins_rt(nrt))
1da177e4
LT
1744 goto out;
1745
d8d1f30b 1746 netevent.old = &rt->dst;
1d248b1c 1747 netevent.old_neigh = old_neigh;
d8d1f30b 1748 netevent.new = &nrt->dst;
1d248b1c
DM
1749 netevent.new_neigh = neigh;
1750 netevent.daddr = dest;
8d71740c
TT
1751 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1752
38308473 1753 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1754 ip6_del_rt(rt);
1da177e4
LT
1755 return;
1756 }
1757
1758out:
d8d1f30b 1759 dst_release(&rt->dst);
1da177e4
LT
1760}
1761
1da177e4
LT
1762/*
1763 * Misc support functions
1764 */
1765
1716a961 1766static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1767 const struct in6_addr *dest)
1da177e4 1768{
d1918542 1769 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1770 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1771 ort->rt6i_table);
1da177e4
LT
1772
1773 if (rt) {
d8d1f30b
CG
1774 rt->dst.input = ort->dst.input;
1775 rt->dst.output = ort->dst.output;
8e2ec639 1776 rt->dst.flags |= DST_HOST;
d8d1f30b 1777
4e3fd7a0 1778 rt->rt6i_dst.addr = *dest;
8e2ec639 1779 rt->rt6i_dst.plen = 128;
defb3519 1780 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1781 rt->dst.error = ort->dst.error;
1da177e4
LT
1782 rt->rt6i_idev = ort->rt6i_idev;
1783 if (rt->rt6i_idev)
1784 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1785 rt->dst.lastuse = jiffies;
1da177e4 1786
4e3fd7a0 1787 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961
G
1788 rt->rt6i_flags = ort->rt6i_flags;
1789 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1790 (RTF_DEFAULT | RTF_ADDRCONF))
1791 rt6_set_from(rt, ort);
1792 else
1793 rt6_clean_expires(rt);
1da177e4
LT
1794 rt->rt6i_metric = 0;
1795
1da177e4
LT
1796#ifdef CONFIG_IPV6_SUBTREES
1797 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1798#endif
0f6c6392 1799 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1800 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1801 }
1802 return rt;
1803}
1804
70ceb4f5 1805#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1806static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1807 const struct in6_addr *prefix, int prefixlen,
1808 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1809{
1810 struct fib6_node *fn;
1811 struct rt6_info *rt = NULL;
c71099ac
TG
1812 struct fib6_table *table;
1813
efa2cea0 1814 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1815 if (!table)
c71099ac 1816 return NULL;
70ceb4f5 1817
c71099ac
TG
1818 write_lock_bh(&table->tb6_lock);
1819 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1820 if (!fn)
1821 goto out;
1822
d8d1f30b 1823 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1824 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1825 continue;
1826 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1827 continue;
1828 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1829 continue;
d8d1f30b 1830 dst_hold(&rt->dst);
70ceb4f5
YH
1831 break;
1832 }
1833out:
c71099ac 1834 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1835 return rt;
1836}
1837
efa2cea0 1838static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1839 const struct in6_addr *prefix, int prefixlen,
1840 const struct in6_addr *gwaddr, int ifindex,
95c96174 1841 unsigned int pref)
70ceb4f5 1842{
86872cb5
TG
1843 struct fib6_config cfg = {
1844 .fc_table = RT6_TABLE_INFO,
238fc7ea 1845 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1846 .fc_ifindex = ifindex,
1847 .fc_dst_len = prefixlen,
1848 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1849 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1850 .fc_nlinfo.pid = 0,
1851 .fc_nlinfo.nlh = NULL,
1852 .fc_nlinfo.nl_net = net,
86872cb5
TG
1853 };
1854
4e3fd7a0
AD
1855 cfg.fc_dst = *prefix;
1856 cfg.fc_gateway = *gwaddr;
70ceb4f5 1857
e317da96
YH
1858 /* We should treat it as a default route if prefix length is 0. */
1859 if (!prefixlen)
86872cb5 1860 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1861
86872cb5 1862 ip6_route_add(&cfg);
70ceb4f5 1863
efa2cea0 1864 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1865}
1866#endif
1867
b71d1d42 1868struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1869{
1da177e4 1870 struct rt6_info *rt;
c71099ac 1871 struct fib6_table *table;
1da177e4 1872
c346dca1 1873 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1874 if (!table)
c71099ac 1875 return NULL;
1da177e4 1876
c71099ac 1877 write_lock_bh(&table->tb6_lock);
d8d1f30b 1878 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
d1918542 1879 if (dev == rt->dst.dev &&
045927ff 1880 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1881 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1882 break;
1883 }
1884 if (rt)
d8d1f30b 1885 dst_hold(&rt->dst);
c71099ac 1886 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1887 return rt;
1888}
1889
b71d1d42 1890struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1891 struct net_device *dev,
1892 unsigned int pref)
1da177e4 1893{
86872cb5
TG
1894 struct fib6_config cfg = {
1895 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1896 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1897 .fc_ifindex = dev->ifindex,
1898 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1899 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1900 .fc_nlinfo.pid = 0,
1901 .fc_nlinfo.nlh = NULL,
c346dca1 1902 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1903 };
1da177e4 1904
4e3fd7a0 1905 cfg.fc_gateway = *gwaddr;
1da177e4 1906
86872cb5 1907 ip6_route_add(&cfg);
1da177e4 1908
1da177e4
LT
1909 return rt6_get_dflt_router(gwaddr, dev);
1910}
1911
7b4da532 1912void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1913{
1914 struct rt6_info *rt;
c71099ac
TG
1915 struct fib6_table *table;
1916
1917 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1918 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1919 if (!table)
c71099ac 1920 return;
1da177e4
LT
1921
1922restart:
c71099ac 1923 read_lock_bh(&table->tb6_lock);
d8d1f30b 1924 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1925 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1926 dst_hold(&rt->dst);
c71099ac 1927 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1928 ip6_del_rt(rt);
1da177e4
LT
1929 goto restart;
1930 }
1931 }
c71099ac 1932 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1933}
1934
5578689a
DL
1935static void rtmsg_to_fib6_config(struct net *net,
1936 struct in6_rtmsg *rtmsg,
86872cb5
TG
1937 struct fib6_config *cfg)
1938{
1939 memset(cfg, 0, sizeof(*cfg));
1940
1941 cfg->fc_table = RT6_TABLE_MAIN;
1942 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1943 cfg->fc_metric = rtmsg->rtmsg_metric;
1944 cfg->fc_expires = rtmsg->rtmsg_info;
1945 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1946 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1947 cfg->fc_flags = rtmsg->rtmsg_flags;
1948
5578689a 1949 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1950
4e3fd7a0
AD
1951 cfg->fc_dst = rtmsg->rtmsg_dst;
1952 cfg->fc_src = rtmsg->rtmsg_src;
1953 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1954}
1955
5578689a 1956int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1957{
86872cb5 1958 struct fib6_config cfg;
1da177e4
LT
1959 struct in6_rtmsg rtmsg;
1960 int err;
1961
1962 switch(cmd) {
1963 case SIOCADDRT: /* Add a route */
1964 case SIOCDELRT: /* Delete a route */
1965 if (!capable(CAP_NET_ADMIN))
1966 return -EPERM;
1967 err = copy_from_user(&rtmsg, arg,
1968 sizeof(struct in6_rtmsg));
1969 if (err)
1970 return -EFAULT;
86872cb5 1971
5578689a 1972 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1973
1da177e4
LT
1974 rtnl_lock();
1975 switch (cmd) {
1976 case SIOCADDRT:
86872cb5 1977 err = ip6_route_add(&cfg);
1da177e4
LT
1978 break;
1979 case SIOCDELRT:
86872cb5 1980 err = ip6_route_del(&cfg);
1da177e4
LT
1981 break;
1982 default:
1983 err = -EINVAL;
1984 }
1985 rtnl_unlock();
1986
1987 return err;
3ff50b79 1988 }
1da177e4
LT
1989
1990 return -EINVAL;
1991}
1992
1993/*
1994 * Drop the packet on the floor
1995 */
1996
d5fdd6ba 1997static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1998{
612f09e8 1999 int type;
adf30907 2000 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2001 switch (ipstats_mib_noroutes) {
2002 case IPSTATS_MIB_INNOROUTES:
0660e03f 2003 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2004 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2005 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2006 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2007 break;
2008 }
2009 /* FALLTHROUGH */
2010 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2011 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2012 ipstats_mib_noroutes);
612f09e8
YH
2013 break;
2014 }
3ffe533c 2015 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2016 kfree_skb(skb);
2017 return 0;
2018}
2019
9ce8ade0
TG
2020static int ip6_pkt_discard(struct sk_buff *skb)
2021{
612f09e8 2022 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2023}
2024
20380731 2025static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2026{
adf30907 2027 skb->dev = skb_dst(skb)->dev;
612f09e8 2028 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2029}
2030
6723ab54
DM
2031#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2032
9ce8ade0
TG
2033static int ip6_pkt_prohibit(struct sk_buff *skb)
2034{
612f09e8 2035 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2036}
2037
2038static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2039{
adf30907 2040 skb->dev = skb_dst(skb)->dev;
612f09e8 2041 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2042}
2043
6723ab54
DM
2044#endif
2045
1da177e4
LT
2046/*
2047 * Allocate a dst for local (unicast / anycast) address.
2048 */
2049
2050struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2051 const struct in6_addr *addr,
8f031519 2052 bool anycast)
1da177e4 2053{
c346dca1 2054 struct net *net = dev_net(idev->dev);
8b96d22d 2055 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
f83c7790 2056 int err;
1da177e4 2057
38308473 2058 if (!rt) {
f3213831 2059 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
1da177e4 2060 return ERR_PTR(-ENOMEM);
40385653 2061 }
1da177e4 2062
1da177e4
LT
2063 in6_dev_hold(idev);
2064
11d53b49 2065 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2066 rt->dst.input = ip6_input;
2067 rt->dst.output = ip6_output;
1da177e4 2068 rt->rt6i_idev = idev;
d8d1f30b 2069 rt->dst.obsolete = -1;
1da177e4
LT
2070
2071 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2072 if (anycast)
2073 rt->rt6i_flags |= RTF_ANYCAST;
2074 else
1da177e4 2075 rt->rt6i_flags |= RTF_LOCAL;
8ade06c6 2076 err = rt6_bind_neighbour(rt, rt->dst.dev);
f83c7790 2077 if (err) {
d8d1f30b 2078 dst_free(&rt->dst);
f83c7790 2079 return ERR_PTR(err);
1da177e4
LT
2080 }
2081
4e3fd7a0 2082 rt->rt6i_dst.addr = *addr;
1da177e4 2083 rt->rt6i_dst.plen = 128;
5578689a 2084 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2085
d8d1f30b 2086 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2087
2088 return rt;
2089}
2090
c3968a85
DW
2091int ip6_route_get_saddr(struct net *net,
2092 struct rt6_info *rt,
b71d1d42 2093 const struct in6_addr *daddr,
c3968a85
DW
2094 unsigned int prefs,
2095 struct in6_addr *saddr)
2096{
2097 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2098 int err = 0;
2099 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2100 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2101 else
2102 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2103 daddr, prefs, saddr);
2104 return err;
2105}
2106
2107/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address being removed */
};
2113
2114static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2115{
2116 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2117 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2118 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2119
d1918542 2120 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2121 rt != net->ipv6.ip6_null_entry &&
2122 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2123 /* remove prefsrc entry */
2124 rt->rt6i_prefsrc.plen = 0;
2125 }
2126 return 0;
2127}
2128
2129void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2130{
2131 struct net *net = dev_net(ifp->idev->dev);
2132 struct arg_dev_net_ip adni = {
2133 .dev = ifp->idev->dev,
2134 .net = net,
2135 .addr = &ifp->addr,
2136 };
2137 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2138}
2139
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is kept */
};
2144
1da177e4
LT
2145static int fib6_ifdown(struct rt6_info *rt, void *arg)
2146{
bc3ef660 2147 const struct arg_dev_net *adn = arg;
2148 const struct net_device *dev = adn->dev;
8ed67789 2149
d1918542 2150 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2151 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2152 return -1;
c159d30c 2153
1da177e4
LT
2154 return 0;
2155}
2156
f3db4851 2157void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2158{
8ed67789
DL
2159 struct arg_dev_net adn = {
2160 .dev = dev,
2161 .net = net,
2162 };
2163
2164 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2165 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2166}
2167
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2172
2173static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2174{
2175 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2176 struct inet6_dev *idev;
2177
2178 /* In IPv6 pmtu discovery is not optional,
2179 so that RTAX_MTU lock cannot disable it.
2180 We still use this lock to block changes
2181 caused by addrconf/ndisc.
2182 */
2183
2184 idev = __in6_dev_get(arg->dev);
38308473 2185 if (!idev)
1da177e4
LT
2186 return 0;
2187
2188 /* For administrative MTU increase, there is no way to discover
2189 IPv6 PMTU increase, so PMTU increase should be updated here.
2190 Since RFC 1981 doesn't include administrative MTU increase
2191 update PMTU increase is a MUST. (i.e. jumbo frame)
2192 */
2193 /*
2194 If new MTU is less than route PMTU, this new MTU will be the
2195 lowest MTU in the path, update the route PMTU to reflect PMTU
2196 decreases; if new MTU is greater than route PMTU, and the
2197 old MTU is the lowest MTU in the path, update the route PMTU
2198 to reflect the increase. In this case if the other nodes' MTU
2199 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2200 PMTU discouvery.
2201 */
d1918542 2202 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2203 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2204 (dst_mtu(&rt->dst) >= arg->mtu ||
2205 (dst_mtu(&rt->dst) < arg->mtu &&
2206 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2207 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2208 }
1da177e4
LT
2209 return 0;
2210}
2211
95c96174 2212void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2213{
c71099ac
TG
2214 struct rt6_mtu_change_arg arg = {
2215 .dev = dev,
2216 .mtu = mtu,
2217 };
1da177e4 2218
c346dca1 2219 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2220}
2221
ef7c79ed 2222static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2223 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2224 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2225 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2226 [RTA_PRIORITY] = { .type = NLA_U32 },
2227 [RTA_METRICS] = { .type = NLA_NESTED },
2228};
2229
2230static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2231 struct fib6_config *cfg)
1da177e4 2232{
86872cb5
TG
2233 struct rtmsg *rtm;
2234 struct nlattr *tb[RTA_MAX+1];
2235 int err;
1da177e4 2236
86872cb5
TG
2237 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2238 if (err < 0)
2239 goto errout;
1da177e4 2240
86872cb5
TG
2241 err = -EINVAL;
2242 rtm = nlmsg_data(nlh);
2243 memset(cfg, 0, sizeof(*cfg));
2244
2245 cfg->fc_table = rtm->rtm_table;
2246 cfg->fc_dst_len = rtm->rtm_dst_len;
2247 cfg->fc_src_len = rtm->rtm_src_len;
2248 cfg->fc_flags = RTF_UP;
2249 cfg->fc_protocol = rtm->rtm_protocol;
2250
2251 if (rtm->rtm_type == RTN_UNREACHABLE)
2252 cfg->fc_flags |= RTF_REJECT;
2253
ab79ad14
2254 if (rtm->rtm_type == RTN_LOCAL)
2255 cfg->fc_flags |= RTF_LOCAL;
2256
86872cb5
TG
2257 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2258 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2259 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2260
2261 if (tb[RTA_GATEWAY]) {
2262 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2263 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2264 }
86872cb5
TG
2265
2266 if (tb[RTA_DST]) {
2267 int plen = (rtm->rtm_dst_len + 7) >> 3;
2268
2269 if (nla_len(tb[RTA_DST]) < plen)
2270 goto errout;
2271
2272 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2273 }
86872cb5
TG
2274
2275 if (tb[RTA_SRC]) {
2276 int plen = (rtm->rtm_src_len + 7) >> 3;
2277
2278 if (nla_len(tb[RTA_SRC]) < plen)
2279 goto errout;
2280
2281 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2282 }
86872cb5 2283
c3968a85
DW
2284 if (tb[RTA_PREFSRC])
2285 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2286
86872cb5
TG
2287 if (tb[RTA_OIF])
2288 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2289
2290 if (tb[RTA_PRIORITY])
2291 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2292
2293 if (tb[RTA_METRICS]) {
2294 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2295 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2296 }
86872cb5
TG
2297
2298 if (tb[RTA_TABLE])
2299 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2300
2301 err = 0;
2302errout:
2303 return err;
1da177e4
LT
2304}
2305
c127ea2c 2306static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2307{
86872cb5
TG
2308 struct fib6_config cfg;
2309 int err;
1da177e4 2310
86872cb5
TG
2311 err = rtm_to_fib6_config(skb, nlh, &cfg);
2312 if (err < 0)
2313 return err;
2314
2315 return ip6_route_del(&cfg);
1da177e4
LT
2316}
2317
c127ea2c 2318static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2319{
86872cb5
TG
2320 struct fib6_config cfg;
2321 int err;
1da177e4 2322
86872cb5
TG
2323 err = rtm_to_fib6_config(skb, nlh, &cfg);
2324 if (err < 0)
2325 return err;
2326
2327 return ip6_route_add(&cfg);
1da177e4
LT
2328}
2329
339bf98f
TG
2330static inline size_t rt6_nlmsg_size(void)
2331{
2332 return NLMSG_ALIGN(sizeof(struct rtmsg))
2333 + nla_total_size(16) /* RTA_SRC */
2334 + nla_total_size(16) /* RTA_DST */
2335 + nla_total_size(16) /* RTA_GATEWAY */
2336 + nla_total_size(16) /* RTA_PREFSRC */
2337 + nla_total_size(4) /* RTA_TABLE */
2338 + nla_total_size(4) /* RTA_IIF */
2339 + nla_total_size(4) /* RTA_OIF */
2340 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2341 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2342 + nla_total_size(sizeof(struct rta_cacheinfo));
2343}
2344
191cd582
BH
2345static int rt6_fill_node(struct net *net,
2346 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2347 struct in6_addr *dst, struct in6_addr *src,
2348 int iif, int type, u32 pid, u32 seq,
7bc570c8 2349 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2350{
2351 struct rtmsg *rtm;
2d7202bf 2352 struct nlmsghdr *nlh;
e3703b3d 2353 long expires;
9e762a4a 2354 u32 table;
f2c31e32 2355 struct neighbour *n;
1da177e4
LT
2356
2357 if (prefix) { /* user wants prefix routes only */
2358 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2359 /* success since this is not a prefix route */
2360 return 1;
2361 }
2362 }
2363
2d7202bf 2364 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2365 if (!nlh)
26932566 2366 return -EMSGSIZE;
2d7202bf
TG
2367
2368 rtm = nlmsg_data(nlh);
1da177e4
LT
2369 rtm->rtm_family = AF_INET6;
2370 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2371 rtm->rtm_src_len = rt->rt6i_src.plen;
2372 rtm->rtm_tos = 0;
c71099ac 2373 if (rt->rt6i_table)
9e762a4a 2374 table = rt->rt6i_table->tb6_id;
c71099ac 2375 else
9e762a4a
PM
2376 table = RT6_TABLE_UNSPEC;
2377 rtm->rtm_table = table;
c78679e8
DM
2378 if (nla_put_u32(skb, RTA_TABLE, table))
2379 goto nla_put_failure;
38308473 2380 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2381 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2382 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2383 rtm->rtm_type = RTN_LOCAL;
d1918542 2384 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2385 rtm->rtm_type = RTN_LOCAL;
2386 else
2387 rtm->rtm_type = RTN_UNICAST;
2388 rtm->rtm_flags = 0;
2389 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2390 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2391 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2392 rtm->rtm_protocol = RTPROT_REDIRECT;
2393 else if (rt->rt6i_flags & RTF_ADDRCONF)
2394 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2395 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2396 rtm->rtm_protocol = RTPROT_RA;
2397
38308473 2398 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2399 rtm->rtm_flags |= RTM_F_CLONED;
2400
2401 if (dst) {
c78679e8
DM
2402 if (nla_put(skb, RTA_DST, 16, dst))
2403 goto nla_put_failure;
1ab1457c 2404 rtm->rtm_dst_len = 128;
1da177e4 2405 } else if (rtm->rtm_dst_len)
c78679e8
DM
2406 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2407 goto nla_put_failure;
1da177e4
LT
2408#ifdef CONFIG_IPV6_SUBTREES
2409 if (src) {
c78679e8
DM
2410 if (nla_put(skb, RTA_SRC, 16, src))
2411 goto nla_put_failure;
1ab1457c 2412 rtm->rtm_src_len = 128;
c78679e8
DM
2413 } else if (rtm->rtm_src_len &&
2414 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2415 goto nla_put_failure;
1da177e4 2416#endif
7bc570c8
YH
2417 if (iif) {
2418#ifdef CONFIG_IPV6_MROUTE
2419 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2420 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2421 if (err <= 0) {
2422 if (!nowait) {
2423 if (err == 0)
2424 return 0;
2425 goto nla_put_failure;
2426 } else {
2427 if (err == -EMSGSIZE)
2428 goto nla_put_failure;
2429 }
2430 }
2431 } else
2432#endif
c78679e8
DM
2433 if (nla_put_u32(skb, RTA_IIF, iif))
2434 goto nla_put_failure;
7bc570c8 2435 } else if (dst) {
1da177e4 2436 struct in6_addr saddr_buf;
c78679e8
DM
2437 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2438 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2439 goto nla_put_failure;
1da177e4 2440 }
2d7202bf 2441
c3968a85
DW
2442 if (rt->rt6i_prefsrc.plen) {
2443 struct in6_addr saddr_buf;
4e3fd7a0 2444 saddr_buf = rt->rt6i_prefsrc.addr;
c78679e8
DM
2445 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2446 goto nla_put_failure;
c3968a85
DW
2447 }
2448
defb3519 2449 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2450 goto nla_put_failure;
2451
f2c31e32 2452 rcu_read_lock();
97cac082 2453 n = rt->n;
94f826b8
ED
2454 if (n) {
2455 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2456 rcu_read_unlock();
2457 goto nla_put_failure;
2458 }
2459 }
f2c31e32 2460 rcu_read_unlock();
2d7202bf 2461
c78679e8
DM
2462 if (rt->dst.dev &&
2463 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2464 goto nla_put_failure;
2465 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2466 goto nla_put_failure;
36e3deae
YH
2467 if (!(rt->rt6i_flags & RTF_EXPIRES))
2468 expires = 0;
d1918542
DM
2469 else if (rt->dst.expires - jiffies < INT_MAX)
2470 expires = rt->dst.expires - jiffies;
36e3deae
YH
2471 else
2472 expires = INT_MAX;
69cdf8f9 2473
81166dd6 2474 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
d8d1f30b 2475 expires, rt->dst.error) < 0)
e3703b3d 2476 goto nla_put_failure;
2d7202bf
TG
2477
2478 return nlmsg_end(skb, nlh);
2479
2480nla_put_failure:
26932566
PM
2481 nlmsg_cancel(skb, nlh);
2482 return -EMSGSIZE;
1da177e4
LT
2483}
2484
1b43af54 2485int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2486{
2487 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2488 int prefix;
2489
2d7202bf
TG
2490 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2491 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2492 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2493 } else
2494 prefix = 0;
2495
191cd582
BH
2496 return rt6_fill_node(arg->net,
2497 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2498 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2499 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2500}
2501
c127ea2c 2502static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2503{
3b1e0a65 2504 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2505 struct nlattr *tb[RTA_MAX+1];
2506 struct rt6_info *rt;
1da177e4 2507 struct sk_buff *skb;
ab364a6f 2508 struct rtmsg *rtm;
4c9483b2 2509 struct flowi6 fl6;
72331bc0 2510 int err, iif = 0, oif = 0;
1da177e4 2511
ab364a6f
TG
2512 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2513 if (err < 0)
2514 goto errout;
1da177e4 2515
ab364a6f 2516 err = -EINVAL;
4c9483b2 2517 memset(&fl6, 0, sizeof(fl6));
1da177e4 2518
ab364a6f
TG
2519 if (tb[RTA_SRC]) {
2520 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2521 goto errout;
2522
4e3fd7a0 2523 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2524 }
2525
2526 if (tb[RTA_DST]) {
2527 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2528 goto errout;
2529
4e3fd7a0 2530 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2531 }
2532
2533 if (tb[RTA_IIF])
2534 iif = nla_get_u32(tb[RTA_IIF]);
2535
2536 if (tb[RTA_OIF])
72331bc0 2537 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2538
2539 if (iif) {
2540 struct net_device *dev;
72331bc0
SL
2541 int flags = 0;
2542
5578689a 2543 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2544 if (!dev) {
2545 err = -ENODEV;
ab364a6f 2546 goto errout;
1da177e4 2547 }
72331bc0
SL
2548
2549 fl6.flowi6_iif = iif;
2550
2551 if (!ipv6_addr_any(&fl6.saddr))
2552 flags |= RT6_LOOKUP_F_HAS_SADDR;
2553
2554 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2555 flags);
2556 } else {
2557 fl6.flowi6_oif = oif;
2558
2559 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2560 }
2561
ab364a6f 2562 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2563 if (!skb) {
2173bff5 2564 dst_release(&rt->dst);
ab364a6f
TG
2565 err = -ENOBUFS;
2566 goto errout;
2567 }
1da177e4 2568
ab364a6f
TG
2569 /* Reserve room for dummy headers, this skb can pass
2570 through good chunk of routing engine.
2571 */
459a98ed 2572 skb_reset_mac_header(skb);
ab364a6f 2573 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2574
d8d1f30b 2575 skb_dst_set(skb, &rt->dst);
1da177e4 2576
4c9483b2 2577 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2578 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2579 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2580 if (err < 0) {
ab364a6f
TG
2581 kfree_skb(skb);
2582 goto errout;
1da177e4
LT
2583 }
2584
5578689a 2585 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2586errout:
1da177e4 2587 return err;
1da177e4
LT
2588}
2589
86872cb5 2590void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2591{
2592 struct sk_buff *skb;
5578689a 2593 struct net *net = info->nl_net;
528c4ceb
DL
2594 u32 seq;
2595 int err;
2596
2597 err = -ENOBUFS;
38308473 2598 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2599
339bf98f 2600 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2601 if (!skb)
21713ebc
TG
2602 goto errout;
2603
191cd582 2604 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2605 event, info->pid, seq, 0, 0, 0);
26932566
PM
2606 if (err < 0) {
2607 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2608 WARN_ON(err == -EMSGSIZE);
2609 kfree_skb(skb);
2610 goto errout;
2611 }
1ce85fe4
PNA
2612 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2613 info->nlh, gfp_any());
2614 return;
21713ebc
TG
2615errout:
2616 if (err < 0)
5578689a 2617 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2618}
2619
8ed67789
DL
2620static int ip6_route_dev_notify(struct notifier_block *this,
2621 unsigned long event, void *data)
2622{
2623 struct net_device *dev = (struct net_device *)data;
c346dca1 2624 struct net *net = dev_net(dev);
8ed67789
DL
2625
2626 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2627 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2628 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2629#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2630 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2631 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2632 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2633 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2634#endif
2635 }
2636
2637 return NOTIFY_OK;
2638}
2639
1da177e4
LT
2640/*
2641 * /proc
2642 */
2643
2644#ifdef CONFIG_PROC_FS
2645
1da177e4
LT
/*
 * Buffer-cursor state for /proc output.
 * NOTE(review): nothing in this part of the file references this struct;
 * confirm whether it is still used before relying on it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2654
2655static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2656{
33120b30 2657 struct seq_file *m = p_arg;
69cce1d1 2658 struct neighbour *n;
1da177e4 2659
4b7a4274 2660 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2661
2662#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2663 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2664#else
33120b30 2665 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2666#endif
f2c31e32 2667 rcu_read_lock();
97cac082 2668 n = rt->n;
69cce1d1
DM
2669 if (n) {
2670 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2671 } else {
33120b30 2672 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2673 }
f2c31e32 2674 rcu_read_unlock();
33120b30 2675 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2676 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2677 rt->dst.__use, rt->rt6i_flags,
d1918542 2678 rt->dst.dev ? rt->dst.dev->name : "");
1da177e4
LT
2679 return 0;
2680}
2681
33120b30 2682static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2683{
f3db4851 2684 struct net *net = (struct net *)m->private;
32b293a5 2685 fib6_clean_all_ro(net, rt6_info_route, 0, m);
33120b30
AD
2686 return 0;
2687}
1da177e4 2688
33120b30
AD
2689static int ipv6_route_open(struct inode *inode, struct file *file)
2690{
de05c557 2691 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2692}
2693
33120b30
AD
2694static const struct file_operations ipv6_route_proc_fops = {
2695 .owner = THIS_MODULE,
2696 .open = ipv6_route_open,
2697 .read = seq_read,
2698 .llseek = seq_lseek,
b6fcbdb4 2699 .release = single_release_net,
33120b30
AD
2700};
2701
1da177e4
LT
2702static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2703{
69ddb805 2704 struct net *net = (struct net *)seq->private;
1da177e4 2705 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2706 net->ipv6.rt6_stats->fib_nodes,
2707 net->ipv6.rt6_stats->fib_route_nodes,
2708 net->ipv6.rt6_stats->fib_rt_alloc,
2709 net->ipv6.rt6_stats->fib_rt_entries,
2710 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2711 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2712 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2713
2714 return 0;
2715}
2716
2717static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2718{
de05c557 2719 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2720}
2721
9a32144e 2722static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2723 .owner = THIS_MODULE,
2724 .open = rt6_stats_seq_open,
2725 .read = seq_read,
2726 .llseek = seq_lseek,
b6fcbdb4 2727 .release = single_release_net,
1da177e4
LT
2728};
2729#endif /* CONFIG_PROC_FS */
2730
2731#ifdef CONFIG_SYSCTL
2732
1da177e4 2733static
8d65af78 2734int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2735 void __user *buffer, size_t *lenp, loff_t *ppos)
2736{
c486da34
LAG
2737 struct net *net;
2738 int delay;
2739 if (!write)
1da177e4 2740 return -EINVAL;
c486da34
LAG
2741
2742 net = (struct net *)ctl->extra1;
2743 delay = net->ipv6.sysctl.flush_delay;
2744 proc_dointvec(ctl, write, buffer, lenp, ppos);
2745 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2746 return 0;
1da177e4
LT
2747}
2748
760f2d01 2749ctl_table ipv6_route_table_template[] = {
1ab1457c 2750 {
1da177e4 2751 .procname = "flush",
4990509f 2752 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2753 .maxlen = sizeof(int),
89c8b3a1 2754 .mode = 0200,
6d9f239a 2755 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2756 },
2757 {
1da177e4 2758 .procname = "gc_thresh",
9a7ec3a9 2759 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2760 .maxlen = sizeof(int),
2761 .mode = 0644,
6d9f239a 2762 .proc_handler = proc_dointvec,
1da177e4
LT
2763 },
2764 {
1da177e4 2765 .procname = "max_size",
4990509f 2766 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2767 .maxlen = sizeof(int),
2768 .mode = 0644,
6d9f239a 2769 .proc_handler = proc_dointvec,
1da177e4
LT
2770 },
2771 {
1da177e4 2772 .procname = "gc_min_interval",
4990509f 2773 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2774 .maxlen = sizeof(int),
2775 .mode = 0644,
6d9f239a 2776 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2777 },
2778 {
1da177e4 2779 .procname = "gc_timeout",
4990509f 2780 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2781 .maxlen = sizeof(int),
2782 .mode = 0644,
6d9f239a 2783 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2784 },
2785 {
1da177e4 2786 .procname = "gc_interval",
4990509f 2787 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2788 .maxlen = sizeof(int),
2789 .mode = 0644,
6d9f239a 2790 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2791 },
2792 {
1da177e4 2793 .procname = "gc_elasticity",
4990509f 2794 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2795 .maxlen = sizeof(int),
2796 .mode = 0644,
f3d3f616 2797 .proc_handler = proc_dointvec,
1da177e4
LT
2798 },
2799 {
1da177e4 2800 .procname = "mtu_expires",
4990509f 2801 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2802 .maxlen = sizeof(int),
2803 .mode = 0644,
6d9f239a 2804 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2805 },
2806 {
1da177e4 2807 .procname = "min_adv_mss",
4990509f 2808 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2809 .maxlen = sizeof(int),
2810 .mode = 0644,
f3d3f616 2811 .proc_handler = proc_dointvec,
1da177e4
LT
2812 },
2813 {
1da177e4 2814 .procname = "gc_min_interval_ms",
4990509f 2815 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2816 .maxlen = sizeof(int),
2817 .mode = 0644,
6d9f239a 2818 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2819 },
f8572d8f 2820 { }
1da177e4
LT
2821};
2822
2c8c1e72 2823struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2824{
2825 struct ctl_table *table;
2826
2827 table = kmemdup(ipv6_route_table_template,
2828 sizeof(ipv6_route_table_template),
2829 GFP_KERNEL);
5ee09105
YH
2830
2831 if (table) {
2832 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2833 table[0].extra1 = net;
86393e52 2834 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2835 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2836 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2837 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2838 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2839 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2840 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2841 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2842 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2843 }
2844
760f2d01
DL
2845 return table;
2846}
1da177e4
LT
2847#endif
2848
2c8c1e72 2849static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2850{
633d424b 2851 int ret = -ENOMEM;
8ed67789 2852
86393e52
AD
2853 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2854 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2855
fc66f95c
ED
2856 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2857 goto out_ip6_dst_ops;
2858
8ed67789
DL
2859 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2860 sizeof(*net->ipv6.ip6_null_entry),
2861 GFP_KERNEL);
2862 if (!net->ipv6.ip6_null_entry)
fc66f95c 2863 goto out_ip6_dst_entries;
d8d1f30b 2864 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2865 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2866 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2867 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2868 ip6_template_metrics, true);
8ed67789
DL
2869
2870#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2871 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2872 sizeof(*net->ipv6.ip6_prohibit_entry),
2873 GFP_KERNEL);
68fffc67
PZ
2874 if (!net->ipv6.ip6_prohibit_entry)
2875 goto out_ip6_null_entry;
d8d1f30b 2876 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2877 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2878 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2879 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2880 ip6_template_metrics, true);
8ed67789
DL
2881
2882 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2883 sizeof(*net->ipv6.ip6_blk_hole_entry),
2884 GFP_KERNEL);
68fffc67
PZ
2885 if (!net->ipv6.ip6_blk_hole_entry)
2886 goto out_ip6_prohibit_entry;
d8d1f30b 2887 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2888 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2889 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2890 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2891 ip6_template_metrics, true);
8ed67789
DL
2892#endif
2893
b339a47c
PZ
2894 net->ipv6.sysctl.flush_delay = 0;
2895 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2896 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2897 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2898 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2899 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2900 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2901 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2902
6891a346
BT
2903 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2904
8ed67789
DL
2905 ret = 0;
2906out:
2907 return ret;
f2fc6a54 2908
68fffc67
PZ
2909#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2910out_ip6_prohibit_entry:
2911 kfree(net->ipv6.ip6_prohibit_entry);
2912out_ip6_null_entry:
2913 kfree(net->ipv6.ip6_null_entry);
2914#endif
fc66f95c
ED
2915out_ip6_dst_entries:
2916 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2917out_ip6_dst_ops:
f2fc6a54 2918 goto out;
cdb18761
DL
2919}
2920
2c8c1e72 2921static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 2922{
8ed67789
DL
2923 kfree(net->ipv6.ip6_null_entry);
2924#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2925 kfree(net->ipv6.ip6_prohibit_entry);
2926 kfree(net->ipv6.ip6_blk_hole_entry);
2927#endif
41bb78b4 2928 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2929}
2930
d189634e
TG
2931static int __net_init ip6_route_net_init_late(struct net *net)
2932{
2933#ifdef CONFIG_PROC_FS
2934 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2935 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2936#endif
2937 return 0;
2938}
2939
2940static void __net_exit ip6_route_net_exit_late(struct net *net)
2941{
2942#ifdef CONFIG_PROC_FS
2943 proc_net_remove(net, "ipv6_route");
2944 proc_net_remove(net, "rt6_stats");
2945#endif
2946}
2947
cdb18761
DL
2948static struct pernet_operations ip6_route_net_ops = {
2949 .init = ip6_route_net_init,
2950 .exit = ip6_route_net_exit,
2951};
2952
c3426b47
DM
2953static int __net_init ipv6_inetpeer_init(struct net *net)
2954{
2955 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2956
2957 if (!bp)
2958 return -ENOMEM;
2959 inet_peer_base_init(bp);
2960 net->ipv6.peers = bp;
2961 return 0;
2962}
2963
2964static void __net_exit ipv6_inetpeer_exit(struct net *net)
2965{
2966 struct inet_peer_base *bp = net->ipv6.peers;
2967
2968 net->ipv6.peers = NULL;
56a6b248 2969 inetpeer_invalidate_tree(bp);
c3426b47
DM
2970 kfree(bp);
2971}
2972
2b823f72 2973static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
2974 .init = ipv6_inetpeer_init,
2975 .exit = ipv6_inetpeer_exit,
2976};
2977
d189634e
TG
2978static struct pernet_operations ip6_route_net_late_ops = {
2979 .init = ip6_route_net_init_late,
2980 .exit = ip6_route_net_exit_late,
2981};
2982
8ed67789
DL
2983static struct notifier_block ip6_route_dev_notifier = {
2984 .notifier_call = ip6_route_dev_notify,
2985 .priority = 0,
2986};
2987
433d49c3 2988int __init ip6_route_init(void)
1da177e4 2989{
433d49c3
DL
2990 int ret;
2991
9a7ec3a9
DL
2992 ret = -ENOMEM;
2993 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2994 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2995 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2996 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2997 goto out;
14e50e57 2998
fc66f95c 2999 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3000 if (ret)
bdb3289f 3001 goto out_kmem_cache;
bdb3289f 3002
c3426b47
DM
3003 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3004 if (ret)
e8803b6c 3005 goto out_dst_entries;
2a0c451a 3006
7e52b33b
DM
3007 ret = register_pernet_subsys(&ip6_route_net_ops);
3008 if (ret)
3009 goto out_register_inetpeer;
c3426b47 3010
5dc121e9
AE
3011 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3012
8ed67789
DL
3013 /* Registering of the loopback is done before this portion of code,
3014 * the loopback reference in rt6_info will not be taken, do it
3015 * manually for init_net */
d8d1f30b 3016 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3017 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3018 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3019 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3020 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3021 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3022 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3023 #endif
e8803b6c 3024 ret = fib6_init();
433d49c3 3025 if (ret)
8ed67789 3026 goto out_register_subsys;
433d49c3 3027
433d49c3
DL
3028 ret = xfrm6_init();
3029 if (ret)
e8803b6c 3030 goto out_fib6_init;
c35b7e72 3031
433d49c3
DL
3032 ret = fib6_rules_init();
3033 if (ret)
3034 goto xfrm6_init;
7e5449c2 3035
d189634e
TG
3036 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3037 if (ret)
3038 goto fib6_rules_init;
3039
433d49c3 3040 ret = -ENOBUFS;
c7ac8679
GR
3041 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3042 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3043 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3044 goto out_register_late_subsys;
c127ea2c 3045
8ed67789 3046 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3047 if (ret)
d189634e 3048 goto out_register_late_subsys;
8ed67789 3049
433d49c3
DL
3050out:
3051 return ret;
3052
d189634e
TG
3053out_register_late_subsys:
3054 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3055fib6_rules_init:
433d49c3
DL
3056 fib6_rules_cleanup();
3057xfrm6_init:
433d49c3 3058 xfrm6_fini();
2a0c451a
TG
3059out_fib6_init:
3060 fib6_gc_cleanup();
8ed67789
DL
3061out_register_subsys:
3062 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3063out_register_inetpeer:
3064 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3065out_dst_entries:
3066 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3067out_kmem_cache:
f2fc6a54 3068 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3069 goto out;
1da177e4
LT
3070}
3071
3072void ip6_route_cleanup(void)
3073{
8ed67789 3074 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3075 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3076 fib6_rules_cleanup();
1da177e4 3077 xfrm6_fini();
1da177e4 3078 fib6_gc_cleanup();
c3426b47 3079 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3080 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3081 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3082 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3083}