]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/route.c
bnx2x: Fix compile errors if CONFIG_CNIC is not set
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
/*
 *	Linux INET6 implementation
 *	FIB front-end.
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*	Changes:
 *
 *	YOSHIFUJI Hideaki @USAGI
 *		reworked default router selection.
 *		- respect outgoing interface
 *		- select from (probably) reachable routers (i.e.
 *		routers in REACHABLE, STALE, DELAY or PROBE states).
 *		- always select the same router if it is (probably)
 *		reachable.  otherwise, round-robin the list.
 *	Ville Nuorvala
 *		Fixed routing subtrees.
 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4 28#include <linux/errno.h>
bc3b2d7f 29#include <linux/export.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/times.h>
32#include <linux/socket.h>
33#include <linux/sockios.h>
34#include <linux/net.h>
35#include <linux/route.h>
36#include <linux/netdevice.h>
37#include <linux/in6.h>
7bc570c8 38#include <linux/mroute6.h>
1da177e4 39#include <linux/init.h>
1da177e4 40#include <linux/if_arp.h>
1da177e4
LT
41#include <linux/proc_fs.h>
42#include <linux/seq_file.h>
5b7c931d 43#include <linux/nsproxy.h>
5a0e3ad6 44#include <linux/slab.h>
457c4cbc 45#include <net/net_namespace.h>
1da177e4
LT
46#include <net/snmp.h>
47#include <net/ipv6.h>
48#include <net/ip6_fib.h>
49#include <net/ip6_route.h>
50#include <net/ndisc.h>
51#include <net/addrconf.h>
52#include <net/tcp.h>
53#include <linux/rtnetlink.h>
54#include <net/dst.h>
55#include <net/xfrm.h>
8d71740c 56#include <net/netevent.h>
21713ebc 57#include <net/netlink.h>
1da177e4
LT
58
59#include <asm/uaccess.h>
60
61#ifdef CONFIG_SYSCTL
62#include <linux/sysctl.h>
63#endif
64
/* Debug verbosity for this file. Set to 3 (or higher) to get tracing. */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
/* Tracing disabled: RDBG() expands to nothing, RT6_TRACE() to an empty statement. */
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
/* Tracing enabled: RDBG() takes a parenthesised printk argument list. */
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
75
21efcfa0
ED
76static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
77 const struct in6_addr *dest);
1da177e4 78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 79static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 80static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
81static struct dst_entry *ip6_negative_advice(struct dst_entry *);
82static void ip6_dst_destroy(struct dst_entry *);
83static void ip6_dst_ifdown(struct dst_entry *,
84 struct net_device *dev, int how);
569d3645 85static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
86
87static int ip6_pkt_discard(struct sk_buff *skb);
88static int ip6_pkt_discard_out(struct sk_buff *skb);
89static void ip6_link_failure(struct sk_buff *skb);
90static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
91
70ceb4f5 92#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 93static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
94 const struct in6_addr *prefix, int prefixlen,
95 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5 96 unsigned pref);
efa2cea0 97static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
98 const struct in6_addr *prefix, int prefixlen,
99 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
100#endif
101
06582540
DM
102static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
103{
104 struct rt6_info *rt = (struct rt6_info *) dst;
105 struct inet_peer *peer;
106 u32 *p = NULL;
107
8e2ec639
YZ
108 if (!(rt->dst.flags & DST_HOST))
109 return NULL;
110
06582540
DM
111 if (!rt->rt6i_peer)
112 rt6_bind_peer(rt, 1);
113
114 peer = rt->rt6i_peer;
115 if (peer) {
116 u32 *old_p = __DST_METRICS_PTR(old);
117 unsigned long prev, new;
118
119 p = peer->metrics;
120 if (inet_metrics_new(peer))
121 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
122
123 new = (unsigned long) p;
124 prev = cmpxchg(&dst->_metrics, old, new);
125
126 if (prev != old) {
127 p = __DST_METRICS_PTR(prev);
128 if (prev & DST_METRICS_READ_ONLY)
129 p = NULL;
130 }
131 }
132 return p;
133}
134
d3aaeb38
DM
135static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
136{
137 return __neigh_lookup_errno(&nd_tbl, daddr, dst->dev);
138}
139
9a7ec3a9 140static struct dst_ops ip6_dst_ops_template = {
1da177e4 141 .family = AF_INET6,
09640e63 142 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
143 .gc = ip6_dst_gc,
144 .gc_thresh = 1024,
145 .check = ip6_dst_check,
0dbaee3b 146 .default_advmss = ip6_default_advmss,
ebb762f2 147 .mtu = ip6_mtu,
06582540 148 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
149 .destroy = ip6_dst_destroy,
150 .ifdown = ip6_dst_ifdown,
151 .negative_advice = ip6_negative_advice,
152 .link_failure = ip6_link_failure,
153 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 154 .local_out = __ip6_local_out,
d3aaeb38 155 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
156};
157
ebb762f2 158static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 159{
618f9bc7
SK
160 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
161
162 return mtu ? : dst->dev->mtu;
ec831ea7
RD
163}
164
14e50e57
DM
165static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
166{
167}
168
0972ddb2
HB
169static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
170 unsigned long old)
171{
172 return NULL;
173}
174
14e50e57
DM
175static struct dst_ops ip6_dst_blackhole_ops = {
176 .family = AF_INET6,
09640e63 177 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
178 .destroy = ip6_dst_destroy,
179 .check = ip6_dst_check,
ebb762f2 180 .mtu = ip6_blackhole_mtu,
214f45c9 181 .default_advmss = ip6_default_advmss,
14e50e57 182 .update_pmtu = ip6_rt_blackhole_update_pmtu,
0972ddb2 183 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 184 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
185};
186
62fa8a84
DM
187static const u32 ip6_template_metrics[RTAX_MAX] = {
188 [RTAX_HOPLIMIT - 1] = 255,
189};
190
bdb3289f 191static struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
192 .dst = {
193 .__refcnt = ATOMIC_INIT(1),
194 .__use = 1,
195 .obsolete = -1,
196 .error = -ENETUNREACH,
d8d1f30b
CG
197 .input = ip6_pkt_discard,
198 .output = ip6_pkt_discard_out,
1da177e4
LT
199 },
200 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 201 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
202 .rt6i_metric = ~(u32) 0,
203 .rt6i_ref = ATOMIC_INIT(1),
204};
205
101367c2
TG
206#ifdef CONFIG_IPV6_MULTIPLE_TABLES
207
6723ab54
DM
208static int ip6_pkt_prohibit(struct sk_buff *skb);
209static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 210
280a34c8 211static struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
212 .dst = {
213 .__refcnt = ATOMIC_INIT(1),
214 .__use = 1,
215 .obsolete = -1,
216 .error = -EACCES,
d8d1f30b
CG
217 .input = ip6_pkt_prohibit,
218 .output = ip6_pkt_prohibit_out,
101367c2
TG
219 },
220 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 221 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
222 .rt6i_metric = ~(u32) 0,
223 .rt6i_ref = ATOMIC_INIT(1),
224};
225
bdb3289f 226static struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
227 .dst = {
228 .__refcnt = ATOMIC_INIT(1),
229 .__use = 1,
230 .obsolete = -1,
231 .error = -EINVAL,
d8d1f30b
CG
232 .input = dst_discard,
233 .output = dst_discard,
101367c2
TG
234 },
235 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 236 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
237 .rt6i_metric = ~(u32) 0,
238 .rt6i_ref = ATOMIC_INIT(1),
239};
240
241#endif
242
1da177e4 243/* allocate dst with ip6_dst_ops */
5c1e6aa3 244static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops,
957c665f
DM
245 struct net_device *dev,
246 int flags)
1da177e4 247{
957c665f 248 struct rt6_info *rt = dst_alloc(ops, dev, 0, 0, flags);
cf911662 249
38308473 250 if (rt)
fbe58186 251 memset(&rt->rt6i_table, 0,
38308473 252 sizeof(*rt) - sizeof(struct dst_entry));
cf911662
DM
253
254 return rt;
1da177e4
LT
255}
256
257static void ip6_dst_destroy(struct dst_entry *dst)
258{
259 struct rt6_info *rt = (struct rt6_info *)dst;
260 struct inet6_dev *idev = rt->rt6i_idev;
b3419363 261 struct inet_peer *peer = rt->rt6i_peer;
1da177e4 262
8e2ec639
YZ
263 if (!(rt->dst.flags & DST_HOST))
264 dst_destroy_metrics_generic(dst);
265
38308473 266 if (idev) {
1da177e4
LT
267 rt->rt6i_idev = NULL;
268 in6_dev_put(idev);
1ab1457c 269 }
b3419363 270 if (peer) {
b3419363
DM
271 rt->rt6i_peer = NULL;
272 inet_putpeer(peer);
273 }
274}
275
6431cbc2
DM
276static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
277
278static u32 rt6_peer_genid(void)
279{
280 return atomic_read(&__rt6_peer_genid);
281}
282
b3419363
DM
283void rt6_bind_peer(struct rt6_info *rt, int create)
284{
285 struct inet_peer *peer;
286
b3419363
DM
287 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
288 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
289 inet_putpeer(peer);
6431cbc2
DM
290 else
291 rt->rt6i_peer_genid = rt6_peer_genid();
1da177e4
LT
292}
293
294static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
295 int how)
296{
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 299 struct net_device *loopback_dev =
c346dca1 300 dev_net(dev)->loopback_dev;
1da177e4 301
38308473 302 if (dev != loopback_dev && idev && idev->dev == dev) {
5a3e55d6
DL
303 struct inet6_dev *loopback_idev =
304 in6_dev_get(loopback_dev);
38308473 305 if (loopback_idev) {
1da177e4
LT
306 rt->rt6i_idev = loopback_idev;
307 in6_dev_put(idev);
308 }
309 }
310}
311
312static __inline__ int rt6_check_expired(const struct rt6_info *rt)
313{
a02cec21
ED
314 return (rt->rt6i_flags & RTF_EXPIRES) &&
315 time_after(jiffies, rt->rt6i_expires);
1da177e4
LT
316}
317
b71d1d42 318static inline int rt6_need_strict(const struct in6_addr *daddr)
c71099ac 319{
a02cec21
ED
320 return ipv6_addr_type(daddr) &
321 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
c71099ac
TG
322}
323
1da177e4 324/*
c71099ac 325 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
326 */
327
8ed67789
DL
328static inline struct rt6_info *rt6_device_match(struct net *net,
329 struct rt6_info *rt,
b71d1d42 330 const struct in6_addr *saddr,
1da177e4 331 int oif,
d420895e 332 int flags)
1da177e4
LT
333{
334 struct rt6_info *local = NULL;
335 struct rt6_info *sprt;
336
dd3abc4e
YH
337 if (!oif && ipv6_addr_any(saddr))
338 goto out;
339
d8d1f30b 340 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
dd3abc4e
YH
341 struct net_device *dev = sprt->rt6i_dev;
342
343 if (oif) {
1da177e4
LT
344 if (dev->ifindex == oif)
345 return sprt;
346 if (dev->flags & IFF_LOOPBACK) {
38308473 347 if (!sprt->rt6i_idev ||
1da177e4 348 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 349 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 350 continue;
1ab1457c 351 if (local && (!oif ||
1da177e4
LT
352 local->rt6i_idev->dev->ifindex == oif))
353 continue;
354 }
355 local = sprt;
356 }
dd3abc4e
YH
357 } else {
358 if (ipv6_chk_addr(net, saddr, dev,
359 flags & RT6_LOOKUP_F_IFACE))
360 return sprt;
1da177e4 361 }
dd3abc4e 362 }
1da177e4 363
dd3abc4e 364 if (oif) {
1da177e4
LT
365 if (local)
366 return local;
367
d420895e 368 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 369 return net->ipv6.ip6_null_entry;
1da177e4 370 }
dd3abc4e 371out:
1da177e4
LT
372 return rt;
373}
374
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the neighbour attached to @rt is not in a NUD_VALID state and more
 * than the interface's rtr_probe_interval has passed since neigh->updated,
 * send a Neighbour Solicitation to the neighbour's solicited-node multicast
 * address.  @rt may be NULL, in which case nothing happens.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one per
 * minute.  The rate limit is the test-and-set on neigh->updated, so the
 * neighbour lock is taken for WRITING: with only the read side held, two
 * contexts could both pass the time check, both store neigh->updated and
 * both send a probe.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh;

	rcu_read_lock();
	neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
	/* Unlocked fast path: nothing to probe, or already reachable. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		goto out;

	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		/* Claim this probe interval before dropping the lock. */
		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else {
		write_unlock_bh(&neigh->lock);
	}
out:
	rcu_read_unlock();
}
#else
/* Router preference support compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
414
1da177e4 415/*
554cfb7e 416 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 417 */
b6f99a21 418static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
419{
420 struct net_device *dev = rt->rt6i_dev;
161980f4 421 if (!oif || dev->ifindex == oif)
554cfb7e 422 return 2;
161980f4
DM
423 if ((dev->flags & IFF_LOOPBACK) &&
424 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
425 return 1;
426 return 0;
554cfb7e 427}
1da177e4 428
b6f99a21 429static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 430{
f2c31e32 431 struct neighbour *neigh;
398bcbeb 432 int m;
f2c31e32
ED
433
434 rcu_read_lock();
27217455 435 neigh = dst_get_neighbour_noref(&rt->dst);
4d0c5911
YH
436 if (rt->rt6i_flags & RTF_NONEXTHOP ||
437 !(rt->rt6i_flags & RTF_GATEWAY))
438 m = 1;
439 else if (neigh) {
554cfb7e
YH
440 read_lock_bh(&neigh->lock);
441 if (neigh->nud_state & NUD_VALID)
4d0c5911 442 m = 2;
398bcbeb
YH
443#ifdef CONFIG_IPV6_ROUTER_PREF
444 else if (neigh->nud_state & NUD_FAILED)
445 m = 0;
446#endif
447 else
ea73ee23 448 m = 1;
554cfb7e 449 read_unlock_bh(&neigh->lock);
398bcbeb
YH
450 } else
451 m = 0;
f2c31e32 452 rcu_read_unlock();
554cfb7e 453 return m;
1da177e4
LT
454}
455
554cfb7e
YH
456static int rt6_score_route(struct rt6_info *rt, int oif,
457 int strict)
1da177e4 458{
4d0c5911 459 int m, n;
1ab1457c 460
4d0c5911 461 m = rt6_check_dev(rt, oif);
77d16f45 462 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 463 return -1;
ebacaaa0
YH
464#ifdef CONFIG_IPV6_ROUTER_PREF
465 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
466#endif
4d0c5911 467 n = rt6_check_neigh(rt);
557e92ef 468 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
469 return -1;
470 return m;
471}
472
f11e6659
DM
473static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
474 int *mpri, struct rt6_info *match)
554cfb7e 475{
f11e6659
DM
476 int m;
477
478 if (rt6_check_expired(rt))
479 goto out;
480
481 m = rt6_score_route(rt, oif, strict);
482 if (m < 0)
483 goto out;
484
485 if (m > *mpri) {
486 if (strict & RT6_LOOKUP_F_REACHABLE)
487 rt6_probe(match);
488 *mpri = m;
489 match = rt;
490 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
491 rt6_probe(rt);
492 }
493
494out:
495 return match;
496}
497
498static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
499 struct rt6_info *rr_head,
500 u32 metric, int oif, int strict)
501{
502 struct rt6_info *rt, *match;
554cfb7e 503 int mpri = -1;
1da177e4 504
f11e6659
DM
505 match = NULL;
506 for (rt = rr_head; rt && rt->rt6i_metric == metric;
d8d1f30b 507 rt = rt->dst.rt6_next)
f11e6659
DM
508 match = find_match(rt, oif, strict, &mpri, match);
509 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
d8d1f30b 510 rt = rt->dst.rt6_next)
f11e6659 511 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 512
f11e6659
DM
513 return match;
514}
1da177e4 515
f11e6659
DM
516static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
517{
518 struct rt6_info *match, *rt0;
8ed67789 519 struct net *net;
1da177e4 520
f11e6659 521 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 522 __func__, fn->leaf, oif);
554cfb7e 523
f11e6659
DM
524 rt0 = fn->rr_ptr;
525 if (!rt0)
526 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 527
f11e6659 528 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 529
554cfb7e 530 if (!match &&
f11e6659 531 (strict & RT6_LOOKUP_F_REACHABLE)) {
d8d1f30b 532 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 533
554cfb7e 534 /* no entries matched; do round-robin */
f11e6659
DM
535 if (!next || next->rt6i_metric != rt0->rt6i_metric)
536 next = fn->leaf;
537
538 if (next != rt0)
539 fn->rr_ptr = next;
1da177e4 540 }
1da177e4 541
f11e6659 542 RT6_TRACE("%s() => %p\n",
0dc47877 543 __func__, match);
1da177e4 544
c346dca1 545 net = dev_net(rt0->rt6i_dev);
a02cec21 546 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
547}
548
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (RFC 4191).
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as a struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the router that sent the option
 *
 * Adds, refreshes or (for a zero lifetime) deletes the route-info route
 * for the advertised prefix via @gwaddr on @dev.
 *
 * Returns 0 on success, -EINVAL if the option is malformed or carries an
 * invalid preference value.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/*
	 * Sanity check for prefix_len and length.
	 *
	 * Length is in units of 8 octets and includes the 8-octet fixed
	 * part of the option, so (RFC 4191, section 2.3):
	 *   prefix_len  > 64  =>  Length must be 3  (16 prefix octets)
	 *   prefix_len  > 0   =>  Length must be 2 or 3
	 *   prefix_len == 0   =>  Length may be 1, 2 or 3
	 * Accepting anything shorter would make the prefix copy below read
	 * beyond the end of the option.
	 */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		if (rinfo->length < 3) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		/* Full 128-bit prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
					pref);
	else if (rt)
		/* Existing route: only refresh the preference bits. */
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime)) {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		} else {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
622
/*
 * BACKTRACK - climb the fib tree when the current node produced no route.
 *
 * Expands inside a lookup function and relies on that function's local
 * variables `rt` and `fn` and on its labels `out` and `restart`.  When
 * `rt` is the null entry of @__net, it moves `fn` towards the root,
 * diverting into a parent's subtree (looked up by @saddr) where one exists
 * and `fn` is not that subtree, until it either reaches a node flagged
 * RTN_TL_ROOT (jumps to `out`) or one flagged RTN_RTINFO (jumps to
 * `restart`).
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		for (;;) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 640
8ed67789
DL
641static struct rt6_info *ip6_pol_route_lookup(struct net *net,
642 struct fib6_table *table,
4c9483b2 643 struct flowi6 *fl6, int flags)
1da177e4
LT
644{
645 struct fib6_node *fn;
646 struct rt6_info *rt;
647
c71099ac 648 read_lock_bh(&table->tb6_lock);
4c9483b2 649 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
650restart:
651 rt = fn->leaf;
4c9483b2
DM
652 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
653 BACKTRACK(net, &fl6->saddr);
c71099ac 654out:
d8d1f30b 655 dst_use(&rt->dst, jiffies);
c71099ac 656 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
657 return rt;
658
659}
660
9acd9f3a
YH
661struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
662 const struct in6_addr *saddr, int oif, int strict)
c71099ac 663{
4c9483b2
DM
664 struct flowi6 fl6 = {
665 .flowi6_oif = oif,
666 .daddr = *daddr,
c71099ac
TG
667 };
668 struct dst_entry *dst;
77d16f45 669 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 670
adaa70bb 671 if (saddr) {
4c9483b2 672 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
673 flags |= RT6_LOOKUP_F_HAS_SADDR;
674 }
675
4c9483b2 676 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
677 if (dst->error == 0)
678 return (struct rt6_info *) dst;
679
680 dst_release(dst);
681
1da177e4
LT
682 return NULL;
683}
684
7159039a
YH
685EXPORT_SYMBOL(rt6_lookup);
686
c71099ac 687/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
688 It takes new route entry, the addition fails by any reason the
689 route is freed. In any case, if caller does not hold it, it may
690 be destroyed.
691 */
692
86872cb5 693static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
694{
695 int err;
c71099ac 696 struct fib6_table *table;
1da177e4 697
c71099ac
TG
698 table = rt->rt6i_table;
699 write_lock_bh(&table->tb6_lock);
86872cb5 700 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 701 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
702
703 return err;
704}
705
40e22e8f
TG
706int ip6_ins_rt(struct rt6_info *rt)
707{
4d1169c1 708 struct nl_info info = {
c346dca1 709 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 710 };
528c4ceb 711 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
712}
713
21efcfa0
ED
714static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
715 const struct in6_addr *daddr,
b71d1d42 716 const struct in6_addr *saddr)
1da177e4 717{
1da177e4
LT
718 struct rt6_info *rt;
719
720 /*
721 * Clone the route.
722 */
723
21efcfa0 724 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
725
726 if (rt) {
14deae41
DM
727 struct neighbour *neigh;
728 int attempts = !in_softirq();
729
38308473 730 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
58c4fb86 731 if (rt->rt6i_dst.plen != 128 &&
21efcfa0 732 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
58c4fb86 733 rt->rt6i_flags |= RTF_ANYCAST;
4e3fd7a0 734 rt->rt6i_gateway = *daddr;
58c4fb86 735 }
1da177e4 736
1da177e4 737 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
738
739#ifdef CONFIG_IPV6_SUBTREES
740 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 741 rt->rt6i_src.addr = *saddr;
1da177e4
LT
742 rt->rt6i_src.plen = 128;
743 }
744#endif
745
14deae41 746 retry:
04a6f441
DM
747 neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway,
748 rt->rt6i_dev);
14deae41
DM
749 if (IS_ERR(neigh)) {
750 struct net *net = dev_net(rt->rt6i_dev);
751 int saved_rt_min_interval =
752 net->ipv6.sysctl.ip6_rt_gc_min_interval;
753 int saved_rt_elasticity =
754 net->ipv6.sysctl.ip6_rt_gc_elasticity;
755
756 if (attempts-- > 0) {
757 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
758 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
759
86393e52 760 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
761
762 net->ipv6.sysctl.ip6_rt_gc_elasticity =
763 saved_rt_elasticity;
764 net->ipv6.sysctl.ip6_rt_gc_min_interval =
765 saved_rt_min_interval;
766 goto retry;
767 }
768
769 if (net_ratelimit())
770 printk(KERN_WARNING
7e1b33e5 771 "ipv6: Neighbour table overflow.\n");
d8d1f30b 772 dst_free(&rt->dst);
14deae41
DM
773 return NULL;
774 }
69cce1d1 775 dst_set_neighbour(&rt->dst, neigh);
1da177e4 776
95a9a5ba 777 }
1da177e4 778
95a9a5ba
YH
779 return rt;
780}
1da177e4 781
21efcfa0
ED
782static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
783 const struct in6_addr *daddr)
299d9939 784{
21efcfa0
ED
785 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
786
299d9939 787 if (rt) {
299d9939 788 rt->rt6i_flags |= RTF_CACHE;
27217455 789 dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
299d9939
YH
790 }
791 return rt;
792}
793
8ed67789 794static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 795 struct flowi6 *fl6, int flags)
1da177e4
LT
796{
797 struct fib6_node *fn;
519fbd87 798 struct rt6_info *rt, *nrt;
c71099ac 799 int strict = 0;
1da177e4 800 int attempts = 3;
519fbd87 801 int err;
53b7997f 802 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 803
77d16f45 804 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
805
806relookup:
c71099ac 807 read_lock_bh(&table->tb6_lock);
1da177e4 808
8238dd06 809restart_2:
4c9483b2 810 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1da177e4
LT
811
812restart:
4acad72d 813 rt = rt6_select(fn, oif, strict | reachable);
8ed67789 814
4c9483b2 815 BACKTRACK(net, &fl6->saddr);
8ed67789 816 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 817 rt->rt6i_flags & RTF_CACHE)
1ddef044 818 goto out;
1da177e4 819
d8d1f30b 820 dst_hold(&rt->dst);
c71099ac 821 read_unlock_bh(&table->tb6_lock);
fb9de91e 822
27217455 823 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
4c9483b2 824 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
7343ff31 825 else if (!(rt->dst.flags & DST_HOST))
4c9483b2 826 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
827 else
828 goto out2;
e40cf353 829
d8d1f30b 830 dst_release(&rt->dst);
8ed67789 831 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 832
d8d1f30b 833 dst_hold(&rt->dst);
519fbd87 834 if (nrt) {
40e22e8f 835 err = ip6_ins_rt(nrt);
519fbd87 836 if (!err)
1da177e4 837 goto out2;
1da177e4 838 }
1da177e4 839
519fbd87
YH
840 if (--attempts <= 0)
841 goto out2;
842
843 /*
c71099ac 844 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
845 * released someone could insert this route. Relookup.
846 */
d8d1f30b 847 dst_release(&rt->dst);
519fbd87
YH
848 goto relookup;
849
850out:
8238dd06
YH
851 if (reachable) {
852 reachable = 0;
853 goto restart_2;
854 }
d8d1f30b 855 dst_hold(&rt->dst);
c71099ac 856 read_unlock_bh(&table->tb6_lock);
1da177e4 857out2:
d8d1f30b
CG
858 rt->dst.lastuse = jiffies;
859 rt->dst.__use++;
c71099ac
TG
860
861 return rt;
1da177e4
LT
862}
863
8ed67789 864static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 865 struct flowi6 *fl6, int flags)
4acad72d 866{
4c9483b2 867 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
868}
869
c71099ac
TG
870void ip6_route_input(struct sk_buff *skb)
871{
b71d1d42 872 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 873 struct net *net = dev_net(skb->dev);
adaa70bb 874 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
875 struct flowi6 fl6 = {
876 .flowi6_iif = skb->dev->ifindex,
877 .daddr = iph->daddr,
878 .saddr = iph->saddr,
38308473 879 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
4c9483b2
DM
880 .flowi6_mark = skb->mark,
881 .flowi6_proto = iph->nexthdr,
c71099ac 882 };
adaa70bb 883
1d6e55f1 884 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 885 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 886
4c9483b2 887 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
c71099ac
TG
888}
889
8ed67789 890static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 891 struct flowi6 *fl6, int flags)
1da177e4 892{
4c9483b2 893 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
894}
895
9c7a4f9c 896struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 897 struct flowi6 *fl6)
c71099ac
TG
898{
899 int flags = 0;
900
4c9483b2 901 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 902 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 903
4c9483b2 904 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 905 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
906 else if (sk)
907 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 908
4c9483b2 909 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4
LT
910}
911
7159039a 912EXPORT_SYMBOL(ip6_route_output);
1da177e4 913
2774c131 914struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 915{
5c1e6aa3 916 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
917 struct dst_entry *new = NULL;
918
5c1e6aa3 919 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
14e50e57 920 if (rt) {
cf911662
DM
921 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
922
d8d1f30b 923 new = &rt->dst;
14e50e57 924
14e50e57 925 new->__use = 1;
352e512c
HX
926 new->input = dst_discard;
927 new->output = dst_discard;
14e50e57 928
21efcfa0
ED
929 if (dst_metrics_read_only(&ort->dst))
930 new->_metrics = ort->dst._metrics;
931 else
932 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
933 rt->rt6i_idev = ort->rt6i_idev;
934 if (rt->rt6i_idev)
935 in6_dev_hold(rt->rt6i_idev);
936 rt->rt6i_expires = 0;
937
4e3fd7a0 938 rt->rt6i_gateway = ort->rt6i_gateway;
14e50e57
DM
939 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
940 rt->rt6i_metric = 0;
941
942 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
943#ifdef CONFIG_IPV6_SUBTREES
944 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
945#endif
946
947 dst_free(new);
948 }
949
69ead7af
DM
950 dst_release(dst_orig);
951 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 952}
14e50e57 953
1da177e4
LT
954/*
955 * Destination cache support functions
956 */
957
958static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
959{
960 struct rt6_info *rt;
961
962 rt = (struct rt6_info *) dst;
963
6431cbc2
DM
964 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
965 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
966 if (!rt->rt6i_peer)
967 rt6_bind_peer(rt, 0);
968 rt->rt6i_peer_genid = rt6_peer_genid();
969 }
1da177e4 970 return dst;
6431cbc2 971 }
1da177e4
LT
972 return NULL;
973}
974
975static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
976{
977 struct rt6_info *rt = (struct rt6_info *) dst;
978
979 if (rt) {
54c1a859
YH
980 if (rt->rt6i_flags & RTF_CACHE) {
981 if (rt6_check_expired(rt)) {
982 ip6_del_rt(rt);
983 dst = NULL;
984 }
985 } else {
1da177e4 986 dst_release(dst);
54c1a859
YH
987 dst = NULL;
988 }
1da177e4 989 }
54c1a859 990 return dst;
1da177e4
LT
991}
992
993static void ip6_link_failure(struct sk_buff *skb)
994{
995 struct rt6_info *rt;
996
3ffe533c 997 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 998
adf30907 999 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1000 if (rt) {
38308473 1001 if (rt->rt6i_flags & RTF_CACHE) {
d8d1f30b 1002 dst_set_expires(&rt->dst, 0);
1da177e4
LT
1003 rt->rt6i_flags |= RTF_EXPIRES;
1004 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1005 rt->rt6i_node->fn_sernum = -1;
1006 }
1007}
1008
1009static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1010{
1011 struct rt6_info *rt6 = (struct rt6_info*)dst;
1012
1013 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1014 rt6->rt6i_flags |= RTF_MODIFIED;
1015 if (mtu < IPV6_MIN_MTU) {
defb3519 1016 u32 features = dst_metric(dst, RTAX_FEATURES);
1da177e4 1017 mtu = IPV6_MIN_MTU;
defb3519
DM
1018 features |= RTAX_FEATURE_ALLFRAG;
1019 dst_metric_set(dst, RTAX_FEATURES, features);
1da177e4 1020 }
defb3519 1021 dst_metric_set(dst, RTAX_MTU, mtu);
1da177e4
LT
1022 }
1023}
1024
0dbaee3b 1025static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1026{
0dbaee3b
DM
1027 struct net_device *dev = dst->dev;
1028 unsigned int mtu = dst_mtu(dst);
1029 struct net *net = dev_net(dev);
1030
1da177e4
LT
1031 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1032
5578689a
DL
1033 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1034 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1035
1036 /*
1ab1457c
YH
1037 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1038 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1039 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1040 * rely only on pmtu discovery"
1041 */
1042 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1043 mtu = IPV6_MAXPLEN;
1044 return mtu;
1045}
1046
ebb762f2 1047static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1048{
d33e4553 1049 struct inet6_dev *idev;
618f9bc7
SK
1050 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1051
1052 if (mtu)
1053 return mtu;
1054
1055 mtu = IPV6_MIN_MTU;
d33e4553
DM
1056
1057 rcu_read_lock();
1058 idev = __in6_dev_get(dst->dev);
1059 if (idev)
1060 mtu = idev->cnf.mtu6;
1061 rcu_read_unlock();
1062
1063 return mtu;
1064}
1065
3b00944c
YH
/*
 * Singly linked list (chained through dst_entry.next) of the dst entries
 * handed out by icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all()
 * unlink and free entries from it.  Every access to the list is made with
 * icmp6_dst_lock held (taken with spin_lock_bh()).
 */
1066static struct dst_entry *icmp6_dst_gc_list;
1067static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1068
3b00944c 1069struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 1070 struct neighbour *neigh,
9acd9f3a 1071 const struct in6_addr *addr)
1da177e4
LT
1072{
1073 struct rt6_info *rt;
1074 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1075 struct net *net = dev_net(dev);
1da177e4 1076
38308473 1077 if (unlikely(!idev))
1da177e4
LT
1078 return NULL;
1079
957c665f 1080 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0);
38308473 1081 if (unlikely(!rt)) {
1da177e4
LT
1082 in6_dev_put(idev);
1083 goto out;
1084 }
1085
1da177e4
LT
1086 if (neigh)
1087 neigh_hold(neigh);
14deae41 1088 else {
04a6f441 1089 neigh = __neigh_lookup_errno(&nd_tbl, addr, dev);
14deae41
DM
1090 if (IS_ERR(neigh))
1091 neigh = NULL;
1092 }
1da177e4 1093
8e2ec639
YZ
1094 rt->dst.flags |= DST_HOST;
1095 rt->dst.output = ip6_output;
69cce1d1 1096 dst_set_neighbour(&rt->dst, neigh);
d8d1f30b 1097 atomic_set(&rt->dst.__refcnt, 1);
4e3fd7a0 1098 rt->rt6i_dst.addr = *addr;
8e2ec639
YZ
1099 rt->rt6i_dst.plen = 128;
1100 rt->rt6i_idev = idev;
7011687f 1101 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1da177e4 1102
3b00944c 1103 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1104 rt->dst.next = icmp6_dst_gc_list;
1105 icmp6_dst_gc_list = &rt->dst;
3b00944c 1106 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1107
5578689a 1108 fib6_force_start_gc(net);
1da177e4
LT
1109
1110out:
d8d1f30b 1111 return &rt->dst;
1da177e4
LT
1112}
1113
3d0f24a7 1114int icmp6_dst_gc(void)
1da177e4 1115{
e9476e95 1116 struct dst_entry *dst, **pprev;
3d0f24a7 1117 int more = 0;
1da177e4 1118
3b00944c
YH
1119 spin_lock_bh(&icmp6_dst_lock);
1120 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1121
1da177e4
LT
1122 while ((dst = *pprev) != NULL) {
1123 if (!atomic_read(&dst->__refcnt)) {
1124 *pprev = dst->next;
1125 dst_free(dst);
1da177e4
LT
1126 } else {
1127 pprev = &dst->next;
3d0f24a7 1128 ++more;
1da177e4
LT
1129 }
1130 }
1131
3b00944c 1132 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1133
3d0f24a7 1134 return more;
1da177e4
LT
1135}
1136
1e493d19
DM
1137static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1138 void *arg)
1139{
1140 struct dst_entry *dst, **pprev;
1141
1142 spin_lock_bh(&icmp6_dst_lock);
1143 pprev = &icmp6_dst_gc_list;
1144 while ((dst = *pprev) != NULL) {
1145 struct rt6_info *rt = (struct rt6_info *) dst;
1146 if (func(rt, arg)) {
1147 *pprev = dst->next;
1148 dst_free(dst);
1149 } else {
1150 pprev = &dst->next;
1151 }
1152 }
1153 spin_unlock_bh(&icmp6_dst_lock);
1154}
1155
569d3645 1156static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1157{
1da177e4 1158 unsigned long now = jiffies;
86393e52 1159 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1160 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1161 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1162 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1163 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1164 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1165 int entries;
7019b78e 1166
fc66f95c 1167 entries = dst_entries_get_fast(ops);
7019b78e 1168 if (time_after(rt_last_gc + rt_min_interval, now) &&
fc66f95c 1169 entries <= rt_max_size)
1da177e4
LT
1170 goto out;
1171
6891a346
BT
1172 net->ipv6.ip6_rt_gc_expire++;
1173 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1174 net->ipv6.ip6_rt_last_gc = now;
fc66f95c
ED
1175 entries = dst_entries_get_slow(ops);
1176 if (entries < ops->gc_thresh)
7019b78e 1177 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1178out:
7019b78e 1179 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1180 return entries > rt_max_size;
1da177e4
LT
1181}
1182
1183/* Clean host part of a prefix. Not necessary in radix tree,
1184 but results in cleaner routing tables.
1185
1186 Remove it only when all the things will work!
1187 */
1188
6b75d090 1189int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1190{
5170ae82 1191 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
a02e4b7d 1192 if (hoplimit == 0) {
6b75d090 1193 struct net_device *dev = dst->dev;
c68f24cc
ED
1194 struct inet6_dev *idev;
1195
1196 rcu_read_lock();
1197 idev = __in6_dev_get(dev);
1198 if (idev)
6b75d090 1199 hoplimit = idev->cnf.hop_limit;
c68f24cc 1200 else
53b7997f 1201 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
c68f24cc 1202 rcu_read_unlock();
1da177e4
LT
1203 }
1204 return hoplimit;
1205}
abbf46ae 1206EXPORT_SYMBOL(ip6_dst_hoplimit);
1da177e4
LT
1207
1208/*
1209 *
1210 */
1211
86872cb5 1212int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1213{
1214 int err;
5578689a 1215 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1216 struct rt6_info *rt = NULL;
1217 struct net_device *dev = NULL;
1218 struct inet6_dev *idev = NULL;
c71099ac 1219 struct fib6_table *table;
1da177e4
LT
1220 int addr_type;
1221
86872cb5 1222 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1223 return -EINVAL;
1224#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1225 if (cfg->fc_src_len)
1da177e4
LT
1226 return -EINVAL;
1227#endif
86872cb5 1228 if (cfg->fc_ifindex) {
1da177e4 1229 err = -ENODEV;
5578689a 1230 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1231 if (!dev)
1232 goto out;
1233 idev = in6_dev_get(dev);
1234 if (!idev)
1235 goto out;
1236 }
1237
86872cb5
TG
1238 if (cfg->fc_metric == 0)
1239 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1240
d71314b4 1241 err = -ENOBUFS;
38308473
DM
1242 if (cfg->fc_nlinfo.nlh &&
1243 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1244 table = fib6_get_table(net, cfg->fc_table);
38308473 1245 if (!table) {
d71314b4
MV
1246 printk(KERN_WARNING "IPv6: NLM_F_CREATE should be specified when creating new route\n");
1247 table = fib6_new_table(net, cfg->fc_table);
1248 }
1249 } else {
1250 table = fib6_new_table(net, cfg->fc_table);
1251 }
38308473
DM
1252
1253 if (!table)
c71099ac 1254 goto out;
c71099ac 1255
957c665f 1256 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops, NULL, DST_NOCOUNT);
1da177e4 1257
38308473 1258 if (!rt) {
1da177e4
LT
1259 err = -ENOMEM;
1260 goto out;
1261 }
1262
d8d1f30b 1263 rt->dst.obsolete = -1;
6f704992
YH
1264 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1265 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1266 0;
1da177e4 1267
86872cb5
TG
1268 if (cfg->fc_protocol == RTPROT_UNSPEC)
1269 cfg->fc_protocol = RTPROT_BOOT;
1270 rt->rt6i_protocol = cfg->fc_protocol;
1271
1272 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1273
1274 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1275 rt->dst.input = ip6_mc_input;
ab79ad14
1276 else if (cfg->fc_flags & RTF_LOCAL)
1277 rt->dst.input = ip6_input;
1da177e4 1278 else
d8d1f30b 1279 rt->dst.input = ip6_forward;
1da177e4 1280
d8d1f30b 1281 rt->dst.output = ip6_output;
1da177e4 1282
86872cb5
TG
1283 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1284 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4 1285 if (rt->rt6i_dst.plen == 128)
11d53b49 1286 rt->dst.flags |= DST_HOST;
1da177e4 1287
8e2ec639
YZ
1288 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1289 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1290 if (!metrics) {
1291 err = -ENOMEM;
1292 goto out;
1293 }
1294 dst_init_metrics(&rt->dst, metrics, 0);
1295 }
1da177e4 1296#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1297 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1298 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1299#endif
1300
86872cb5 1301 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1302
1303 /* We cannot add true routes via loopback here,
1304 they would result in kernel looping; promote them to reject routes
1305 */
86872cb5 1306 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1307 (dev && (dev->flags & IFF_LOOPBACK) &&
1308 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1309 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1310 /* hold loopback dev/idev if we haven't done so. */
5578689a 1311 if (dev != net->loopback_dev) {
1da177e4
LT
1312 if (dev) {
1313 dev_put(dev);
1314 in6_dev_put(idev);
1315 }
5578689a 1316 dev = net->loopback_dev;
1da177e4
LT
1317 dev_hold(dev);
1318 idev = in6_dev_get(dev);
1319 if (!idev) {
1320 err = -ENODEV;
1321 goto out;
1322 }
1323 }
d8d1f30b
CG
1324 rt->dst.output = ip6_pkt_discard_out;
1325 rt->dst.input = ip6_pkt_discard;
1326 rt->dst.error = -ENETUNREACH;
1da177e4
LT
1327 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1328 goto install_route;
1329 }
1330
86872cb5 1331 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1332 const struct in6_addr *gw_addr;
1da177e4
LT
1333 int gwa_type;
1334
86872cb5 1335 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1336 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1337 gwa_type = ipv6_addr_type(gw_addr);
1338
1339 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1340 struct rt6_info *grt;
1341
1342 /* IPv6 strictly inhibits using not link-local
1343 addresses as nexthop address.
1344 Otherwise, router will not able to send redirects.
1345 It is very good, but in some (rare!) circumstances
1346 (SIT, PtP, NBMA NOARP links) it is handy to allow
1347 some exceptions. --ANK
1348 */
1349 err = -EINVAL;
38308473 1350 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1351 goto out;
1352
5578689a 1353 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1354
1355 err = -EHOSTUNREACH;
38308473 1356 if (!grt)
1da177e4
LT
1357 goto out;
1358 if (dev) {
1359 if (dev != grt->rt6i_dev) {
d8d1f30b 1360 dst_release(&grt->dst);
1da177e4
LT
1361 goto out;
1362 }
1363 } else {
1364 dev = grt->rt6i_dev;
1365 idev = grt->rt6i_idev;
1366 dev_hold(dev);
1367 in6_dev_hold(grt->rt6i_idev);
1368 }
38308473 1369 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1370 err = 0;
d8d1f30b 1371 dst_release(&grt->dst);
1da177e4
LT
1372
1373 if (err)
1374 goto out;
1375 }
1376 err = -EINVAL;
38308473 1377 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1378 goto out;
1379 }
1380
1381 err = -ENODEV;
38308473 1382 if (!dev)
1da177e4
LT
1383 goto out;
1384
c3968a85
DW
1385 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1386 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1387 err = -EINVAL;
1388 goto out;
1389 }
4e3fd7a0 1390 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1391 rt->rt6i_prefsrc.plen = 128;
1392 } else
1393 rt->rt6i_prefsrc.plen = 0;
1394
86872cb5 1395 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
69cce1d1
DM
1396 struct neighbour *n = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1397 if (IS_ERR(n)) {
1398 err = PTR_ERR(n);
1da177e4
LT
1399 goto out;
1400 }
69cce1d1 1401 dst_set_neighbour(&rt->dst, n);
1da177e4
LT
1402 }
1403
86872cb5 1404 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1405
1406install_route:
86872cb5
TG
1407 if (cfg->fc_mx) {
1408 struct nlattr *nla;
1409 int remaining;
1410
1411 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1412 int type = nla_type(nla);
86872cb5
TG
1413
1414 if (type) {
1415 if (type > RTAX_MAX) {
1da177e4
LT
1416 err = -EINVAL;
1417 goto out;
1418 }
86872cb5 1419
defb3519 1420 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1da177e4 1421 }
1da177e4
LT
1422 }
1423 }
1424
d8d1f30b 1425 rt->dst.dev = dev;
1da177e4 1426 rt->rt6i_idev = idev;
c71099ac 1427 rt->rt6i_table = table;
63152fc0 1428
c346dca1 1429 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1430
86872cb5 1431 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1432
1433out:
1434 if (dev)
1435 dev_put(dev);
1436 if (idev)
1437 in6_dev_put(idev);
1438 if (rt)
d8d1f30b 1439 dst_free(&rt->dst);
1da177e4
LT
1440 return err;
1441}
1442
86872cb5 1443static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1444{
1445 int err;
c71099ac 1446 struct fib6_table *table;
c346dca1 1447 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1448
8ed67789 1449 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1450 return -ENOENT;
1451
c71099ac
TG
1452 table = rt->rt6i_table;
1453 write_lock_bh(&table->tb6_lock);
1da177e4 1454
86872cb5 1455 err = fib6_del(rt, info);
d8d1f30b 1456 dst_release(&rt->dst);
1da177e4 1457
c71099ac 1458 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1459
1460 return err;
1461}
1462
e0a1ad73
TG
1463int ip6_del_rt(struct rt6_info *rt)
1464{
4d1169c1 1465 struct nl_info info = {
c346dca1 1466 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1467 };
528c4ceb 1468 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1469}
1470
86872cb5 1471static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1472{
c71099ac 1473 struct fib6_table *table;
1da177e4
LT
1474 struct fib6_node *fn;
1475 struct rt6_info *rt;
1476 int err = -ESRCH;
1477
5578689a 1478 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1479 if (!table)
c71099ac
TG
1480 return err;
1481
1482 read_lock_bh(&table->tb6_lock);
1da177e4 1483
c71099ac 1484 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1485 &cfg->fc_dst, cfg->fc_dst_len,
1486 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1487
1da177e4 1488 if (fn) {
d8d1f30b 1489 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
86872cb5 1490 if (cfg->fc_ifindex &&
38308473 1491 (!rt->rt6i_dev ||
86872cb5 1492 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1493 continue;
86872cb5
TG
1494 if (cfg->fc_flags & RTF_GATEWAY &&
1495 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1496 continue;
86872cb5 1497 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1498 continue;
d8d1f30b 1499 dst_hold(&rt->dst);
c71099ac 1500 read_unlock_bh(&table->tb6_lock);
1da177e4 1501
86872cb5 1502 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1503 }
1504 }
c71099ac 1505 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1506
1507 return err;
1508}
1509
1510/*
1511 * Handle redirects
1512 */
/*
 * Lookup key used when validating a redirect: a regular flowi6 plus the
 * address of the router the redirect came from.
 *
 * fl6 must remain the first member: ip6_route_redirect() passes &rdfl.fl6
 * to the rule lookup and __ip6_route_redirect() casts that flowi6 pointer
 * back to struct ip6rd_flowi to reach the gateway field.
 */
a6279458 1513struct ip6rd_flowi {
4c9483b2 1514 struct flowi6 fl6;
a6279458
YH
1515 struct in6_addr gateway;
1516};
1517
8ed67789
DL
1518static struct rt6_info *__ip6_route_redirect(struct net *net,
1519 struct fib6_table *table,
4c9483b2 1520 struct flowi6 *fl6,
a6279458 1521 int flags)
1da177e4 1522{
4c9483b2 1523 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
a6279458 1524 struct rt6_info *rt;
e843b9e1 1525 struct fib6_node *fn;
c71099ac 1526
1da177e4 1527 /*
e843b9e1
YH
1528 * Get the "current" route for this destination and
1529 * check if the redirect has come from approriate router.
1530 *
1531 * RFC 2461 specifies that redirects should only be
1532 * accepted if they come from the nexthop to the target.
1533 * Due to the way the routes are chosen, this notion
1534 * is a bit fuzzy and one might need to check all possible
1535 * routes.
1da177e4 1536 */
1da177e4 1537
c71099ac 1538 read_lock_bh(&table->tb6_lock);
4c9483b2 1539 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
e843b9e1 1540restart:
d8d1f30b 1541 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
e843b9e1
YH
1542 /*
1543 * Current route is on-link; redirect is always invalid.
1544 *
1545 * Seems, previous statement is not true. It could
1546 * be node, which looks for us as on-link (f.e. proxy ndisc)
1547 * But then router serving it might decide, that we should
1548 * know truth 8)8) --ANK (980726).
1549 */
1550 if (rt6_check_expired(rt))
1551 continue;
1552 if (!(rt->rt6i_flags & RTF_GATEWAY))
1553 continue;
4c9483b2 1554 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
e843b9e1 1555 continue;
a6279458 1556 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1557 continue;
1558 break;
1559 }
a6279458 1560
cb15d9c2 1561 if (!rt)
8ed67789 1562 rt = net->ipv6.ip6_null_entry;
4c9483b2 1563 BACKTRACK(net, &fl6->saddr);
cb15d9c2 1564out:
d8d1f30b 1565 dst_hold(&rt->dst);
a6279458 1566
c71099ac 1567 read_unlock_bh(&table->tb6_lock);
e843b9e1 1568
a6279458
YH
1569 return rt;
1570};
1571
b71d1d42
ED
1572static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1573 const struct in6_addr *src,
1574 const struct in6_addr *gateway,
a6279458
YH
1575 struct net_device *dev)
1576{
adaa70bb 1577 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1578 struct net *net = dev_net(dev);
a6279458 1579 struct ip6rd_flowi rdfl = {
4c9483b2
DM
1580 .fl6 = {
1581 .flowi6_oif = dev->ifindex,
1582 .daddr = *dest,
1583 .saddr = *src,
a6279458 1584 },
a6279458 1585 };
adaa70bb 1586
4e3fd7a0 1587 rdfl.gateway = *gateway;
86c36ce4 1588
adaa70bb
TG
1589 if (rt6_need_strict(dest))
1590 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1591
4c9483b2 1592 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
58f09b78 1593 flags, __ip6_route_redirect);
a6279458
YH
1594}
1595
b71d1d42
ED
1596void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1597 const struct in6_addr *saddr,
a6279458
YH
1598 struct neighbour *neigh, u8 *lladdr, int on_link)
1599{
1600 struct rt6_info *rt, *nrt = NULL;
1601 struct netevent_redirect netevent;
c346dca1 1602 struct net *net = dev_net(neigh->dev);
a6279458
YH
1603
1604 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1605
8ed67789 1606 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1607 if (net_ratelimit())
1608 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1609 "for redirect target\n");
a6279458 1610 goto out;
1da177e4
LT
1611 }
1612
1da177e4
LT
1613 /*
1614 * We have finally decided to accept it.
1615 */
1616
1ab1457c 1617 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1618 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1619 NEIGH_UPDATE_F_OVERRIDE|
1620 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1621 NEIGH_UPDATE_F_ISROUTER))
1622 );
1623
1624 /*
1625 * Redirect received -> path was valid.
1626 * Look, redirects are sent only in response to data packets,
1627 * so that this nexthop apparently is reachable. --ANK
1628 */
d8d1f30b 1629 dst_confirm(&rt->dst);
1da177e4
LT
1630
1631 /* Duplicate redirect: silently ignore. */
27217455 1632 if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
1da177e4
LT
1633 goto out;
1634
21efcfa0 1635 nrt = ip6_rt_copy(rt, dest);
38308473 1636 if (!nrt)
1da177e4
LT
1637 goto out;
1638
1639 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1640 if (on_link)
1641 nrt->rt6i_flags &= ~RTF_GATEWAY;
1642
4e3fd7a0 1643 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
69cce1d1 1644 dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
1da177e4 1645
40e22e8f 1646 if (ip6_ins_rt(nrt))
1da177e4
LT
1647 goto out;
1648
d8d1f30b
CG
1649 netevent.old = &rt->dst;
1650 netevent.new = &nrt->dst;
8d71740c
TT
1651 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1652
38308473 1653 if (rt->rt6i_flags & RTF_CACHE) {
e0a1ad73 1654 ip6_del_rt(rt);
1da177e4
LT
1655 return;
1656 }
1657
1658out:
d8d1f30b 1659 dst_release(&rt->dst);
1da177e4
LT
1660}
1661
1662/*
1663 * Handle ICMP "packet too big" messages
1664 * i.e. Path MTU discovery
1665 */
1666
b71d1d42 1667static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2 1668 struct net *net, u32 pmtu, int ifindex)
1da177e4
LT
1669{
1670 struct rt6_info *rt, *nrt;
1671 int allfrag = 0;
d3052b55 1672again:
ae878ae2 1673 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
38308473 1674 if (!rt)
1da177e4
LT
1675 return;
1676
d3052b55
AV
1677 if (rt6_check_expired(rt)) {
1678 ip6_del_rt(rt);
1679 goto again;
1680 }
1681
d8d1f30b 1682 if (pmtu >= dst_mtu(&rt->dst))
1da177e4
LT
1683 goto out;
1684
1685 if (pmtu < IPV6_MIN_MTU) {
1686 /*
1ab1457c 1687 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1688 * MTU (1280) and a fragment header should always be included
1689 * after a node receiving Too Big message reporting PMTU is
1690 * less than the IPv6 Minimum Link MTU.
1691 */
1692 pmtu = IPV6_MIN_MTU;
1693 allfrag = 1;
1694 }
1695
1696 /* New mtu received -> path was valid.
1697 They are sent only in response to data packets,
1698 so that this nexthop apparently is reachable. --ANK
1699 */
d8d1f30b 1700 dst_confirm(&rt->dst);
1da177e4
LT
1701
1702 /* Host route. If it is static, it would be better
1703 not to override it, but add new one, so that
1704 when cache entry will expire old pmtu
1705 would return automatically.
1706 */
1707 if (rt->rt6i_flags & RTF_CACHE) {
defb3519
DM
1708 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1709 if (allfrag) {
1710 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1711 features |= RTAX_FEATURE_ALLFRAG;
1712 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1713 }
d8d1f30b 1714 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1715 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1716 goto out;
1717 }
1718
1719 /* Network route.
1720 Two cases are possible:
1721 1. It is connected route. Action: COW
1722 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1723 */
27217455 1724 if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1725 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1726 else
1727 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1728
d5315b50 1729 if (nrt) {
defb3519
DM
1730 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1731 if (allfrag) {
1732 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1733 features |= RTAX_FEATURE_ALLFRAG;
1734 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1735 }
a1e78363
YH
1736
1737 /* According to RFC 1981, detecting PMTU increase shouldn't be
1738 * happened within 5 mins, the recommended timer is 10 mins.
1739 * Here this route expiration time is set to ip6_rt_mtu_expires
1740 * which is 10 mins. After 10 mins the decreased pmtu is expired
1741 * and detecting PMTU increase will be automatically happened.
1742 */
d8d1f30b 1743 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1744 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1745
40e22e8f 1746 ip6_ins_rt(nrt);
1da177e4 1747 }
1da177e4 1748out:
d8d1f30b 1749 dst_release(&rt->dst);
1da177e4
LT
1750}
1751
b71d1d42 1752void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr,
ae878ae2
1753 struct net_device *dev, u32 pmtu)
1754{
1755 struct net *net = dev_net(dev);
1756
1757 /*
1758 * RFC 1981 states that a node "MUST reduce the size of the packets it
1759 * is sending along the path" that caused the Packet Too Big message.
1760 * Since it's not possible in the general case to determine which
1761 * interface was used to send the original packet, we update the MTU
1762 * on the interface that will be used to send future packets. We also
1763 * update the MTU on the interface that received the Packet Too Big in
1764 * case the original packet was forced out that interface with
1765 * SO_BINDTODEVICE or similar. This is the next best thing to the
1766 * correct behaviour, which would be to update the MTU on all
1767 * interfaces.
1768 */
1769 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1770 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1771}
1772
1da177e4
LT
1773/*
1774 * Misc support functions
1775 */
1776
21efcfa0
ED
1777static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
1778 const struct in6_addr *dest)
1da177e4 1779{
c346dca1 1780 struct net *net = dev_net(ort->rt6i_dev);
5c1e6aa3 1781 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 1782 ort->dst.dev, 0);
1da177e4
LT
1783
1784 if (rt) {
d8d1f30b
CG
1785 rt->dst.input = ort->dst.input;
1786 rt->dst.output = ort->dst.output;
8e2ec639 1787 rt->dst.flags |= DST_HOST;
d8d1f30b 1788
4e3fd7a0 1789 rt->rt6i_dst.addr = *dest;
8e2ec639 1790 rt->rt6i_dst.plen = 128;
defb3519 1791 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1792 rt->dst.error = ort->dst.error;
1da177e4
LT
1793 rt->rt6i_idev = ort->rt6i_idev;
1794 if (rt->rt6i_idev)
1795 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1796 rt->dst.lastuse = jiffies;
1da177e4
LT
1797 rt->rt6i_expires = 0;
1798
4e3fd7a0 1799 rt->rt6i_gateway = ort->rt6i_gateway;
1da177e4
LT
1800 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1801 rt->rt6i_metric = 0;
1802
1da177e4
LT
1803#ifdef CONFIG_IPV6_SUBTREES
1804 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1805#endif
0f6c6392 1806 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1807 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1808 }
1809 return rt;
1810}
1811
70ceb4f5 1812#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1813static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1814 const struct in6_addr *prefix, int prefixlen,
1815 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1816{
1817 struct fib6_node *fn;
1818 struct rt6_info *rt = NULL;
c71099ac
TG
1819 struct fib6_table *table;
1820
efa2cea0 1821 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1822 if (!table)
c71099ac 1823 return NULL;
70ceb4f5 1824
c71099ac
TG
1825 write_lock_bh(&table->tb6_lock);
1826 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1827 if (!fn)
1828 goto out;
1829
d8d1f30b 1830 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
70ceb4f5
YH
1831 if (rt->rt6i_dev->ifindex != ifindex)
1832 continue;
1833 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1834 continue;
1835 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1836 continue;
d8d1f30b 1837 dst_hold(&rt->dst);
70ceb4f5
YH
1838 break;
1839 }
1840out:
c71099ac 1841 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1842 return rt;
1843}
1844
efa2cea0 1845static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1846 const struct in6_addr *prefix, int prefixlen,
1847 const struct in6_addr *gwaddr, int ifindex,
70ceb4f5
YH
1848 unsigned pref)
1849{
86872cb5
TG
1850 struct fib6_config cfg = {
1851 .fc_table = RT6_TABLE_INFO,
238fc7ea 1852 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1853 .fc_ifindex = ifindex,
1854 .fc_dst_len = prefixlen,
1855 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1856 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1857 .fc_nlinfo.pid = 0,
1858 .fc_nlinfo.nlh = NULL,
1859 .fc_nlinfo.nl_net = net,
86872cb5
TG
1860 };
1861
4e3fd7a0
AD
1862 cfg.fc_dst = *prefix;
1863 cfg.fc_gateway = *gwaddr;
70ceb4f5 1864
e317da96
YH
1865 /* We should treat it as a default route if prefix length is 0. */
1866 if (!prefixlen)
86872cb5 1867 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1868
86872cb5 1869 ip6_route_add(&cfg);
70ceb4f5 1870
efa2cea0 1871 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1872}
1873#endif
1874
b71d1d42 1875struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 1876{
1da177e4 1877 struct rt6_info *rt;
c71099ac 1878 struct fib6_table *table;
1da177e4 1879
c346dca1 1880 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 1881 if (!table)
c71099ac 1882 return NULL;
1da177e4 1883
c71099ac 1884 write_lock_bh(&table->tb6_lock);
d8d1f30b 1885 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1da177e4 1886 if (dev == rt->rt6i_dev &&
045927ff 1887 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1888 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1889 break;
1890 }
1891 if (rt)
d8d1f30b 1892 dst_hold(&rt->dst);
c71099ac 1893 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1894 return rt;
1895}
1896
b71d1d42 1897struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
1898 struct net_device *dev,
1899 unsigned int pref)
1da177e4 1900{
86872cb5
TG
1901 struct fib6_config cfg = {
1902 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1903 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1904 .fc_ifindex = dev->ifindex,
1905 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1906 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1907 .fc_nlinfo.pid = 0,
1908 .fc_nlinfo.nlh = NULL,
c346dca1 1909 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1910 };
1da177e4 1911
4e3fd7a0 1912 cfg.fc_gateway = *gwaddr;
1da177e4 1913
86872cb5 1914 ip6_route_add(&cfg);
1da177e4 1915
1da177e4
LT
1916 return rt6_get_dflt_router(gwaddr, dev);
1917}
1918
7b4da532 1919void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1920{
1921 struct rt6_info *rt;
c71099ac
TG
1922 struct fib6_table *table;
1923
1924 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1925 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 1926 if (!table)
c71099ac 1927 return;
1da177e4
LT
1928
1929restart:
c71099ac 1930 read_lock_bh(&table->tb6_lock);
d8d1f30b 1931 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1da177e4 1932 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
d8d1f30b 1933 dst_hold(&rt->dst);
c71099ac 1934 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1935 ip6_del_rt(rt);
1da177e4
LT
1936 goto restart;
1937 }
1938 }
c71099ac 1939 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1940}
1941
5578689a
DL
1942static void rtmsg_to_fib6_config(struct net *net,
1943 struct in6_rtmsg *rtmsg,
86872cb5
TG
1944 struct fib6_config *cfg)
1945{
1946 memset(cfg, 0, sizeof(*cfg));
1947
1948 cfg->fc_table = RT6_TABLE_MAIN;
1949 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1950 cfg->fc_metric = rtmsg->rtmsg_metric;
1951 cfg->fc_expires = rtmsg->rtmsg_info;
1952 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1953 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1954 cfg->fc_flags = rtmsg->rtmsg_flags;
1955
5578689a 1956 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1957
4e3fd7a0
AD
1958 cfg->fc_dst = rtmsg->rtmsg_dst;
1959 cfg->fc_src = rtmsg->rtmsg_src;
1960 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
1961}
1962
5578689a 1963int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1964{
86872cb5 1965 struct fib6_config cfg;
1da177e4
LT
1966 struct in6_rtmsg rtmsg;
1967 int err;
1968
1969 switch(cmd) {
1970 case SIOCADDRT: /* Add a route */
1971 case SIOCDELRT: /* Delete a route */
1972 if (!capable(CAP_NET_ADMIN))
1973 return -EPERM;
1974 err = copy_from_user(&rtmsg, arg,
1975 sizeof(struct in6_rtmsg));
1976 if (err)
1977 return -EFAULT;
86872cb5 1978
5578689a 1979 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1980
1da177e4
LT
1981 rtnl_lock();
1982 switch (cmd) {
1983 case SIOCADDRT:
86872cb5 1984 err = ip6_route_add(&cfg);
1da177e4
LT
1985 break;
1986 case SIOCDELRT:
86872cb5 1987 err = ip6_route_del(&cfg);
1da177e4
LT
1988 break;
1989 default:
1990 err = -EINVAL;
1991 }
1992 rtnl_unlock();
1993
1994 return err;
3ff50b79 1995 }
1da177e4
LT
1996
1997 return -EINVAL;
1998}
1999
2000/*
2001 * Drop the packet on the floor
2002 */
2003
d5fdd6ba 2004static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2005{
612f09e8 2006 int type;
adf30907 2007 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2008 switch (ipstats_mib_noroutes) {
2009 case IPSTATS_MIB_INNOROUTES:
0660e03f 2010 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2011 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2012 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2013 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2014 break;
2015 }
2016 /* FALLTHROUGH */
2017 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2018 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2019 ipstats_mib_noroutes);
612f09e8
YH
2020 break;
2021 }
3ffe533c 2022 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2023 kfree_skb(skb);
2024 return 0;
2025}
2026
9ce8ade0
TG
2027static int ip6_pkt_discard(struct sk_buff *skb)
2028{
612f09e8 2029 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2030}
2031
20380731 2032static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 2033{
adf30907 2034 skb->dev = skb_dst(skb)->dev;
612f09e8 2035 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2036}
2037
6723ab54
DM
2038#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2039
9ce8ade0
TG
2040static int ip6_pkt_prohibit(struct sk_buff *skb)
2041{
612f09e8 2042 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2043}
2044
2045static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2046{
adf30907 2047 skb->dev = skb_dst(skb)->dev;
612f09e8 2048 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2049}
2050
6723ab54
DM
2051#endif
2052
1da177e4
LT
2053/*
2054 * Allocate a dst for local (unicast / anycast) address.
2055 */
2056
2057struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2058 const struct in6_addr *addr,
2059 int anycast)
2060{
c346dca1 2061 struct net *net = dev_net(idev->dev);
5c1e6aa3 2062 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops,
957c665f 2063 net->loopback_dev, 0);
14deae41 2064 struct neighbour *neigh;
1da177e4 2065
38308473 2066 if (!rt) {
40385653
BG
2067 if (net_ratelimit())
2068 pr_warning("IPv6: Maximum number of routes reached,"
2069 " consider increasing route/max_size.\n");
1da177e4 2070 return ERR_PTR(-ENOMEM);
40385653 2071 }
1da177e4 2072
1da177e4
LT
2073 in6_dev_hold(idev);
2074
11d53b49 2075 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2076 rt->dst.input = ip6_input;
2077 rt->dst.output = ip6_output;
1da177e4 2078 rt->rt6i_idev = idev;
d8d1f30b 2079 rt->dst.obsolete = -1;
1da177e4
LT
2080
2081 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2082 if (anycast)
2083 rt->rt6i_flags |= RTF_ANYCAST;
2084 else
1da177e4 2085 rt->rt6i_flags |= RTF_LOCAL;
04a6f441 2086 neigh = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, rt->rt6i_dev);
14deae41 2087 if (IS_ERR(neigh)) {
d8d1f30b 2088 dst_free(&rt->dst);
14deae41 2089
29546a64 2090 return ERR_CAST(neigh);
1da177e4 2091 }
69cce1d1 2092 dst_set_neighbour(&rt->dst, neigh);
1da177e4 2093
4e3fd7a0 2094 rt->rt6i_dst.addr = *addr;
1da177e4 2095 rt->rt6i_dst.plen = 128;
5578689a 2096 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2097
d8d1f30b 2098 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2099
2100 return rt;
2101}
2102
c3968a85
DW
2103int ip6_route_get_saddr(struct net *net,
2104 struct rt6_info *rt,
b71d1d42 2105 const struct in6_addr *daddr,
c3968a85
DW
2106 unsigned int prefs,
2107 struct in6_addr *saddr)
2108{
2109 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2110 int err = 0;
2111 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2112 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2113 else
2114 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2115 daddr, prefs, saddr);
2116 return err;
2117}
2118
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
	struct in6_addr *addr;		/* address being removed */
};
2125
2126static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2127{
2128 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2129 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2130 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2131
38308473 2132 if (((void *)rt->rt6i_dev == dev || !dev) &&
c3968a85
DW
2133 rt != net->ipv6.ip6_null_entry &&
2134 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2135 /* remove prefsrc entry */
2136 rt->rt6i_prefsrc.plen = 0;
2137 }
2138 return 0;
2139}
2140
2141void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2142{
2143 struct net *net = dev_net(ifp->idev->dev);
2144 struct arg_dev_net_ip adni = {
2145 .dev = ifp->idev->dev,
2146 .net = net,
2147 .addr = &ifp->addr,
2148 };
2149 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2150}
2151
8ed67789
DL
/* Argument block handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace owning the null entry */
};
2156
1da177e4
LT
2157static int fib6_ifdown(struct rt6_info *rt, void *arg)
2158{
bc3ef660 2159 const struct arg_dev_net *adn = arg;
2160 const struct net_device *dev = adn->dev;
8ed67789 2161
38308473 2162 if ((rt->rt6i_dev == dev || !dev) &&
bc3ef660 2163 rt != adn->net->ipv6.ip6_null_entry) {
1da177e4
LT
2164 RT6_TRACE("deleted by ifdown %p\n", rt);
2165 return -1;
2166 }
2167 return 0;
2168}
2169
f3db4851 2170void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2171{
8ed67789
DL
2172 struct arg_dev_net adn = {
2173 .dev = dev,
2174 .net = net,
2175 };
2176
2177 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 2178 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2179}
2180
/* Argument block handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2186
2187static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2188{
2189 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2190 struct inet6_dev *idev;
2191
2192 /* In IPv6 pmtu discovery is not optional,
2193 so that RTAX_MTU lock cannot disable it.
2194 We still use this lock to block changes
2195 caused by addrconf/ndisc.
2196 */
2197
2198 idev = __in6_dev_get(arg->dev);
38308473 2199 if (!idev)
1da177e4
LT
2200 return 0;
2201
2202 /* For administrative MTU increase, there is no way to discover
2203 IPv6 PMTU increase, so PMTU increase should be updated here.
2204 Since RFC 1981 doesn't include administrative MTU increase
2205 update PMTU increase is a MUST. (i.e. jumbo frame)
2206 */
2207 /*
2208 If new MTU is less than route PMTU, this new MTU will be the
2209 lowest MTU in the path, update the route PMTU to reflect PMTU
2210 decreases; if new MTU is greater than route PMTU, and the
2211 old MTU is the lowest MTU in the path, update the route PMTU
2212 to reflect the increase. In this case if the other nodes' MTU
2213 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2214 PMTU discouvery.
2215 */
2216 if (rt->rt6i_dev == arg->dev &&
d8d1f30b
CG
2217 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2218 (dst_mtu(&rt->dst) >= arg->mtu ||
2219 (dst_mtu(&rt->dst) < arg->mtu &&
2220 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2221 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2222 }
1da177e4
LT
2223 return 0;
2224}
2225
2226void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2227{
c71099ac
TG
2228 struct rt6_mtu_change_arg arg = {
2229 .dev = dev,
2230 .mtu = mtu,
2231 };
1da177e4 2232
c346dca1 2233 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2234}
2235
ef7c79ed 2236static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2237 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2238 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2239 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2240 [RTA_PRIORITY] = { .type = NLA_U32 },
2241 [RTA_METRICS] = { .type = NLA_NESTED },
2242};
2243
2244static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2245 struct fib6_config *cfg)
1da177e4 2246{
86872cb5
TG
2247 struct rtmsg *rtm;
2248 struct nlattr *tb[RTA_MAX+1];
2249 int err;
1da177e4 2250
86872cb5
TG
2251 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2252 if (err < 0)
2253 goto errout;
1da177e4 2254
86872cb5
TG
2255 err = -EINVAL;
2256 rtm = nlmsg_data(nlh);
2257 memset(cfg, 0, sizeof(*cfg));
2258
2259 cfg->fc_table = rtm->rtm_table;
2260 cfg->fc_dst_len = rtm->rtm_dst_len;
2261 cfg->fc_src_len = rtm->rtm_src_len;
2262 cfg->fc_flags = RTF_UP;
2263 cfg->fc_protocol = rtm->rtm_protocol;
2264
2265 if (rtm->rtm_type == RTN_UNREACHABLE)
2266 cfg->fc_flags |= RTF_REJECT;
2267
ab79ad14
2268 if (rtm->rtm_type == RTN_LOCAL)
2269 cfg->fc_flags |= RTF_LOCAL;
2270
86872cb5
TG
2271 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2272 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2273 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2274
2275 if (tb[RTA_GATEWAY]) {
2276 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2277 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2278 }
86872cb5
TG
2279
2280 if (tb[RTA_DST]) {
2281 int plen = (rtm->rtm_dst_len + 7) >> 3;
2282
2283 if (nla_len(tb[RTA_DST]) < plen)
2284 goto errout;
2285
2286 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2287 }
86872cb5
TG
2288
2289 if (tb[RTA_SRC]) {
2290 int plen = (rtm->rtm_src_len + 7) >> 3;
2291
2292 if (nla_len(tb[RTA_SRC]) < plen)
2293 goto errout;
2294
2295 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2296 }
86872cb5 2297
c3968a85
DW
2298 if (tb[RTA_PREFSRC])
2299 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2300
86872cb5
TG
2301 if (tb[RTA_OIF])
2302 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2303
2304 if (tb[RTA_PRIORITY])
2305 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2306
2307 if (tb[RTA_METRICS]) {
2308 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2309 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2310 }
86872cb5
TG
2311
2312 if (tb[RTA_TABLE])
2313 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2314
2315 err = 0;
2316errout:
2317 return err;
1da177e4
LT
2318}
2319
c127ea2c 2320static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2321{
86872cb5
TG
2322 struct fib6_config cfg;
2323 int err;
1da177e4 2324
86872cb5
TG
2325 err = rtm_to_fib6_config(skb, nlh, &cfg);
2326 if (err < 0)
2327 return err;
2328
2329 return ip6_route_del(&cfg);
1da177e4
LT
2330}
2331
c127ea2c 2332static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2333{
86872cb5
TG
2334 struct fib6_config cfg;
2335 int err;
1da177e4 2336
86872cb5
TG
2337 err = rtm_to_fib6_config(skb, nlh, &cfg);
2338 if (err < 0)
2339 return err;
2340
2341 return ip6_route_add(&cfg);
1da177e4
LT
2342}
2343
339bf98f
TG
2344static inline size_t rt6_nlmsg_size(void)
2345{
2346 return NLMSG_ALIGN(sizeof(struct rtmsg))
2347 + nla_total_size(16) /* RTA_SRC */
2348 + nla_total_size(16) /* RTA_DST */
2349 + nla_total_size(16) /* RTA_GATEWAY */
2350 + nla_total_size(16) /* RTA_PREFSRC */
2351 + nla_total_size(4) /* RTA_TABLE */
2352 + nla_total_size(4) /* RTA_IIF */
2353 + nla_total_size(4) /* RTA_OIF */
2354 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2355 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2356 + nla_total_size(sizeof(struct rta_cacheinfo));
2357}
2358
191cd582
BH
2359static int rt6_fill_node(struct net *net,
2360 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2361 struct in6_addr *dst, struct in6_addr *src,
2362 int iif, int type, u32 pid, u32 seq,
7bc570c8 2363 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2364{
2365 struct rtmsg *rtm;
2d7202bf 2366 struct nlmsghdr *nlh;
e3703b3d 2367 long expires;
9e762a4a 2368 u32 table;
f2c31e32 2369 struct neighbour *n;
1da177e4
LT
2370
2371 if (prefix) { /* user wants prefix routes only */
2372 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2373 /* success since this is not a prefix route */
2374 return 1;
2375 }
2376 }
2377
2d7202bf 2378 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
38308473 2379 if (!nlh)
26932566 2380 return -EMSGSIZE;
2d7202bf
TG
2381
2382 rtm = nlmsg_data(nlh);
1da177e4
LT
2383 rtm->rtm_family = AF_INET6;
2384 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2385 rtm->rtm_src_len = rt->rt6i_src.plen;
2386 rtm->rtm_tos = 0;
c71099ac 2387 if (rt->rt6i_table)
9e762a4a 2388 table = rt->rt6i_table->tb6_id;
c71099ac 2389 else
9e762a4a
PM
2390 table = RT6_TABLE_UNSPEC;
2391 rtm->rtm_table = table;
2d7202bf 2392 NLA_PUT_U32(skb, RTA_TABLE, table);
38308473 2393 if (rt->rt6i_flags & RTF_REJECT)
1da177e4 2394 rtm->rtm_type = RTN_UNREACHABLE;
38308473 2395 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2396 rtm->rtm_type = RTN_LOCAL;
38308473 2397 else if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
1da177e4
LT
2398 rtm->rtm_type = RTN_LOCAL;
2399 else
2400 rtm->rtm_type = RTN_UNICAST;
2401 rtm->rtm_flags = 0;
2402 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2403 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2404 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4
LT
2405 rtm->rtm_protocol = RTPROT_REDIRECT;
2406 else if (rt->rt6i_flags & RTF_ADDRCONF)
2407 rtm->rtm_protocol = RTPROT_KERNEL;
38308473 2408 else if (rt->rt6i_flags & RTF_DEFAULT)
1da177e4
LT
2409 rtm->rtm_protocol = RTPROT_RA;
2410
38308473 2411 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2412 rtm->rtm_flags |= RTM_F_CLONED;
2413
2414 if (dst) {
2d7202bf 2415 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2416 rtm->rtm_dst_len = 128;
1da177e4 2417 } else if (rtm->rtm_dst_len)
2d7202bf 2418 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2419#ifdef CONFIG_IPV6_SUBTREES
2420 if (src) {
2d7202bf 2421 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2422 rtm->rtm_src_len = 128;
1da177e4 2423 } else if (rtm->rtm_src_len)
2d7202bf 2424 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2425#endif
7bc570c8
YH
2426 if (iif) {
2427#ifdef CONFIG_IPV6_MROUTE
2428 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2429 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2430 if (err <= 0) {
2431 if (!nowait) {
2432 if (err == 0)
2433 return 0;
2434 goto nla_put_failure;
2435 } else {
2436 if (err == -EMSGSIZE)
2437 goto nla_put_failure;
2438 }
2439 }
2440 } else
2441#endif
2442 NLA_PUT_U32(skb, RTA_IIF, iif);
2443 } else if (dst) {
1da177e4 2444 struct in6_addr saddr_buf;
c3968a85 2445 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0)
2d7202bf 2446 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2447 }
2d7202bf 2448
c3968a85
DW
2449 if (rt->rt6i_prefsrc.plen) {
2450 struct in6_addr saddr_buf;
4e3fd7a0 2451 saddr_buf = rt->rt6i_prefsrc.addr;
c3968a85
DW
2452 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2453 }
2454
defb3519 2455 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2456 goto nla_put_failure;
2457
f2c31e32 2458 rcu_read_lock();
27217455 2459 n = dst_get_neighbour_noref(&rt->dst);
f2c31e32
ED
2460 if (n)
2461 NLA_PUT(skb, RTA_GATEWAY, 16, &n->primary_key);
2462 rcu_read_unlock();
2d7202bf 2463
d8d1f30b 2464 if (rt->dst.dev)
2d7202bf
TG
2465 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2466
2467 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2468
36e3deae
YH
2469 if (!(rt->rt6i_flags & RTF_EXPIRES))
2470 expires = 0;
2471 else if (rt->rt6i_expires - jiffies < INT_MAX)
2472 expires = rt->rt6i_expires - jiffies;
2473 else
2474 expires = INT_MAX;
69cdf8f9 2475
d8d1f30b
CG
2476 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2477 expires, rt->dst.error) < 0)
e3703b3d 2478 goto nla_put_failure;
2d7202bf
TG
2479
2480 return nlmsg_end(skb, nlh);
2481
2482nla_put_failure:
26932566
PM
2483 nlmsg_cancel(skb, nlh);
2484 return -EMSGSIZE;
1da177e4
LT
2485}
2486
1b43af54 2487int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2488{
2489 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2490 int prefix;
2491
2d7202bf
TG
2492 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2493 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2494 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2495 } else
2496 prefix = 0;
2497
191cd582
BH
2498 return rt6_fill_node(arg->net,
2499 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2500 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2501 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2502}
2503
c127ea2c 2504static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2505{
3b1e0a65 2506 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2507 struct nlattr *tb[RTA_MAX+1];
2508 struct rt6_info *rt;
1da177e4 2509 struct sk_buff *skb;
ab364a6f 2510 struct rtmsg *rtm;
4c9483b2 2511 struct flowi6 fl6;
ab364a6f 2512 int err, iif = 0;
1da177e4 2513
ab364a6f
TG
2514 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2515 if (err < 0)
2516 goto errout;
1da177e4 2517
ab364a6f 2518 err = -EINVAL;
4c9483b2 2519 memset(&fl6, 0, sizeof(fl6));
1da177e4 2520
ab364a6f
TG
2521 if (tb[RTA_SRC]) {
2522 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2523 goto errout;
2524
4e3fd7a0 2525 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2526 }
2527
2528 if (tb[RTA_DST]) {
2529 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2530 goto errout;
2531
4e3fd7a0 2532 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2533 }
2534
2535 if (tb[RTA_IIF])
2536 iif = nla_get_u32(tb[RTA_IIF]);
2537
2538 if (tb[RTA_OIF])
4c9483b2 2539 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2540
2541 if (iif) {
2542 struct net_device *dev;
5578689a 2543 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2544 if (!dev) {
2545 err = -ENODEV;
ab364a6f 2546 goto errout;
1da177e4
LT
2547 }
2548 }
2549
ab364a6f 2550 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2551 if (!skb) {
ab364a6f
TG
2552 err = -ENOBUFS;
2553 goto errout;
2554 }
1da177e4 2555
ab364a6f
TG
2556 /* Reserve room for dummy headers, this skb can pass
2557 through good chunk of routing engine.
2558 */
459a98ed 2559 skb_reset_mac_header(skb);
ab364a6f 2560 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2561
4c9483b2 2562 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
d8d1f30b 2563 skb_dst_set(skb, &rt->dst);
1da177e4 2564
4c9483b2 2565 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
1da177e4 2566 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2567 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2568 if (err < 0) {
ab364a6f
TG
2569 kfree_skb(skb);
2570 goto errout;
1da177e4
LT
2571 }
2572
5578689a 2573 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2574errout:
1da177e4 2575 return err;
1da177e4
LT
2576}
2577
86872cb5 2578void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2579{
2580 struct sk_buff *skb;
5578689a 2581 struct net *net = info->nl_net;
528c4ceb
DL
2582 u32 seq;
2583 int err;
2584
2585 err = -ENOBUFS;
38308473 2586 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2587
339bf98f 2588 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2589 if (!skb)
21713ebc
TG
2590 goto errout;
2591
191cd582 2592 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2593 event, info->pid, seq, 0, 0, 0);
26932566
PM
2594 if (err < 0) {
2595 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2596 WARN_ON(err == -EMSGSIZE);
2597 kfree_skb(skb);
2598 goto errout;
2599 }
1ce85fe4
PNA
2600 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2601 info->nlh, gfp_any());
2602 return;
21713ebc
TG
2603errout:
2604 if (err < 0)
5578689a 2605 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2606}
2607
8ed67789
DL
2608static int ip6_route_dev_notify(struct notifier_block *this,
2609 unsigned long event, void *data)
2610{
2611 struct net_device *dev = (struct net_device *)data;
c346dca1 2612 struct net *net = dev_net(dev);
8ed67789
DL
2613
2614 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2615 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2616 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2617#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2618 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2619 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2620 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2621 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2622#endif
2623 }
2624
2625 return NOTIFY_OK;
2626}
2627
1da177e4
LT
2628/*
2629 * /proc
2630 */
2631
2632#ifdef CONFIG_PROC_FS
2633
1da177e4
LT
/*
 * Buffer-walk state for /proc output.
 * NOTE(review): nothing in the visible part of this file references this
 * structure; confirm whether it is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2642
2643static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2644{
33120b30 2645 struct seq_file *m = p_arg;
69cce1d1 2646 struct neighbour *n;
1da177e4 2647
4b7a4274 2648 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2649
2650#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2651 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2652#else
33120b30 2653 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4 2654#endif
f2c31e32 2655 rcu_read_lock();
27217455 2656 n = dst_get_neighbour_noref(&rt->dst);
69cce1d1
DM
2657 if (n) {
2658 seq_printf(m, "%pi6", n->primary_key);
1da177e4 2659 } else {
33120b30 2660 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2661 }
f2c31e32 2662 rcu_read_unlock();
33120b30 2663 seq_printf(m, " %08x %08x %08x %08x %8s\n",
d8d1f30b
CG
2664 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2665 rt->dst.__use, rt->rt6i_flags,
33120b30 2666 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2667 return 0;
2668}
2669
33120b30 2670static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2671{
f3db4851
DL
2672 struct net *net = (struct net *)m->private;
2673 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2674 return 0;
2675}
1da177e4 2676
33120b30
AD
2677static int ipv6_route_open(struct inode *inode, struct file *file)
2678{
de05c557 2679 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2680}
2681
33120b30
AD
2682static const struct file_operations ipv6_route_proc_fops = {
2683 .owner = THIS_MODULE,
2684 .open = ipv6_route_open,
2685 .read = seq_read,
2686 .llseek = seq_lseek,
b6fcbdb4 2687 .release = single_release_net,
33120b30
AD
2688};
2689
1da177e4
LT
2690static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2691{
69ddb805 2692 struct net *net = (struct net *)seq->private;
1da177e4 2693 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2694 net->ipv6.rt6_stats->fib_nodes,
2695 net->ipv6.rt6_stats->fib_route_nodes,
2696 net->ipv6.rt6_stats->fib_rt_alloc,
2697 net->ipv6.rt6_stats->fib_rt_entries,
2698 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2699 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2700 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2701
2702 return 0;
2703}
2704
2705static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2706{
de05c557 2707 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2708}
2709
9a32144e 2710static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2711 .owner = THIS_MODULE,
2712 .open = rt6_stats_seq_open,
2713 .read = seq_read,
2714 .llseek = seq_lseek,
b6fcbdb4 2715 .release = single_release_net,
1da177e4
LT
2716};
2717#endif /* CONFIG_PROC_FS */
2718
2719#ifdef CONFIG_SYSCTL
2720
1da177e4 2721static
8d65af78 2722int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2723 void __user *buffer, size_t *lenp, loff_t *ppos)
2724{
c486da34
LAG
2725 struct net *net;
2726 int delay;
2727 if (!write)
1da177e4 2728 return -EINVAL;
c486da34
LAG
2729
2730 net = (struct net *)ctl->extra1;
2731 delay = net->ipv6.sysctl.flush_delay;
2732 proc_dointvec(ctl, write, buffer, lenp, ppos);
2733 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2734 return 0;
1da177e4
LT
2735}
2736
760f2d01 2737ctl_table ipv6_route_table_template[] = {
1ab1457c 2738 {
1da177e4 2739 .procname = "flush",
4990509f 2740 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2741 .maxlen = sizeof(int),
89c8b3a1 2742 .mode = 0200,
6d9f239a 2743 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2744 },
2745 {
1da177e4 2746 .procname = "gc_thresh",
9a7ec3a9 2747 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2748 .maxlen = sizeof(int),
2749 .mode = 0644,
6d9f239a 2750 .proc_handler = proc_dointvec,
1da177e4
LT
2751 },
2752 {
1da177e4 2753 .procname = "max_size",
4990509f 2754 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2755 .maxlen = sizeof(int),
2756 .mode = 0644,
6d9f239a 2757 .proc_handler = proc_dointvec,
1da177e4
LT
2758 },
2759 {
1da177e4 2760 .procname = "gc_min_interval",
4990509f 2761 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2762 .maxlen = sizeof(int),
2763 .mode = 0644,
6d9f239a 2764 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2765 },
2766 {
1da177e4 2767 .procname = "gc_timeout",
4990509f 2768 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2769 .maxlen = sizeof(int),
2770 .mode = 0644,
6d9f239a 2771 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2772 },
2773 {
1da177e4 2774 .procname = "gc_interval",
4990509f 2775 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2776 .maxlen = sizeof(int),
2777 .mode = 0644,
6d9f239a 2778 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2779 },
2780 {
1da177e4 2781 .procname = "gc_elasticity",
4990509f 2782 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2783 .maxlen = sizeof(int),
2784 .mode = 0644,
f3d3f616 2785 .proc_handler = proc_dointvec,
1da177e4
LT
2786 },
2787 {
1da177e4 2788 .procname = "mtu_expires",
4990509f 2789 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2790 .maxlen = sizeof(int),
2791 .mode = 0644,
6d9f239a 2792 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2793 },
2794 {
1da177e4 2795 .procname = "min_adv_mss",
4990509f 2796 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2797 .maxlen = sizeof(int),
2798 .mode = 0644,
f3d3f616 2799 .proc_handler = proc_dointvec,
1da177e4
LT
2800 },
2801 {
1da177e4 2802 .procname = "gc_min_interval_ms",
4990509f 2803 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2804 .maxlen = sizeof(int),
2805 .mode = 0644,
6d9f239a 2806 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2807 },
f8572d8f 2808 { }
1da177e4
LT
2809};
2810
2c8c1e72 2811struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2812{
2813 struct ctl_table *table;
2814
2815 table = kmemdup(ipv6_route_table_template,
2816 sizeof(ipv6_route_table_template),
2817 GFP_KERNEL);
5ee09105
YH
2818
2819 if (table) {
2820 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 2821 table[0].extra1 = net;
86393e52 2822 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2823 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2824 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2825 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2826 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2827 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2828 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2829 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2830 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2831 }
2832
760f2d01
DL
2833 return table;
2834}
1da177e4
LT
2835#endif
2836
2c8c1e72 2837static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2838{
633d424b 2839 int ret = -ENOMEM;
8ed67789 2840
86393e52
AD
2841 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2842 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2843
fc66f95c
ED
2844 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2845 goto out_ip6_dst_ops;
2846
8ed67789
DL
2847 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2848 sizeof(*net->ipv6.ip6_null_entry),
2849 GFP_KERNEL);
2850 if (!net->ipv6.ip6_null_entry)
fc66f95c 2851 goto out_ip6_dst_entries;
d8d1f30b 2852 net->ipv6.ip6_null_entry->dst.path =
8ed67789 2853 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 2854 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2855 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2856 ip6_template_metrics, true);
8ed67789
DL
2857
2858#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2859 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2860 sizeof(*net->ipv6.ip6_prohibit_entry),
2861 GFP_KERNEL);
68fffc67
PZ
2862 if (!net->ipv6.ip6_prohibit_entry)
2863 goto out_ip6_null_entry;
d8d1f30b 2864 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 2865 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 2866 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2867 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2868 ip6_template_metrics, true);
8ed67789
DL
2869
2870 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2871 sizeof(*net->ipv6.ip6_blk_hole_entry),
2872 GFP_KERNEL);
68fffc67
PZ
2873 if (!net->ipv6.ip6_blk_hole_entry)
2874 goto out_ip6_prohibit_entry;
d8d1f30b 2875 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 2876 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 2877 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
2878 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2879 ip6_template_metrics, true);
8ed67789
DL
2880#endif
2881
b339a47c
PZ
2882 net->ipv6.sysctl.flush_delay = 0;
2883 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2884 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2885 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2886 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2887 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2888 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2889 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2890
cdb18761
DL
2891#ifdef CONFIG_PROC_FS
2892 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2893 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2894#endif
6891a346
BT
2895 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2896
8ed67789
DL
2897 ret = 0;
2898out:
2899 return ret;
f2fc6a54 2900
68fffc67
PZ
2901#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2902out_ip6_prohibit_entry:
2903 kfree(net->ipv6.ip6_prohibit_entry);
2904out_ip6_null_entry:
2905 kfree(net->ipv6.ip6_null_entry);
2906#endif
fc66f95c
ED
2907out_ip6_dst_entries:
2908 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 2909out_ip6_dst_ops:
f2fc6a54 2910 goto out;
cdb18761
DL
2911}
2912
2c8c1e72 2913static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2914{
2915#ifdef CONFIG_PROC_FS
2916 proc_net_remove(net, "ipv6_route");
2917 proc_net_remove(net, "rt6_stats");
2918#endif
8ed67789
DL
2919 kfree(net->ipv6.ip6_null_entry);
2920#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2921 kfree(net->ipv6.ip6_prohibit_entry);
2922 kfree(net->ipv6.ip6_blk_hole_entry);
2923#endif
41bb78b4 2924 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
2925}
2926
2927static struct pernet_operations ip6_route_net_ops = {
2928 .init = ip6_route_net_init,
2929 .exit = ip6_route_net_exit,
2930};
2931
8ed67789
DL
2932static struct notifier_block ip6_route_dev_notifier = {
2933 .notifier_call = ip6_route_dev_notify,
2934 .priority = 0,
2935};
2936
433d49c3 2937int __init ip6_route_init(void)
1da177e4 2938{
433d49c3
DL
2939 int ret;
2940
9a7ec3a9
DL
2941 ret = -ENOMEM;
2942 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2943 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2944 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2945 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2946 goto out;
14e50e57 2947
fc66f95c 2948 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 2949 if (ret)
bdb3289f 2950 goto out_kmem_cache;
bdb3289f 2951
fc66f95c
ED
2952 ret = register_pernet_subsys(&ip6_route_net_ops);
2953 if (ret)
2954 goto out_dst_entries;
2955
5dc121e9
AE
2956 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2957
8ed67789
DL
2958 /* Registering of the loopback is done before this portion of code,
2959 * the loopback reference in rt6_info will not be taken, do it
2960 * manually for init_net */
d8d1f30b 2961 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2962 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2963 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2964 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 2965 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 2966 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
2967 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2968 #endif
433d49c3
DL
2969 ret = fib6_init();
2970 if (ret)
8ed67789 2971 goto out_register_subsys;
433d49c3 2972
433d49c3
DL
2973 ret = xfrm6_init();
2974 if (ret)
cdb18761 2975 goto out_fib6_init;
c35b7e72 2976
433d49c3
DL
2977 ret = fib6_rules_init();
2978 if (ret)
2979 goto xfrm6_init;
7e5449c2 2980
433d49c3 2981 ret = -ENOBUFS;
c7ac8679
GR
2982 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
2983 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
2984 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
433d49c3 2985 goto fib6_rules_init;
c127ea2c 2986
8ed67789 2987 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2988 if (ret)
2989 goto fib6_rules_init;
8ed67789 2990
433d49c3
DL
2991out:
2992 return ret;
2993
2994fib6_rules_init:
433d49c3
DL
2995 fib6_rules_cleanup();
2996xfrm6_init:
433d49c3 2997 xfrm6_fini();
433d49c3 2998out_fib6_init:
433d49c3 2999 fib6_gc_cleanup();
8ed67789
DL
3000out_register_subsys:
3001 unregister_pernet_subsys(&ip6_route_net_ops);
fc66f95c
ED
3002out_dst_entries:
3003 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3004out_kmem_cache:
f2fc6a54 3005 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3006 goto out;
1da177e4
LT
3007}
3008
3009void ip6_route_cleanup(void)
3010{
8ed67789 3011 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 3012 fib6_rules_cleanup();
1da177e4 3013 xfrm6_fini();
1da177e4 3014 fib6_gc_cleanup();
8ed67789 3015 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3016 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3017 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3018}