]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/ipv6/route.c
Merge branch 'hid-suspend' into picolcd
[mirror_ubuntu-bionic-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
4fc268d2 27#include <linux/capability.h>
1da177e4
LT
28#include <linux/errno.h>
29#include <linux/types.h>
30#include <linux/times.h>
31#include <linux/socket.h>
32#include <linux/sockios.h>
33#include <linux/net.h>
34#include <linux/route.h>
35#include <linux/netdevice.h>
36#include <linux/in6.h>
7bc570c8 37#include <linux/mroute6.h>
1da177e4 38#include <linux/init.h>
1da177e4 39#include <linux/if_arp.h>
1da177e4
LT
40#include <linux/proc_fs.h>
41#include <linux/seq_file.h>
5b7c931d 42#include <linux/nsproxy.h>
5a0e3ad6 43#include <linux/slab.h>
457c4cbc 44#include <net/net_namespace.h>
1da177e4
LT
45#include <net/snmp.h>
46#include <net/ipv6.h>
47#include <net/ip6_fib.h>
48#include <net/ip6_route.h>
49#include <net/ndisc.h>
50#include <net/addrconf.h>
51#include <net/tcp.h>
52#include <linux/rtnetlink.h>
53#include <net/dst.h>
54#include <net/xfrm.h>
8d71740c 55#include <net/netevent.h>
21713ebc 56#include <net/netlink.h>
1da177e4
LT
57
58#include <asm/uaccess.h>
59
60#ifdef CONFIG_SYSCTL
61#include <linux/sysctl.h>
62#endif
63
64/* Set to 3 to get tracing. */
65#define RT6_DEBUG 2
66
67#if RT6_DEBUG >= 3
68#define RDBG(x) printk x
69#define RT6_TRACE(x...) printk(KERN_DEBUG x)
70#else
71#define RDBG(x)
72#define RT6_TRACE(x...) do { ; } while (0)
73#endif
74
519fbd87 75#define CLONE_OFFLINK_ROUTE 0
1da177e4 76
1da177e4
LT
77static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80static void ip6_dst_destroy(struct dst_entry *);
81static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
569d3645 83static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
84
85static int ip6_pkt_discard(struct sk_buff *skb);
86static int ip6_pkt_discard_out(struct sk_buff *skb);
87static void ip6_link_failure(struct sk_buff *skb);
88static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
70ceb4f5 90#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
91static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
efa2cea0
DL
95static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
97 struct in6_addr *gwaddr, int ifindex);
98#endif
99
9a7ec3a9 100static struct dst_ops ip6_dst_ops_template = {
1da177e4 101 .family = AF_INET6,
09640e63 102 .protocol = cpu_to_be16(ETH_P_IPV6),
1da177e4
LT
103 .gc = ip6_dst_gc,
104 .gc_thresh = 1024,
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
1ac06e03 111 .local_out = __ip6_local_out,
e2422970 112 .entries = ATOMIC_INIT(0),
1da177e4
LT
113};
114
14e50e57
DM
115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
116{
117}
118
119static struct dst_ops ip6_dst_blackhole_ops = {
120 .family = AF_INET6,
09640e63 121 .protocol = cpu_to_be16(ETH_P_IPV6),
14e50e57
DM
122 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu,
e2422970 125 .entries = ATOMIC_INIT(0),
14e50e57
DM
126};
127
bdb3289f 128static struct rt6_info ip6_null_entry_template = {
1da177e4
LT
129 .u = {
130 .dst = {
131 .__refcnt = ATOMIC_INIT(1),
132 .__use = 1,
1da177e4
LT
133 .obsolete = -1,
134 .error = -ENETUNREACH,
135 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
136 .input = ip6_pkt_discard,
137 .output = ip6_pkt_discard_out,
1da177e4
LT
138 }
139 },
140 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 141 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
142 .rt6i_metric = ~(u32) 0,
143 .rt6i_ref = ATOMIC_INIT(1),
144};
145
101367c2
TG
146#ifdef CONFIG_IPV6_MULTIPLE_TABLES
147
6723ab54
DM
148static int ip6_pkt_prohibit(struct sk_buff *skb);
149static int ip6_pkt_prohibit_out(struct sk_buff *skb);
6723ab54 150
280a34c8 151static struct rt6_info ip6_prohibit_entry_template = {
101367c2
TG
152 .u = {
153 .dst = {
154 .__refcnt = ATOMIC_INIT(1),
155 .__use = 1,
101367c2
TG
156 .obsolete = -1,
157 .error = -EACCES,
158 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
9ce8ade0
TG
159 .input = ip6_pkt_prohibit,
160 .output = ip6_pkt_prohibit_out,
101367c2
TG
161 }
162 },
163 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 164 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
165 .rt6i_metric = ~(u32) 0,
166 .rt6i_ref = ATOMIC_INIT(1),
167};
168
bdb3289f 169static struct rt6_info ip6_blk_hole_entry_template = {
101367c2
TG
170 .u = {
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
101367c2
TG
174 .obsolete = -1,
175 .error = -EINVAL,
176 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
352e512c
HX
177 .input = dst_discard,
178 .output = dst_discard,
101367c2
TG
179 }
180 },
181 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 182 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
183 .rt6i_metric = ~(u32) 0,
184 .rt6i_ref = ATOMIC_INIT(1),
185};
186
187#endif
188
/*
 * Allocate a dst entry from @ops and hand it back typed as an rt6_info.
 * Returns whatever dst_alloc() returns, cast to struct rt6_info *.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct dst_entry *dst = dst_alloc(ops);

	return (struct rt6_info *)dst;
}
194
195static void ip6_dst_destroy(struct dst_entry *dst)
196{
197 struct rt6_info *rt = (struct rt6_info *)dst;
198 struct inet6_dev *idev = rt->rt6i_idev;
199
200 if (idev != NULL) {
201 rt->rt6i_idev = NULL;
202 in6_dev_put(idev);
1ab1457c 203 }
1da177e4
LT
204}
205
206static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
207 int how)
208{
209 struct rt6_info *rt = (struct rt6_info *)dst;
210 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 211 struct net_device *loopback_dev =
c346dca1 212 dev_net(dev)->loopback_dev;
1da177e4 213
5a3e55d6
DL
214 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
215 struct inet6_dev *loopback_idev =
216 in6_dev_get(loopback_dev);
1da177e4
LT
217 if (loopback_idev != NULL) {
218 rt->rt6i_idev = loopback_idev;
219 in6_dev_put(idev);
220 }
221 }
222}
223
224static __inline__ int rt6_check_expired(const struct rt6_info *rt)
225{
226 return (rt->rt6i_flags & RTF_EXPIRES &&
227 time_after(jiffies, rt->rt6i_expires));
228}
229
c71099ac
TG
230static inline int rt6_need_strict(struct in6_addr *daddr)
231{
232 return (ipv6_addr_type(daddr) &
5ce83afa 233 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK));
c71099ac
TG
234}
235
1da177e4 236/*
c71099ac 237 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
238 */
239
8ed67789
DL
240static inline struct rt6_info *rt6_device_match(struct net *net,
241 struct rt6_info *rt,
dd3abc4e 242 struct in6_addr *saddr,
1da177e4 243 int oif,
d420895e 244 int flags)
1da177e4
LT
245{
246 struct rt6_info *local = NULL;
247 struct rt6_info *sprt;
248
dd3abc4e
YH
249 if (!oif && ipv6_addr_any(saddr))
250 goto out;
251
252 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
253 struct net_device *dev = sprt->rt6i_dev;
254
255 if (oif) {
1da177e4
LT
256 if (dev->ifindex == oif)
257 return sprt;
258 if (dev->flags & IFF_LOOPBACK) {
259 if (sprt->rt6i_idev == NULL ||
260 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 261 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 262 continue;
1ab1457c 263 if (local && (!oif ||
1da177e4
LT
264 local->rt6i_idev->dev->ifindex == oif))
265 continue;
266 }
267 local = sprt;
268 }
dd3abc4e
YH
269 } else {
270 if (ipv6_chk_addr(net, saddr, dev,
271 flags & RT6_LOOKUP_F_IFACE))
272 return sprt;
1da177e4 273 }
dd3abc4e 274 }
1da177e4 275
dd3abc4e 276 if (oif) {
1da177e4
LT
277 if (local)
278 return local;
279
d420895e 280 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 281 return net->ipv6.ip6_null_entry;
1da177e4 282 }
dd3abc4e 283out:
1da177e4
LT
284 return rt;
285}
286
27097255
YH
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing.
 *
 * If the next hop of @rt is not in a NUD_VALID state and at least
 * rtr_probe_interval jiffies have passed since the neighbour entry was
 * last updated, send a Neighbour Solicitation to the next hop's
 * solicited-node multicast address.
 *
 * Router Reachability Probe MUST be rate-limited to no more than one
 * per minute.  The rate limit is enforced through neigh->updated, which
 * is modified here; the write side of neigh->lock is therefore taken so
 * that concurrent callers cannot all pass the interval check and each
 * send a probe.
 *
 * @rt may be NULL, in which case nothing happens.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	struct in6_addr mcaddr;
	struct in6_addr *target;

	/* Cheap unlocked pre-check; re-validated under the lock below. */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	write_lock_bh(&neigh->lock);
	if ((neigh->nud_state & NUD_VALID) ||
	    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		write_unlock_bh(&neigh->lock);
		return;
	}
	neigh->updated = jiffies;
	write_unlock_bh(&neigh->lock);

	/* Send the solicitation outside the lock. */
	target = (struct in6_addr *)&neigh->primary_key;
	addrconf_addr_solict_mult(target, &mcaddr);
	ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
}
#else
/* Probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	return;
}
#endif
322
1da177e4 323/*
554cfb7e 324 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 325 */
b6f99a21 326static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e
YH
327{
328 struct net_device *dev = rt->rt6i_dev;
161980f4 329 if (!oif || dev->ifindex == oif)
554cfb7e 330 return 2;
161980f4
DM
331 if ((dev->flags & IFF_LOOPBACK) &&
332 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
333 return 1;
334 return 0;
554cfb7e 335}
1da177e4 336
b6f99a21 337static inline int rt6_check_neigh(struct rt6_info *rt)
1da177e4 338{
554cfb7e 339 struct neighbour *neigh = rt->rt6i_nexthop;
398bcbeb 340 int m;
4d0c5911
YH
341 if (rt->rt6i_flags & RTF_NONEXTHOP ||
342 !(rt->rt6i_flags & RTF_GATEWAY))
343 m = 1;
344 else if (neigh) {
554cfb7e
YH
345 read_lock_bh(&neigh->lock);
346 if (neigh->nud_state & NUD_VALID)
4d0c5911 347 m = 2;
398bcbeb
YH
348#ifdef CONFIG_IPV6_ROUTER_PREF
349 else if (neigh->nud_state & NUD_FAILED)
350 m = 0;
351#endif
352 else
ea73ee23 353 m = 1;
554cfb7e 354 read_unlock_bh(&neigh->lock);
398bcbeb
YH
355 } else
356 m = 0;
554cfb7e 357 return m;
1da177e4
LT
358}
359
554cfb7e
YH
360static int rt6_score_route(struct rt6_info *rt, int oif,
361 int strict)
1da177e4 362{
4d0c5911 363 int m, n;
1ab1457c 364
4d0c5911 365 m = rt6_check_dev(rt, oif);
77d16f45 366 if (!m && (strict & RT6_LOOKUP_F_IFACE))
554cfb7e 367 return -1;
ebacaaa0
YH
368#ifdef CONFIG_IPV6_ROUTER_PREF
369 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
370#endif
4d0c5911 371 n = rt6_check_neigh(rt);
557e92ef 372 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
554cfb7e
YH
373 return -1;
374 return m;
375}
376
f11e6659
DM
377static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
378 int *mpri, struct rt6_info *match)
554cfb7e 379{
f11e6659
DM
380 int m;
381
382 if (rt6_check_expired(rt))
383 goto out;
384
385 m = rt6_score_route(rt, oif, strict);
386 if (m < 0)
387 goto out;
388
389 if (m > *mpri) {
390 if (strict & RT6_LOOKUP_F_REACHABLE)
391 rt6_probe(match);
392 *mpri = m;
393 match = rt;
394 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
395 rt6_probe(rt);
396 }
397
398out:
399 return match;
400}
401
402static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
403 struct rt6_info *rr_head,
404 u32 metric, int oif, int strict)
405{
406 struct rt6_info *rt, *match;
554cfb7e 407 int mpri = -1;
1da177e4 408
f11e6659
DM
409 match = NULL;
410 for (rt = rr_head; rt && rt->rt6i_metric == metric;
411 rt = rt->u.dst.rt6_next)
412 match = find_match(rt, oif, strict, &mpri, match);
413 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
414 rt = rt->u.dst.rt6_next)
415 match = find_match(rt, oif, strict, &mpri, match);
1da177e4 416
f11e6659
DM
417 return match;
418}
1da177e4 419
f11e6659
DM
420static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
421{
422 struct rt6_info *match, *rt0;
8ed67789 423 struct net *net;
1da177e4 424
f11e6659 425 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
0dc47877 426 __func__, fn->leaf, oif);
554cfb7e 427
f11e6659
DM
428 rt0 = fn->rr_ptr;
429 if (!rt0)
430 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 431
f11e6659 432 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
1da177e4 433
554cfb7e 434 if (!match &&
f11e6659
DM
435 (strict & RT6_LOOKUP_F_REACHABLE)) {
436 struct rt6_info *next = rt0->u.dst.rt6_next;
437
554cfb7e 438 /* no entries matched; do round-robin */
f11e6659
DM
439 if (!next || next->rt6i_metric != rt0->rt6i_metric)
440 next = fn->leaf;
441
442 if (next != rt0)
443 fn->rr_ptr = next;
1da177e4 444 }
1da177e4 445
f11e6659 446 RT6_TRACE("%s() => %p\n",
0dc47877 447 __func__, match);
1da177e4 448
c346dca1 449 net = dev_net(rt0->rt6i_dev);
8ed67789 450 return (match ? match : net->ipv6.ip6_null_entry);
1da177e4
LT
451}
452
70ceb4f5
YH
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a route information option received on @dev.
 *
 * @opt points at the option (interpreted as struct route_info), @len is
 * its length in bytes and @gwaddr the gateway the route points at.
 *
 * Returns -EINVAL when the option is shorter than struct route_info,
 * when its length / prefix_len fields are inconsistent, or when the
 * preference is ICMPV6_ROUTER_PREF_INVALID; 0 otherwise.
 *
 * A zero lifetime deletes an existing route; a non-zero lifetime adds
 * the route if missing or refreshes the preference flags of an existing
 * one.  The expiry is then set from the lifetime, or cleared when the
 * lifetime is not finite.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		/* Full 128-bit prefix present: use it in place. */
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	} else if (rt) {
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime)) {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		} else {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		}
		dst_release(&rt->u.dst);
	}
	return 0;
}
#endif
526
/*
 * BACKTRACK(__net, saddr) - walk back up the FIB tree after a miss.
 *
 * Expands in a function that has locals `rt` and `fn` and labels `out`
 * and `restart`.  When `rt` is the namespace's null entry, climb from
 * `fn` towards the root: at each step move to the parent, or - if the
 * parent has a source subtree other than `fn` - look @saddr up in that
 * subtree.  Jumps to `restart` at the first node carrying RTN_RTINFO and
 * to `out` once the top-level root is reached.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == __net->ipv6.ip6_null_entry) {				\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
c71099ac 544
8ed67789
DL
545static struct rt6_info *ip6_pol_route_lookup(struct net *net,
546 struct fib6_table *table,
c71099ac 547 struct flowi *fl, int flags)
1da177e4
LT
548{
549 struct fib6_node *fn;
550 struct rt6_info *rt;
551
c71099ac
TG
552 read_lock_bh(&table->tb6_lock);
553 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
554restart:
555 rt = fn->leaf;
dd3abc4e 556 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
8ed67789 557 BACKTRACK(net, &fl->fl6_src);
c71099ac 558out:
03f49f34 559 dst_use(&rt->u.dst, jiffies);
c71099ac 560 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
561 return rt;
562
563}
564
9acd9f3a
YH
565struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
566 const struct in6_addr *saddr, int oif, int strict)
c71099ac
TG
567{
568 struct flowi fl = {
569 .oif = oif,
570 .nl_u = {
571 .ip6_u = {
572 .daddr = *daddr,
c71099ac
TG
573 },
574 },
575 };
576 struct dst_entry *dst;
77d16f45 577 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 578
adaa70bb
TG
579 if (saddr) {
580 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
581 flags |= RT6_LOOKUP_F_HAS_SADDR;
582 }
583
606a2b48 584 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
c71099ac
TG
585 if (dst->error == 0)
586 return (struct rt6_info *) dst;
587
588 dst_release(dst);
589
1da177e4
LT
590 return NULL;
591}
592
7159039a
YH
593EXPORT_SYMBOL(rt6_lookup);
594
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
600
86872cb5 601static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
602{
603 int err;
c71099ac 604 struct fib6_table *table;
1da177e4 605
c71099ac
TG
606 table = rt->rt6i_table;
607 write_lock_bh(&table->tb6_lock);
86872cb5 608 err = fib6_add(&table->tb6_root, rt, info);
c71099ac 609 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
610
611 return err;
612}
613
40e22e8f
TG
614int ip6_ins_rt(struct rt6_info *rt)
615{
4d1169c1 616 struct nl_info info = {
c346dca1 617 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 618 };
528c4ceb 619 return __ip6_ins_rt(rt, &info);
40e22e8f
TG
620}
621
95a9a5ba
YH
622static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
623 struct in6_addr *saddr)
1da177e4 624{
1da177e4
LT
625 struct rt6_info *rt;
626
627 /*
628 * Clone the route.
629 */
630
631 rt = ip6_rt_copy(ort);
632
633 if (rt) {
14deae41
DM
634 struct neighbour *neigh;
635 int attempts = !in_softirq();
636
58c4fb86
YH
637 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
638 if (rt->rt6i_dst.plen != 128 &&
639 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
640 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 641 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
58c4fb86 642 }
1da177e4 643
58c4fb86 644 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
1da177e4
LT
645 rt->rt6i_dst.plen = 128;
646 rt->rt6i_flags |= RTF_CACHE;
647 rt->u.dst.flags |= DST_HOST;
648
649#ifdef CONFIG_IPV6_SUBTREES
650 if (rt->rt6i_src.plen && saddr) {
651 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
652 rt->rt6i_src.plen = 128;
653 }
654#endif
655
14deae41
DM
656 retry:
657 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
658 if (IS_ERR(neigh)) {
659 struct net *net = dev_net(rt->rt6i_dev);
660 int saved_rt_min_interval =
661 net->ipv6.sysctl.ip6_rt_gc_min_interval;
662 int saved_rt_elasticity =
663 net->ipv6.sysctl.ip6_rt_gc_elasticity;
664
665 if (attempts-- > 0) {
666 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
667 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
668
86393e52 669 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
14deae41
DM
670
671 net->ipv6.sysctl.ip6_rt_gc_elasticity =
672 saved_rt_elasticity;
673 net->ipv6.sysctl.ip6_rt_gc_min_interval =
674 saved_rt_min_interval;
675 goto retry;
676 }
677
678 if (net_ratelimit())
679 printk(KERN_WARNING
680 "Neighbour table overflow.\n");
681 dst_free(&rt->u.dst);
682 return NULL;
683 }
684 rt->rt6i_nexthop = neigh;
1da177e4 685
95a9a5ba 686 }
1da177e4 687
95a9a5ba
YH
688 return rt;
689}
1da177e4 690
299d9939
YH
691static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
692{
693 struct rt6_info *rt = ip6_rt_copy(ort);
694 if (rt) {
695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
696 rt->rt6i_dst.plen = 128;
697 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
698 rt->u.dst.flags |= DST_HOST;
699 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
700 }
701 return rt;
702}
703
8ed67789
DL
704static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
705 struct flowi *fl, int flags)
1da177e4
LT
706{
707 struct fib6_node *fn;
519fbd87 708 struct rt6_info *rt, *nrt;
c71099ac 709 int strict = 0;
1da177e4 710 int attempts = 3;
519fbd87 711 int err;
53b7997f 712 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
1da177e4 713
77d16f45 714 strict |= flags & RT6_LOOKUP_F_IFACE;
1da177e4
LT
715
716relookup:
c71099ac 717 read_lock_bh(&table->tb6_lock);
1da177e4 718
8238dd06 719restart_2:
c71099ac 720 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1da177e4
LT
721
722restart:
4acad72d 723 rt = rt6_select(fn, oif, strict | reachable);
8ed67789
DL
724
725 BACKTRACK(net, &fl->fl6_src);
726 if (rt == net->ipv6.ip6_null_entry ||
8238dd06 727 rt->rt6i_flags & RTF_CACHE)
1ddef044 728 goto out;
1da177e4 729
fb9de91e 730 dst_hold(&rt->u.dst);
c71099ac 731 read_unlock_bh(&table->tb6_lock);
fb9de91e 732
519fbd87 733 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
c71099ac 734 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
519fbd87
YH
735 else {
736#if CLONE_OFFLINK_ROUTE
c71099ac 737 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
519fbd87
YH
738#else
739 goto out2;
740#endif
741 }
e40cf353 742
519fbd87 743 dst_release(&rt->u.dst);
8ed67789 744 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 745
519fbd87
YH
746 dst_hold(&rt->u.dst);
747 if (nrt) {
40e22e8f 748 err = ip6_ins_rt(nrt);
519fbd87 749 if (!err)
1da177e4 750 goto out2;
1da177e4 751 }
1da177e4 752
519fbd87
YH
753 if (--attempts <= 0)
754 goto out2;
755
756 /*
c71099ac 757 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
758 * released someone could insert this route. Relookup.
759 */
760 dst_release(&rt->u.dst);
761 goto relookup;
762
763out:
8238dd06
YH
764 if (reachable) {
765 reachable = 0;
766 goto restart_2;
767 }
519fbd87 768 dst_hold(&rt->u.dst);
c71099ac 769 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
770out2:
771 rt->u.dst.lastuse = jiffies;
772 rt->u.dst.__use++;
c71099ac
TG
773
774 return rt;
1da177e4
LT
775}
776
8ed67789 777static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4acad72d
PE
778 struct flowi *fl, int flags)
779{
8ed67789 780 return ip6_pol_route(net, table, fl->iif, fl, flags);
4acad72d
PE
781}
782
c71099ac
TG
783void ip6_route_input(struct sk_buff *skb)
784{
0660e03f 785 struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 786 struct net *net = dev_net(skb->dev);
adaa70bb 787 int flags = RT6_LOOKUP_F_HAS_SADDR;
c71099ac
TG
788 struct flowi fl = {
789 .iif = skb->dev->ifindex,
790 .nl_u = {
791 .ip6_u = {
792 .daddr = iph->daddr,
793 .saddr = iph->saddr,
90bcaf7b 794 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
c71099ac
TG
795 },
796 },
1ab1457c 797 .mark = skb->mark,
c71099ac
TG
798 .proto = iph->nexthdr,
799 };
adaa70bb 800
1d6e55f1 801 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
adaa70bb 802 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 803
adf30907 804 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
c71099ac
TG
805}
806
8ed67789 807static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
c71099ac 808 struct flowi *fl, int flags)
1da177e4 809{
8ed67789 810 return ip6_pol_route(net, table, fl->oif, fl, flags);
c71099ac
TG
811}
812
4591db4f
DL
813struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
814 struct flowi *fl)
c71099ac
TG
815{
816 int flags = 0;
817
818 if (rt6_need_strict(&fl->fl6_dst))
77d16f45 819 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 820
adaa70bb
TG
821 if (!ipv6_addr_any(&fl->fl6_src))
822 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
823 else if (sk)
824 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 825
4591db4f 826 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
1da177e4
LT
827}
828
7159039a 829EXPORT_SYMBOL(ip6_route_output);
1da177e4 830
14e50e57
DM
831int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
832{
833 struct rt6_info *ort = (struct rt6_info *) *dstp;
834 struct rt6_info *rt = (struct rt6_info *)
835 dst_alloc(&ip6_dst_blackhole_ops);
836 struct dst_entry *new = NULL;
837
838 if (rt) {
839 new = &rt->u.dst;
840
841 atomic_set(&new->__refcnt, 1);
842 new->__use = 1;
352e512c
HX
843 new->input = dst_discard;
844 new->output = dst_discard;
14e50e57
DM
845
846 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
847 new->dev = ort->u.dst.dev;
848 if (new->dev)
849 dev_hold(new->dev);
850 rt->rt6i_idev = ort->rt6i_idev;
851 if (rt->rt6i_idev)
852 in6_dev_hold(rt->rt6i_idev);
853 rt->rt6i_expires = 0;
854
855 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
856 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
857 rt->rt6i_metric = 0;
858
859 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
860#ifdef CONFIG_IPV6_SUBTREES
861 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
862#endif
863
864 dst_free(new);
865 }
866
867 dst_release(*dstp);
868 *dstp = new;
869 return (new ? 0 : -ENOMEM);
870}
871EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
872
1da177e4
LT
873/*
874 * Destination cache support functions
875 */
876
877static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
878{
879 struct rt6_info *rt;
880
881 rt = (struct rt6_info *) dst;
882
10414444 883 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
1da177e4
LT
884 return dst;
885
886 return NULL;
887}
888
889static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
890{
891 struct rt6_info *rt = (struct rt6_info *) dst;
892
893 if (rt) {
54c1a859
YH
894 if (rt->rt6i_flags & RTF_CACHE) {
895 if (rt6_check_expired(rt)) {
896 ip6_del_rt(rt);
897 dst = NULL;
898 }
899 } else {
1da177e4 900 dst_release(dst);
54c1a859
YH
901 dst = NULL;
902 }
1da177e4 903 }
54c1a859 904 return dst;
1da177e4
LT
905}
906
907static void ip6_link_failure(struct sk_buff *skb)
908{
909 struct rt6_info *rt;
910
3ffe533c 911 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 912
adf30907 913 rt = (struct rt6_info *) skb_dst(skb);
1da177e4
LT
914 if (rt) {
915 if (rt->rt6i_flags&RTF_CACHE) {
916 dst_set_expires(&rt->u.dst, 0);
917 rt->rt6i_flags |= RTF_EXPIRES;
918 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
919 rt->rt6i_node->fn_sernum = -1;
920 }
921}
922
923static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
924{
925 struct rt6_info *rt6 = (struct rt6_info*)dst;
926
927 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
928 rt6->rt6i_flags |= RTF_MODIFIED;
929 if (mtu < IPV6_MIN_MTU) {
930 mtu = IPV6_MIN_MTU;
931 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
932 }
933 dst->metrics[RTAX_MTU-1] = mtu;
8d71740c 934 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
1da177e4
LT
935 }
936}
937
1da177e4
LT
938static int ipv6_get_mtu(struct net_device *dev);
939
5578689a 940static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
1da177e4
LT
941{
942 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
943
5578689a
DL
944 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
945 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
946
947 /*
1ab1457c
YH
948 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
949 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
950 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
951 * rely only on pmtu discovery"
952 */
953 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
954 mtu = IPV6_MAXPLEN;
955 return mtu;
956}
957
3b00944c
YH
958static struct dst_entry *icmp6_dst_gc_list;
959static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 960
3b00944c 961struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1da177e4 962 struct neighbour *neigh,
9acd9f3a 963 const struct in6_addr *addr)
1da177e4
LT
964{
965 struct rt6_info *rt;
966 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 967 struct net *net = dev_net(dev);
1da177e4
LT
968
969 if (unlikely(idev == NULL))
970 return NULL;
971
86393e52 972 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
973 if (unlikely(rt == NULL)) {
974 in6_dev_put(idev);
975 goto out;
976 }
977
978 dev_hold(dev);
979 if (neigh)
980 neigh_hold(neigh);
14deae41 981 else {
1da177e4 982 neigh = ndisc_get_neigh(dev, addr);
14deae41
DM
983 if (IS_ERR(neigh))
984 neigh = NULL;
985 }
1da177e4
LT
986
987 rt->rt6i_dev = dev;
988 rt->rt6i_idev = idev;
989 rt->rt6i_nexthop = neigh;
990 atomic_set(&rt->u.dst.__refcnt, 1);
991 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
992 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 993 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
3b00944c 994 rt->u.dst.output = ip6_output;
1da177e4
LT
995
996#if 0 /* there's no chance to use these for ndisc */
1ab1457c
YH
997 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
998 ? DST_HOST
1da177e4
LT
999 : 0;
1000 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1001 rt->rt6i_dst.plen = 128;
1002#endif
1003
3b00944c
YH
1004 spin_lock_bh(&icmp6_dst_lock);
1005 rt->u.dst.next = icmp6_dst_gc_list;
1006 icmp6_dst_gc_list = &rt->u.dst;
1007 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1008
5578689a 1009 fib6_force_start_gc(net);
1da177e4
LT
1010
1011out:
40aa7b90 1012 return &rt->u.dst;
1da177e4
LT
1013}
1014
3d0f24a7 1015int icmp6_dst_gc(void)
1da177e4
LT
1016{
1017 struct dst_entry *dst, *next, **pprev;
3d0f24a7 1018 int more = 0;
1da177e4
LT
1019
1020 next = NULL;
5d0bbeeb 1021
3b00944c
YH
1022 spin_lock_bh(&icmp6_dst_lock);
1023 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1024
1da177e4
LT
1025 while ((dst = *pprev) != NULL) {
1026 if (!atomic_read(&dst->__refcnt)) {
1027 *pprev = dst->next;
1028 dst_free(dst);
1da177e4
LT
1029 } else {
1030 pprev = &dst->next;
3d0f24a7 1031 ++more;
1da177e4
LT
1032 }
1033 }
1034
3b00944c 1035 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1036
3d0f24a7 1037 return more;
1da177e4
LT
1038}
1039
1e493d19
DM
1040static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1041 void *arg)
1042{
1043 struct dst_entry *dst, **pprev;
1044
1045 spin_lock_bh(&icmp6_dst_lock);
1046 pprev = &icmp6_dst_gc_list;
1047 while ((dst = *pprev) != NULL) {
1048 struct rt6_info *rt = (struct rt6_info *) dst;
1049 if (func(rt, arg)) {
1050 *pprev = dst->next;
1051 dst_free(dst);
1052 } else {
1053 pprev = &dst->next;
1054 }
1055 }
1056 spin_unlock_bh(&icmp6_dst_lock);
1057}
1058
569d3645 1059static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1060{
1da177e4 1061 unsigned long now = jiffies;
86393e52 1062 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1063 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1064 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1065 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1066 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1067 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1068
1069 if (time_after(rt_last_gc + rt_min_interval, now) &&
1070 atomic_read(&ops->entries) <= rt_max_size)
1da177e4
LT
1071 goto out;
1072
6891a346
BT
1073 net->ipv6.ip6_rt_gc_expire++;
1074 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1075 net->ipv6.ip6_rt_last_gc = now;
7019b78e
DL
1076 if (atomic_read(&ops->entries) < ops->gc_thresh)
1077 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1078out:
7019b78e
DL
1079 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1080 return (atomic_read(&ops->entries) > rt_max_size);
1da177e4
LT
1081}
1082
1083/* Clean host part of a prefix. Not necessary in radix tree,
1084 but results in cleaner routing tables.
1085
1086 Remove it only when all the things will work!
1087 */
1088
1089static int ipv6_get_mtu(struct net_device *dev)
1090{
1091 int mtu = IPV6_MIN_MTU;
1092 struct inet6_dev *idev;
1093
1094 idev = in6_dev_get(dev);
1095 if (idev) {
1096 mtu = idev->cnf.mtu6;
1097 in6_dev_put(idev);
1098 }
1099 return mtu;
1100}
1101
6b75d090 1102int ip6_dst_hoplimit(struct dst_entry *dst)
1da177e4 1103{
6b75d090
YH
1104 int hoplimit = dst_metric(dst, RTAX_HOPLIMIT);
1105 if (hoplimit < 0) {
1106 struct net_device *dev = dst->dev;
1107 struct inet6_dev *idev = in6_dev_get(dev);
1108 if (idev) {
1109 hoplimit = idev->cnf.hop_limit;
1110 in6_dev_put(idev);
1111 } else
53b7997f 1112 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1da177e4
LT
1113 }
1114 return hoplimit;
1115}
1116
1117/*
1118 *
1119 */
1120
86872cb5 1121int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1122{
1123 int err;
5578689a 1124 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1125 struct rt6_info *rt = NULL;
1126 struct net_device *dev = NULL;
1127 struct inet6_dev *idev = NULL;
c71099ac 1128 struct fib6_table *table;
1da177e4
LT
1129 int addr_type;
1130
86872cb5 1131 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1132 return -EINVAL;
1133#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1134 if (cfg->fc_src_len)
1da177e4
LT
1135 return -EINVAL;
1136#endif
86872cb5 1137 if (cfg->fc_ifindex) {
1da177e4 1138 err = -ENODEV;
5578689a 1139 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1140 if (!dev)
1141 goto out;
1142 idev = in6_dev_get(dev);
1143 if (!idev)
1144 goto out;
1145 }
1146
86872cb5
TG
1147 if (cfg->fc_metric == 0)
1148 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1149
5578689a 1150 table = fib6_new_table(net, cfg->fc_table);
c71099ac
TG
1151 if (table == NULL) {
1152 err = -ENOBUFS;
1153 goto out;
1154 }
1155
86393e52 1156 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1157
1158 if (rt == NULL) {
1159 err = -ENOMEM;
1160 goto out;
1161 }
1162
1163 rt->u.dst.obsolete = -1;
6f704992
YH
1164 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1165 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1166 0;
1da177e4 1167
86872cb5
TG
1168 if (cfg->fc_protocol == RTPROT_UNSPEC)
1169 cfg->fc_protocol = RTPROT_BOOT;
1170 rt->rt6i_protocol = cfg->fc_protocol;
1171
1172 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1173
1174 if (addr_type & IPV6_ADDR_MULTICAST)
1175 rt->u.dst.input = ip6_mc_input;
1176 else
1177 rt->u.dst.input = ip6_forward;
1178
1179 rt->u.dst.output = ip6_output;
1180
86872cb5
TG
1181 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1182 rt->rt6i_dst.plen = cfg->fc_dst_len;
1da177e4
LT
1183 if (rt->rt6i_dst.plen == 128)
1184 rt->u.dst.flags = DST_HOST;
1185
1186#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1187 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1188 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1189#endif
1190
86872cb5 1191 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1192
1193 /* We cannot add true routes via loopback here,
1194 they would result in kernel looping; promote them to reject routes
1195 */
86872cb5 1196 if ((cfg->fc_flags & RTF_REJECT) ||
1da177e4
LT
1197 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1198 /* hold loopback dev/idev if we haven't done so. */
5578689a 1199 if (dev != net->loopback_dev) {
1da177e4
LT
1200 if (dev) {
1201 dev_put(dev);
1202 in6_dev_put(idev);
1203 }
5578689a 1204 dev = net->loopback_dev;
1da177e4
LT
1205 dev_hold(dev);
1206 idev = in6_dev_get(dev);
1207 if (!idev) {
1208 err = -ENODEV;
1209 goto out;
1210 }
1211 }
1212 rt->u.dst.output = ip6_pkt_discard_out;
1213 rt->u.dst.input = ip6_pkt_discard;
1214 rt->u.dst.error = -ENETUNREACH;
1215 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1216 goto install_route;
1217 }
1218
86872cb5 1219 if (cfg->fc_flags & RTF_GATEWAY) {
1da177e4
LT
1220 struct in6_addr *gw_addr;
1221 int gwa_type;
1222
86872cb5
TG
1223 gw_addr = &cfg->fc_gateway;
1224 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1da177e4
LT
1225 gwa_type = ipv6_addr_type(gw_addr);
1226
1227 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1228 struct rt6_info *grt;
1229
1230 /* IPv6 strictly inhibits using not link-local
1231 addresses as nexthop address.
1232 Otherwise, router will not able to send redirects.
1233 It is very good, but in some (rare!) circumstances
1234 (SIT, PtP, NBMA NOARP links) it is handy to allow
1235 some exceptions. --ANK
1236 */
1237 err = -EINVAL;
1238 if (!(gwa_type&IPV6_ADDR_UNICAST))
1239 goto out;
1240
5578689a 1241 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1242
1243 err = -EHOSTUNREACH;
1244 if (grt == NULL)
1245 goto out;
1246 if (dev) {
1247 if (dev != grt->rt6i_dev) {
1248 dst_release(&grt->u.dst);
1249 goto out;
1250 }
1251 } else {
1252 dev = grt->rt6i_dev;
1253 idev = grt->rt6i_idev;
1254 dev_hold(dev);
1255 in6_dev_hold(grt->rt6i_idev);
1256 }
1257 if (!(grt->rt6i_flags&RTF_GATEWAY))
1258 err = 0;
1259 dst_release(&grt->u.dst);
1260
1261 if (err)
1262 goto out;
1263 }
1264 err = -EINVAL;
1265 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1266 goto out;
1267 }
1268
1269 err = -ENODEV;
1270 if (dev == NULL)
1271 goto out;
1272
86872cb5 1273 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1da177e4
LT
1274 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1275 if (IS_ERR(rt->rt6i_nexthop)) {
1276 err = PTR_ERR(rt->rt6i_nexthop);
1277 rt->rt6i_nexthop = NULL;
1278 goto out;
1279 }
1280 }
1281
86872cb5 1282 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1283
1284install_route:
86872cb5
TG
1285 if (cfg->fc_mx) {
1286 struct nlattr *nla;
1287 int remaining;
1288
1289 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
8f4c1f9b 1290 int type = nla_type(nla);
86872cb5
TG
1291
1292 if (type) {
1293 if (type > RTAX_MAX) {
1da177e4
LT
1294 err = -EINVAL;
1295 goto out;
1296 }
86872cb5
TG
1297
1298 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1da177e4 1299 }
1da177e4
LT
1300 }
1301 }
1302
5ffc02a1 1303 if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
1da177e4 1304 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1ca615fb 1305 if (!dst_mtu(&rt->u.dst))
1da177e4 1306 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
5ffc02a1 1307 if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
5578689a 1308 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1309 rt->u.dst.dev = dev;
1310 rt->rt6i_idev = idev;
c71099ac 1311 rt->rt6i_table = table;
63152fc0 1312
c346dca1 1313 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1314
86872cb5 1315 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1316
1317out:
1318 if (dev)
1319 dev_put(dev);
1320 if (idev)
1321 in6_dev_put(idev);
1322 if (rt)
40aa7b90 1323 dst_free(&rt->u.dst);
1da177e4
LT
1324 return err;
1325}
1326
86872cb5 1327static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1328{
1329 int err;
c71099ac 1330 struct fib6_table *table;
c346dca1 1331 struct net *net = dev_net(rt->rt6i_dev);
1da177e4 1332
8ed67789 1333 if (rt == net->ipv6.ip6_null_entry)
6c813a72
PM
1334 return -ENOENT;
1335
c71099ac
TG
1336 table = rt->rt6i_table;
1337 write_lock_bh(&table->tb6_lock);
1da177e4 1338
86872cb5 1339 err = fib6_del(rt, info);
1da177e4
LT
1340 dst_release(&rt->u.dst);
1341
c71099ac 1342 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1343
1344 return err;
1345}
1346
e0a1ad73
TG
1347int ip6_del_rt(struct rt6_info *rt)
1348{
4d1169c1 1349 struct nl_info info = {
c346dca1 1350 .nl_net = dev_net(rt->rt6i_dev),
4d1169c1 1351 };
528c4ceb 1352 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1353}
1354
86872cb5 1355static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1356{
c71099ac 1357 struct fib6_table *table;
1da177e4
LT
1358 struct fib6_node *fn;
1359 struct rt6_info *rt;
1360 int err = -ESRCH;
1361
5578689a 1362 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
c71099ac
TG
1363 if (table == NULL)
1364 return err;
1365
1366 read_lock_bh(&table->tb6_lock);
1da177e4 1367
c71099ac 1368 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1369 &cfg->fc_dst, cfg->fc_dst_len,
1370 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1371
1da177e4 1372 if (fn) {
7cc48263 1373 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
86872cb5 1374 if (cfg->fc_ifindex &&
1da177e4 1375 (rt->rt6i_dev == NULL ||
86872cb5 1376 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1da177e4 1377 continue;
86872cb5
TG
1378 if (cfg->fc_flags & RTF_GATEWAY &&
1379 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1380 continue;
86872cb5 1381 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4
LT
1382 continue;
1383 dst_hold(&rt->u.dst);
c71099ac 1384 read_unlock_bh(&table->tb6_lock);
1da177e4 1385
86872cb5 1386 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1387 }
1388 }
c71099ac 1389 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1390
1391 return err;
1392}
1393
1394/*
1395 * Handle redirects
1396 */
a6279458
YH
1397struct ip6rd_flowi {
1398 struct flowi fl;
1399 struct in6_addr gateway;
1400};
1401
8ed67789
DL
1402static struct rt6_info *__ip6_route_redirect(struct net *net,
1403 struct fib6_table *table,
a6279458
YH
1404 struct flowi *fl,
1405 int flags)
1da177e4 1406{
a6279458
YH
1407 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1408 struct rt6_info *rt;
e843b9e1 1409 struct fib6_node *fn;
c71099ac 1410
1da177e4 1411 /*
e843b9e1
YH
1412 * Get the "current" route for this destination and
1413 * check if the redirect has come from approriate router.
1414 *
1415 * RFC 2461 specifies that redirects should only be
1416 * accepted if they come from the nexthop to the target.
1417 * Due to the way the routes are chosen, this notion
1418 * is a bit fuzzy and one might need to check all possible
1419 * routes.
1da177e4 1420 */
1da177e4 1421
c71099ac 1422 read_lock_bh(&table->tb6_lock);
a6279458 1423 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
e843b9e1 1424restart:
7cc48263 1425 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
e843b9e1
YH
1426 /*
1427 * Current route is on-link; redirect is always invalid.
1428 *
1429 * Seems, previous statement is not true. It could
1430 * be node, which looks for us as on-link (f.e. proxy ndisc)
1431 * But then router serving it might decide, that we should
1432 * know truth 8)8) --ANK (980726).
1433 */
1434 if (rt6_check_expired(rt))
1435 continue;
1436 if (!(rt->rt6i_flags & RTF_GATEWAY))
1437 continue;
a6279458 1438 if (fl->oif != rt->rt6i_dev->ifindex)
e843b9e1 1439 continue;
a6279458 1440 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
e843b9e1
YH
1441 continue;
1442 break;
1443 }
a6279458 1444
cb15d9c2 1445 if (!rt)
8ed67789
DL
1446 rt = net->ipv6.ip6_null_entry;
1447 BACKTRACK(net, &fl->fl6_src);
cb15d9c2 1448out:
a6279458
YH
1449 dst_hold(&rt->u.dst);
1450
c71099ac 1451 read_unlock_bh(&table->tb6_lock);
e843b9e1 1452
a6279458
YH
1453 return rt;
1454};
1455
1456static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1457 struct in6_addr *src,
1458 struct in6_addr *gateway,
1459 struct net_device *dev)
1460{
adaa70bb 1461 int flags = RT6_LOOKUP_F_HAS_SADDR;
c346dca1 1462 struct net *net = dev_net(dev);
a6279458
YH
1463 struct ip6rd_flowi rdfl = {
1464 .fl = {
1465 .oif = dev->ifindex,
1466 .nl_u = {
1467 .ip6_u = {
1468 .daddr = *dest,
1469 .saddr = *src,
1470 },
1471 },
1472 },
a6279458 1473 };
adaa70bb 1474
86c36ce4
BH
1475 ipv6_addr_copy(&rdfl.gateway, gateway);
1476
adaa70bb
TG
1477 if (rt6_need_strict(dest))
1478 flags |= RT6_LOOKUP_F_IFACE;
a6279458 1479
5578689a 1480 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
58f09b78 1481 flags, __ip6_route_redirect);
a6279458
YH
1482}
1483
1484void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1485 struct in6_addr *saddr,
1486 struct neighbour *neigh, u8 *lladdr, int on_link)
1487{
1488 struct rt6_info *rt, *nrt = NULL;
1489 struct netevent_redirect netevent;
c346dca1 1490 struct net *net = dev_net(neigh->dev);
a6279458
YH
1491
1492 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1493
8ed67789 1494 if (rt == net->ipv6.ip6_null_entry) {
1da177e4
LT
1495 if (net_ratelimit())
1496 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1497 "for redirect target\n");
a6279458 1498 goto out;
1da177e4
LT
1499 }
1500
1da177e4
LT
1501 /*
1502 * We have finally decided to accept it.
1503 */
1504
1ab1457c 1505 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1506 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1507 NEIGH_UPDATE_F_OVERRIDE|
1508 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1509 NEIGH_UPDATE_F_ISROUTER))
1510 );
1511
1512 /*
1513 * Redirect received -> path was valid.
1514 * Look, redirects are sent only in response to data packets,
1515 * so that this nexthop apparently is reachable. --ANK
1516 */
1517 dst_confirm(&rt->u.dst);
1518
1519 /* Duplicate redirect: silently ignore. */
1520 if (neigh == rt->u.dst.neighbour)
1521 goto out;
1522
1523 nrt = ip6_rt_copy(rt);
1524 if (nrt == NULL)
1525 goto out;
1526
1527 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1528 if (on_link)
1529 nrt->rt6i_flags &= ~RTF_GATEWAY;
1530
1531 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1532 nrt->rt6i_dst.plen = 128;
1533 nrt->u.dst.flags |= DST_HOST;
1534
1535 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1536 nrt->rt6i_nexthop = neigh_clone(neigh);
1537 /* Reset pmtu, it may be better */
1538 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
c346dca1 1539 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dev_net(neigh->dev),
5578689a 1540 dst_mtu(&nrt->u.dst));
1da177e4 1541
40e22e8f 1542 if (ip6_ins_rt(nrt))
1da177e4
LT
1543 goto out;
1544
8d71740c
TT
1545 netevent.old = &rt->u.dst;
1546 netevent.new = &nrt->u.dst;
1547 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1548
1da177e4 1549 if (rt->rt6i_flags&RTF_CACHE) {
e0a1ad73 1550 ip6_del_rt(rt);
1da177e4
LT
1551 return;
1552 }
1553
1554out:
1ab1457c 1555 dst_release(&rt->u.dst);
1da177e4
LT
1556 return;
1557}
1558
1559/*
1560 * Handle ICMP "packet too big" messages
1561 * i.e. Path MTU discovery
1562 */
1563
1564void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1565 struct net_device *dev, u32 pmtu)
1566{
1567 struct rt6_info *rt, *nrt;
c346dca1 1568 struct net *net = dev_net(dev);
1da177e4
LT
1569 int allfrag = 0;
1570
5578689a 1571 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1da177e4
LT
1572 if (rt == NULL)
1573 return;
1574
1575 if (pmtu >= dst_mtu(&rt->u.dst))
1576 goto out;
1577
1578 if (pmtu < IPV6_MIN_MTU) {
1579 /*
1ab1457c 1580 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1da177e4
LT
1581 * MTU (1280) and a fragment header should always be included
1582 * after a node receiving Too Big message reporting PMTU is
1583 * less than the IPv6 Minimum Link MTU.
1584 */
1585 pmtu = IPV6_MIN_MTU;
1586 allfrag = 1;
1587 }
1588
1589 /* New mtu received -> path was valid.
1590 They are sent only in response to data packets,
1591 so that this nexthop apparently is reachable. --ANK
1592 */
1593 dst_confirm(&rt->u.dst);
1594
1595 /* Host route. If it is static, it would be better
1596 not to override it, but add new one, so that
1597 when cache entry will expire old pmtu
1598 would return automatically.
1599 */
1600 if (rt->rt6i_flags & RTF_CACHE) {
1601 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1602 if (allfrag)
1603 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
5578689a 1604 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1605 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1606 goto out;
1607 }
1608
1609 /* Network route.
1610 Two cases are possible:
1611 1. It is connected route. Action: COW
1612 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1613 */
d5315b50 1614 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
a1e78363 1615 nrt = rt6_alloc_cow(rt, daddr, saddr);
d5315b50
YH
1616 else
1617 nrt = rt6_alloc_clone(rt, daddr);
a1e78363 1618
d5315b50 1619 if (nrt) {
a1e78363
YH
1620 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1621 if (allfrag)
1622 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1623
1624 /* According to RFC 1981, detecting PMTU increase shouldn't be
1625 * happened within 5 mins, the recommended timer is 10 mins.
1626 * Here this route expiration time is set to ip6_rt_mtu_expires
1627 * which is 10 mins. After 10 mins the decreased pmtu is expired
1628 * and detecting PMTU increase will be automatically happened.
1629 */
5578689a 1630 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
a1e78363
YH
1631 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1632
40e22e8f 1633 ip6_ins_rt(nrt);
1da177e4 1634 }
1da177e4
LT
1635out:
1636 dst_release(&rt->u.dst);
1637}
1638
1639/*
1640 * Misc support functions
1641 */
1642
1643static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1644{
c346dca1 1645 struct net *net = dev_net(ort->rt6i_dev);
86393e52 1646 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1da177e4
LT
1647
1648 if (rt) {
1649 rt->u.dst.input = ort->u.dst.input;
1650 rt->u.dst.output = ort->u.dst.output;
1651
1652 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
22e1e4d8 1653 rt->u.dst.error = ort->u.dst.error;
1da177e4
LT
1654 rt->u.dst.dev = ort->u.dst.dev;
1655 if (rt->u.dst.dev)
1656 dev_hold(rt->u.dst.dev);
1657 rt->rt6i_idev = ort->rt6i_idev;
1658 if (rt->rt6i_idev)
1659 in6_dev_hold(rt->rt6i_idev);
1660 rt->u.dst.lastuse = jiffies;
1661 rt->rt6i_expires = 0;
1662
1663 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1664 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1665 rt->rt6i_metric = 0;
1666
1667 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1668#ifdef CONFIG_IPV6_SUBTREES
1669 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1670#endif
c71099ac 1671 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1672 }
1673 return rt;
1674}
1675
70ceb4f5 1676#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0
DL
1677static struct rt6_info *rt6_get_route_info(struct net *net,
1678 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1679 struct in6_addr *gwaddr, int ifindex)
1680{
1681 struct fib6_node *fn;
1682 struct rt6_info *rt = NULL;
c71099ac
TG
1683 struct fib6_table *table;
1684
efa2cea0 1685 table = fib6_get_table(net, RT6_TABLE_INFO);
c71099ac
TG
1686 if (table == NULL)
1687 return NULL;
70ceb4f5 1688
c71099ac
TG
1689 write_lock_bh(&table->tb6_lock);
1690 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
70ceb4f5
YH
1691 if (!fn)
1692 goto out;
1693
7cc48263 1694 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
70ceb4f5
YH
1695 if (rt->rt6i_dev->ifindex != ifindex)
1696 continue;
1697 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1698 continue;
1699 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1700 continue;
1701 dst_hold(&rt->u.dst);
1702 break;
1703 }
1704out:
c71099ac 1705 write_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1706 return rt;
1707}
1708
efa2cea0
DL
1709static struct rt6_info *rt6_add_route_info(struct net *net,
1710 struct in6_addr *prefix, int prefixlen,
70ceb4f5
YH
1711 struct in6_addr *gwaddr, int ifindex,
1712 unsigned pref)
1713{
86872cb5
TG
1714 struct fib6_config cfg = {
1715 .fc_table = RT6_TABLE_INFO,
238fc7ea 1716 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1717 .fc_ifindex = ifindex,
1718 .fc_dst_len = prefixlen,
1719 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1720 RTF_UP | RTF_PREF(pref),
efa2cea0
DL
1721 .fc_nlinfo.pid = 0,
1722 .fc_nlinfo.nlh = NULL,
1723 .fc_nlinfo.nl_net = net,
86872cb5
TG
1724 };
1725
1726 ipv6_addr_copy(&cfg.fc_dst, prefix);
1727 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
70ceb4f5 1728
e317da96
YH
1729 /* We should treat it as a default route if prefix length is 0. */
1730 if (!prefixlen)
86872cb5 1731 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 1732
86872cb5 1733 ip6_route_add(&cfg);
70ceb4f5 1734
efa2cea0 1735 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
1736}
1737#endif
1738
1da177e4 1739struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1ab1457c 1740{
1da177e4 1741 struct rt6_info *rt;
c71099ac 1742 struct fib6_table *table;
1da177e4 1743
c346dca1 1744 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
c71099ac
TG
1745 if (table == NULL)
1746 return NULL;
1da177e4 1747
c71099ac 1748 write_lock_bh(&table->tb6_lock);
7cc48263 1749 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1da177e4 1750 if (dev == rt->rt6i_dev &&
045927ff 1751 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
1752 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1753 break;
1754 }
1755 if (rt)
1756 dst_hold(&rt->u.dst);
c71099ac 1757 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
1758 return rt;
1759}
1760
1761struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
ebacaaa0
YH
1762 struct net_device *dev,
1763 unsigned int pref)
1da177e4 1764{
86872cb5
TG
1765 struct fib6_config cfg = {
1766 .fc_table = RT6_TABLE_DFLT,
238fc7ea 1767 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
1768 .fc_ifindex = dev->ifindex,
1769 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1770 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
5578689a
DL
1771 .fc_nlinfo.pid = 0,
1772 .fc_nlinfo.nlh = NULL,
c346dca1 1773 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 1774 };
1da177e4 1775
86872cb5 1776 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1da177e4 1777
86872cb5 1778 ip6_route_add(&cfg);
1da177e4 1779
1da177e4
LT
1780 return rt6_get_dflt_router(gwaddr, dev);
1781}
1782
7b4da532 1783void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
1784{
1785 struct rt6_info *rt;
c71099ac
TG
1786 struct fib6_table *table;
1787
1788 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 1789 table = fib6_get_table(net, RT6_TABLE_DFLT);
c71099ac
TG
1790 if (table == NULL)
1791 return;
1da177e4
LT
1792
1793restart:
c71099ac 1794 read_lock_bh(&table->tb6_lock);
7cc48263 1795 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1da177e4
LT
1796 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1797 dst_hold(&rt->u.dst);
c71099ac 1798 read_unlock_bh(&table->tb6_lock);
e0a1ad73 1799 ip6_del_rt(rt);
1da177e4
LT
1800 goto restart;
1801 }
1802 }
c71099ac 1803 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1804}
1805
5578689a
DL
1806static void rtmsg_to_fib6_config(struct net *net,
1807 struct in6_rtmsg *rtmsg,
86872cb5
TG
1808 struct fib6_config *cfg)
1809{
1810 memset(cfg, 0, sizeof(*cfg));
1811
1812 cfg->fc_table = RT6_TABLE_MAIN;
1813 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1814 cfg->fc_metric = rtmsg->rtmsg_metric;
1815 cfg->fc_expires = rtmsg->rtmsg_info;
1816 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1817 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1818 cfg->fc_flags = rtmsg->rtmsg_flags;
1819
5578689a 1820 cfg->fc_nlinfo.nl_net = net;
f1243c2d 1821
86872cb5
TG
1822 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1823 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1824 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1825}
1826
5578689a 1827int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 1828{
86872cb5 1829 struct fib6_config cfg;
1da177e4
LT
1830 struct in6_rtmsg rtmsg;
1831 int err;
1832
1833 switch(cmd) {
1834 case SIOCADDRT: /* Add a route */
1835 case SIOCDELRT: /* Delete a route */
1836 if (!capable(CAP_NET_ADMIN))
1837 return -EPERM;
1838 err = copy_from_user(&rtmsg, arg,
1839 sizeof(struct in6_rtmsg));
1840 if (err)
1841 return -EFAULT;
86872cb5 1842
5578689a 1843 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 1844
1da177e4
LT
1845 rtnl_lock();
1846 switch (cmd) {
1847 case SIOCADDRT:
86872cb5 1848 err = ip6_route_add(&cfg);
1da177e4
LT
1849 break;
1850 case SIOCDELRT:
86872cb5 1851 err = ip6_route_del(&cfg);
1da177e4
LT
1852 break;
1853 default:
1854 err = -EINVAL;
1855 }
1856 rtnl_unlock();
1857
1858 return err;
3ff50b79 1859 }
1da177e4
LT
1860
1861 return -EINVAL;
1862}
1863
1864/*
1865 * Drop the packet on the floor
1866 */
1867
d5fdd6ba 1868static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 1869{
612f09e8 1870 int type;
adf30907 1871 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
1872 switch (ipstats_mib_noroutes) {
1873 case IPSTATS_MIB_INNOROUTES:
0660e03f 1874 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 1875 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
1876 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1877 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
1878 break;
1879 }
1880 /* FALLTHROUGH */
1881 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
1882 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1883 ipstats_mib_noroutes);
612f09e8
YH
1884 break;
1885 }
3ffe533c 1886 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
1887 kfree_skb(skb);
1888 return 0;
1889}
1890
9ce8ade0
TG
1891static int ip6_pkt_discard(struct sk_buff *skb)
1892{
612f09e8 1893 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1894}
1895
20380731 1896static int ip6_pkt_discard_out(struct sk_buff *skb)
1da177e4 1897{
adf30907 1898 skb->dev = skb_dst(skb)->dev;
612f09e8 1899 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
1900}
1901
6723ab54
DM
1902#ifdef CONFIG_IPV6_MULTIPLE_TABLES
1903
9ce8ade0
TG
1904static int ip6_pkt_prohibit(struct sk_buff *skb)
1905{
612f09e8 1906 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
1907}
1908
1909static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1910{
adf30907 1911 skb->dev = skb_dst(skb)->dev;
612f09e8 1912 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
1913}
1914
6723ab54
DM
1915#endif
1916
1da177e4
LT
1917/*
1918 * Allocate a dst for local (unicast / anycast) address.
1919 */
1920
1921struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1922 const struct in6_addr *addr,
1923 int anycast)
1924{
c346dca1 1925 struct net *net = dev_net(idev->dev);
86393e52 1926 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
14deae41 1927 struct neighbour *neigh;
1da177e4
LT
1928
1929 if (rt == NULL)
1930 return ERR_PTR(-ENOMEM);
1931
5578689a 1932 dev_hold(net->loopback_dev);
1da177e4
LT
1933 in6_dev_hold(idev);
1934
1935 rt->u.dst.flags = DST_HOST;
1936 rt->u.dst.input = ip6_input;
1937 rt->u.dst.output = ip6_output;
5578689a 1938 rt->rt6i_dev = net->loopback_dev;
1da177e4
LT
1939 rt->rt6i_idev = idev;
1940 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
5578689a 1941 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1da177e4
LT
1942 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1943 rt->u.dst.obsolete = -1;
1944
1945 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
1946 if (anycast)
1947 rt->rt6i_flags |= RTF_ANYCAST;
1948 else
1da177e4 1949 rt->rt6i_flags |= RTF_LOCAL;
14deae41
DM
1950 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1951 if (IS_ERR(neigh)) {
40aa7b90 1952 dst_free(&rt->u.dst);
14deae41
DM
1953
1954 /* We are casting this because that is the return
1955 * value type. But an errno encoded pointer is the
1956 * same regardless of the underlying pointer type,
1957 * and that's what we are returning. So this is OK.
1958 */
1959 return (struct rt6_info *) neigh;
1da177e4 1960 }
14deae41 1961 rt->rt6i_nexthop = neigh;
1da177e4
LT
1962
1963 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1964 rt->rt6i_dst.plen = 128;
5578689a 1965 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4
LT
1966
1967 atomic_set(&rt->u.dst.__refcnt, 1);
1968
1969 return rt;
1970}
1971
8ed67789
DL
/*
 * Argument bundle for fib6_ifdown(): the device going down (NULL matches
 * every device) and the namespace whose ip6_null_entry must be spared.
 */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
1976
1da177e4
LT
1977static int fib6_ifdown(struct rt6_info *rt, void *arg)
1978{
8ed67789
DL
1979 struct net_device *dev = ((struct arg_dev_net *)arg)->dev;
1980 struct net *net = ((struct arg_dev_net *)arg)->net;
1981
1982 if (((void *)rt->rt6i_dev == dev || dev == NULL) &&
1983 rt != net->ipv6.ip6_null_entry) {
1da177e4
LT
1984 RT6_TRACE("deleted by ifdown %p\n", rt);
1985 return -1;
1986 }
1987 return 0;
1988}
1989
f3db4851 1990void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 1991{
8ed67789
DL
1992 struct arg_dev_net adn = {
1993 .dev = dev,
1994 .net = net,
1995 };
1996
1997 fib6_clean_all(net, fib6_ifdown, 0, &adn);
1e493d19 1998 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
1999}
2000
/* Argument bundle for rt6_mtu_change_route(): the device and its new MTU. */
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned mtu;
};
2006
2007static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2008{
2009 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2010 struct inet6_dev *idev;
c346dca1 2011 struct net *net = dev_net(arg->dev);
1da177e4
LT
2012
2013 /* In IPv6 pmtu discovery is not optional,
2014 so that RTAX_MTU lock cannot disable it.
2015 We still use this lock to block changes
2016 caused by addrconf/ndisc.
2017 */
2018
2019 idev = __in6_dev_get(arg->dev);
2020 if (idev == NULL)
2021 return 0;
2022
2023 /* For administrative MTU increase, there is no way to discover
2024 IPv6 PMTU increase, so PMTU increase should be updated here.
2025 Since RFC 1981 doesn't include administrative MTU increase
2026 update PMTU increase is a MUST. (i.e. jumbo frame)
2027 */
2028 /*
2029 If new MTU is less than route PMTU, this new MTU will be the
2030 lowest MTU in the path, update the route PMTU to reflect PMTU
2031 decreases; if new MTU is greater than route PMTU, and the
2032 old MTU is the lowest MTU in the path, update the route PMTU
2033 to reflect the increase. In this case if the other nodes' MTU
2034 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2035 PMTU discouvery.
2036 */
2037 if (rt->rt6i_dev == arg->dev &&
2038 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
23717795 2039 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1ab1457c 2040 (dst_mtu(&rt->u.dst) < arg->mtu &&
566cfd8f 2041 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1da177e4 2042 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
5578689a 2043 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
566cfd8f 2044 }
1da177e4
LT
2045 return 0;
2046}
2047
2048void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2049{
c71099ac
TG
2050 struct rt6_mtu_change_arg arg = {
2051 .dev = dev,
2052 .mtu = mtu,
2053 };
1da177e4 2054
c346dca1 2055 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
1da177e4
LT
2056}
2057
ef7c79ed 2058static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2059 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2060 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2061 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2062 [RTA_PRIORITY] = { .type = NLA_U32 },
2063 [RTA_METRICS] = { .type = NLA_NESTED },
2064};
2065
2066static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2067 struct fib6_config *cfg)
1da177e4 2068{
86872cb5
TG
2069 struct rtmsg *rtm;
2070 struct nlattr *tb[RTA_MAX+1];
2071 int err;
1da177e4 2072
86872cb5
TG
2073 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2074 if (err < 0)
2075 goto errout;
1da177e4 2076
86872cb5
TG
2077 err = -EINVAL;
2078 rtm = nlmsg_data(nlh);
2079 memset(cfg, 0, sizeof(*cfg));
2080
2081 cfg->fc_table = rtm->rtm_table;
2082 cfg->fc_dst_len = rtm->rtm_dst_len;
2083 cfg->fc_src_len = rtm->rtm_src_len;
2084 cfg->fc_flags = RTF_UP;
2085 cfg->fc_protocol = rtm->rtm_protocol;
2086
2087 if (rtm->rtm_type == RTN_UNREACHABLE)
2088 cfg->fc_flags |= RTF_REJECT;
2089
2090 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2091 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2092 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2093
2094 if (tb[RTA_GATEWAY]) {
2095 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2096 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2097 }
86872cb5
TG
2098
2099 if (tb[RTA_DST]) {
2100 int plen = (rtm->rtm_dst_len + 7) >> 3;
2101
2102 if (nla_len(tb[RTA_DST]) < plen)
2103 goto errout;
2104
2105 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2106 }
86872cb5
TG
2107
2108 if (tb[RTA_SRC]) {
2109 int plen = (rtm->rtm_src_len + 7) >> 3;
2110
2111 if (nla_len(tb[RTA_SRC]) < plen)
2112 goto errout;
2113
2114 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2115 }
86872cb5
TG
2116
2117 if (tb[RTA_OIF])
2118 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2119
2120 if (tb[RTA_PRIORITY])
2121 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2122
2123 if (tb[RTA_METRICS]) {
2124 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2125 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2126 }
86872cb5
TG
2127
2128 if (tb[RTA_TABLE])
2129 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2130
2131 err = 0;
2132errout:
2133 return err;
1da177e4
LT
2134}
2135
c127ea2c 2136static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2137{
86872cb5
TG
2138 struct fib6_config cfg;
2139 int err;
1da177e4 2140
86872cb5
TG
2141 err = rtm_to_fib6_config(skb, nlh, &cfg);
2142 if (err < 0)
2143 return err;
2144
2145 return ip6_route_del(&cfg);
1da177e4
LT
2146}
2147
c127ea2c 2148static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2149{
86872cb5
TG
2150 struct fib6_config cfg;
2151 int err;
1da177e4 2152
86872cb5
TG
2153 err = rtm_to_fib6_config(skb, nlh, &cfg);
2154 if (err < 0)
2155 return err;
2156
2157 return ip6_route_add(&cfg);
1da177e4
LT
2158}
2159
339bf98f
TG
2160static inline size_t rt6_nlmsg_size(void)
2161{
2162 return NLMSG_ALIGN(sizeof(struct rtmsg))
2163 + nla_total_size(16) /* RTA_SRC */
2164 + nla_total_size(16) /* RTA_DST */
2165 + nla_total_size(16) /* RTA_GATEWAY */
2166 + nla_total_size(16) /* RTA_PREFSRC */
2167 + nla_total_size(4) /* RTA_TABLE */
2168 + nla_total_size(4) /* RTA_IIF */
2169 + nla_total_size(4) /* RTA_OIF */
2170 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2171 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
339bf98f
TG
2172 + nla_total_size(sizeof(struct rta_cacheinfo));
2173}
2174
191cd582
BH
2175static int rt6_fill_node(struct net *net,
2176 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80
JHS
2177 struct in6_addr *dst, struct in6_addr *src,
2178 int iif, int type, u32 pid, u32 seq,
7bc570c8 2179 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2180{
2181 struct rtmsg *rtm;
2d7202bf 2182 struct nlmsghdr *nlh;
e3703b3d 2183 long expires;
9e762a4a 2184 u32 table;
1da177e4
LT
2185
2186 if (prefix) { /* user wants prefix routes only */
2187 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2188 /* success since this is not a prefix route */
2189 return 1;
2190 }
2191 }
2192
2d7202bf
TG
2193 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2194 if (nlh == NULL)
26932566 2195 return -EMSGSIZE;
2d7202bf
TG
2196
2197 rtm = nlmsg_data(nlh);
1da177e4
LT
2198 rtm->rtm_family = AF_INET6;
2199 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2200 rtm->rtm_src_len = rt->rt6i_src.plen;
2201 rtm->rtm_tos = 0;
c71099ac 2202 if (rt->rt6i_table)
9e762a4a 2203 table = rt->rt6i_table->tb6_id;
c71099ac 2204 else
9e762a4a
PM
2205 table = RT6_TABLE_UNSPEC;
2206 rtm->rtm_table = table;
2d7202bf 2207 NLA_PUT_U32(skb, RTA_TABLE, table);
1da177e4
LT
2208 if (rt->rt6i_flags&RTF_REJECT)
2209 rtm->rtm_type = RTN_UNREACHABLE;
2210 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2211 rtm->rtm_type = RTN_LOCAL;
2212 else
2213 rtm->rtm_type = RTN_UNICAST;
2214 rtm->rtm_flags = 0;
2215 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2216 rtm->rtm_protocol = rt->rt6i_protocol;
2217 if (rt->rt6i_flags&RTF_DYNAMIC)
2218 rtm->rtm_protocol = RTPROT_REDIRECT;
2219 else if (rt->rt6i_flags & RTF_ADDRCONF)
2220 rtm->rtm_protocol = RTPROT_KERNEL;
2221 else if (rt->rt6i_flags&RTF_DEFAULT)
2222 rtm->rtm_protocol = RTPROT_RA;
2223
2224 if (rt->rt6i_flags&RTF_CACHE)
2225 rtm->rtm_flags |= RTM_F_CLONED;
2226
2227 if (dst) {
2d7202bf 2228 NLA_PUT(skb, RTA_DST, 16, dst);
1ab1457c 2229 rtm->rtm_dst_len = 128;
1da177e4 2230 } else if (rtm->rtm_dst_len)
2d7202bf 2231 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1da177e4
LT
2232#ifdef CONFIG_IPV6_SUBTREES
2233 if (src) {
2d7202bf 2234 NLA_PUT(skb, RTA_SRC, 16, src);
1ab1457c 2235 rtm->rtm_src_len = 128;
1da177e4 2236 } else if (rtm->rtm_src_len)
2d7202bf 2237 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1da177e4 2238#endif
7bc570c8
YH
2239 if (iif) {
2240#ifdef CONFIG_IPV6_MROUTE
2241 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2242 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2243 if (err <= 0) {
2244 if (!nowait) {
2245 if (err == 0)
2246 return 0;
2247 goto nla_put_failure;
2248 } else {
2249 if (err == -EMSGSIZE)
2250 goto nla_put_failure;
2251 }
2252 }
2253 } else
2254#endif
2255 NLA_PUT_U32(skb, RTA_IIF, iif);
2256 } else if (dst) {
5e0115e5 2257 struct inet6_dev *idev = ip6_dst_idev(&rt->u.dst);
1da177e4 2258 struct in6_addr saddr_buf;
191cd582 2259 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
7cbca67c 2260 dst, 0, &saddr_buf) == 0)
2d7202bf 2261 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1da177e4 2262 }
2d7202bf 2263
1da177e4 2264 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2d7202bf
TG
2265 goto nla_put_failure;
2266
1da177e4 2267 if (rt->u.dst.neighbour)
2d7202bf
TG
2268 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2269
1da177e4 2270 if (rt->u.dst.dev)
2d7202bf
TG
2271 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2272
2273 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
e3703b3d 2274
36e3deae
YH
2275 if (!(rt->rt6i_flags & RTF_EXPIRES))
2276 expires = 0;
2277 else if (rt->rt6i_expires - jiffies < INT_MAX)
2278 expires = rt->rt6i_expires - jiffies;
2279 else
2280 expires = INT_MAX;
69cdf8f9 2281
e3703b3d
TG
2282 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2283 expires, rt->u.dst.error) < 0)
2284 goto nla_put_failure;
2d7202bf
TG
2285
2286 return nlmsg_end(skb, nlh);
2287
2288nla_put_failure:
26932566
PM
2289 nlmsg_cancel(skb, nlh);
2290 return -EMSGSIZE;
1da177e4
LT
2291}
2292
1b43af54 2293int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2294{
2295 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2296 int prefix;
2297
2d7202bf
TG
2298 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2299 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2300 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2301 } else
2302 prefix = 0;
2303
191cd582
BH
2304 return rt6_fill_node(arg->net,
2305 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1da177e4 2306 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2307 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2308}
2309
c127ea2c 2310static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1da177e4 2311{
3b1e0a65 2312 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2313 struct nlattr *tb[RTA_MAX+1];
2314 struct rt6_info *rt;
1da177e4 2315 struct sk_buff *skb;
ab364a6f 2316 struct rtmsg *rtm;
1da177e4 2317 struct flowi fl;
ab364a6f 2318 int err, iif = 0;
1da177e4 2319
ab364a6f
TG
2320 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2321 if (err < 0)
2322 goto errout;
1da177e4 2323
ab364a6f 2324 err = -EINVAL;
1da177e4 2325 memset(&fl, 0, sizeof(fl));
1da177e4 2326
ab364a6f
TG
2327 if (tb[RTA_SRC]) {
2328 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2329 goto errout;
2330
2331 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2332 }
2333
2334 if (tb[RTA_DST]) {
2335 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2336 goto errout;
2337
2338 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2339 }
2340
2341 if (tb[RTA_IIF])
2342 iif = nla_get_u32(tb[RTA_IIF]);
2343
2344 if (tb[RTA_OIF])
2345 fl.oif = nla_get_u32(tb[RTA_OIF]);
1da177e4
LT
2346
2347 if (iif) {
2348 struct net_device *dev;
5578689a 2349 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2350 if (!dev) {
2351 err = -ENODEV;
ab364a6f 2352 goto errout;
1da177e4
LT
2353 }
2354 }
2355
ab364a6f
TG
2356 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2357 if (skb == NULL) {
2358 err = -ENOBUFS;
2359 goto errout;
2360 }
1da177e4 2361
ab364a6f
TG
2362 /* Reserve room for dummy headers, this skb can pass
2363 through good chunk of routing engine.
2364 */
459a98ed 2365 skb_reset_mac_header(skb);
ab364a6f 2366 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2367
8a3edd80 2368 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
adf30907 2369 skb_dst_set(skb, &rt->u.dst);
1da177e4 2370
191cd582 2371 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
1da177e4 2372 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
7bc570c8 2373 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2374 if (err < 0) {
ab364a6f
TG
2375 kfree_skb(skb);
2376 goto errout;
1da177e4
LT
2377 }
2378
5578689a 2379 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
ab364a6f 2380errout:
1da177e4 2381 return err;
1da177e4
LT
2382}
2383
86872cb5 2384void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2385{
2386 struct sk_buff *skb;
5578689a 2387 struct net *net = info->nl_net;
528c4ceb
DL
2388 u32 seq;
2389 int err;
2390
2391 err = -ENOBUFS;
2392 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
86872cb5 2393
339bf98f 2394 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
21713ebc
TG
2395 if (skb == NULL)
2396 goto errout;
2397
191cd582 2398 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
7bc570c8 2399 event, info->pid, seq, 0, 0, 0);
26932566
PM
2400 if (err < 0) {
2401 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2402 WARN_ON(err == -EMSGSIZE);
2403 kfree_skb(skb);
2404 goto errout;
2405 }
1ce85fe4
PNA
2406 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2407 info->nlh, gfp_any());
2408 return;
21713ebc
TG
2409errout:
2410 if (err < 0)
5578689a 2411 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2412}
2413
8ed67789
DL
2414static int ip6_route_dev_notify(struct notifier_block *this,
2415 unsigned long event, void *data)
2416{
2417 struct net_device *dev = (struct net_device *)data;
c346dca1 2418 struct net *net = dev_net(dev);
8ed67789
DL
2419
2420 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2421 net->ipv6.ip6_null_entry->u.dst.dev = dev;
2422 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2423#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2424 net->ipv6.ip6_prohibit_entry->u.dst.dev = dev;
2425 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2426 net->ipv6.ip6_blk_hole_entry->u.dst.dev = dev;
2427 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2428#endif
2429 }
2430
2431 return NOTIFY_OK;
2432}
2433
1da177e4
LT
2434/*
2435 * /proc
2436 */
2437
2438#ifdef CONFIG_PROC_FS
2439
2440#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2441
/*
 * NOTE(review): no user of this structure is visible in the /proc code
 * that follows (it works on struct seq_file) -- presumably a leftover
 * from a buffer-based implementation; confirm before removing.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2450
2451static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2452{
33120b30 2453 struct seq_file *m = p_arg;
1da177e4 2454
4b7a4274 2455 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
1da177e4
LT
2456
2457#ifdef CONFIG_IPV6_SUBTREES
4b7a4274 2458 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
1da177e4 2459#else
33120b30 2460 seq_puts(m, "00000000000000000000000000000000 00 ");
1da177e4
LT
2461#endif
2462
2463 if (rt->rt6i_nexthop) {
4b7a4274 2464 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
1da177e4 2465 } else {
33120b30 2466 seq_puts(m, "00000000000000000000000000000000");
1da177e4 2467 }
33120b30
AD
2468 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2469 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2470 rt->u.dst.__use, rt->rt6i_flags,
2471 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1da177e4
LT
2472 return 0;
2473}
2474
33120b30 2475static int ipv6_route_show(struct seq_file *m, void *v)
1da177e4 2476{
f3db4851
DL
2477 struct net *net = (struct net *)m->private;
2478 fib6_clean_all(net, rt6_info_route, 0, m);
33120b30
AD
2479 return 0;
2480}
1da177e4 2481
33120b30
AD
2482static int ipv6_route_open(struct inode *inode, struct file *file)
2483{
de05c557 2484 return single_open_net(inode, file, ipv6_route_show);
f3db4851
DL
2485}
2486
33120b30
AD
2487static const struct file_operations ipv6_route_proc_fops = {
2488 .owner = THIS_MODULE,
2489 .open = ipv6_route_open,
2490 .read = seq_read,
2491 .llseek = seq_lseek,
b6fcbdb4 2492 .release = single_release_net,
33120b30
AD
2493};
2494
1da177e4
LT
2495static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2496{
69ddb805 2497 struct net *net = (struct net *)seq->private;
1da177e4 2498 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2499 net->ipv6.rt6_stats->fib_nodes,
2500 net->ipv6.rt6_stats->fib_route_nodes,
2501 net->ipv6.rt6_stats->fib_rt_alloc,
2502 net->ipv6.rt6_stats->fib_rt_entries,
2503 net->ipv6.rt6_stats->fib_rt_cache,
86393e52 2504 atomic_read(&net->ipv6.ip6_dst_ops.entries),
69ddb805 2505 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2506
2507 return 0;
2508}
2509
2510static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2511{
de05c557 2512 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2513}
2514
9a32144e 2515static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2516 .owner = THIS_MODULE,
2517 .open = rt6_stats_seq_open,
2518 .read = seq_read,
2519 .llseek = seq_lseek,
b6fcbdb4 2520 .release = single_release_net,
1da177e4
LT
2521};
2522#endif /* CONFIG_PROC_FS */
2523
2524#ifdef CONFIG_SYSCTL
2525
1da177e4 2526static
8d65af78 2527int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
1da177e4
LT
2528 void __user *buffer, size_t *lenp, loff_t *ppos)
2529{
5b7c931d
DL
2530 struct net *net = current->nsproxy->net_ns;
2531 int delay = net->ipv6.sysctl.flush_delay;
1da177e4 2532 if (write) {
8d65af78 2533 proc_dointvec(ctl, write, buffer, lenp, ppos);
5b7c931d 2534 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
1da177e4
LT
2535 return 0;
2536 } else
2537 return -EINVAL;
2538}
2539
760f2d01 2540ctl_table ipv6_route_table_template[] = {
1ab1457c 2541 {
1da177e4 2542 .procname = "flush",
4990509f 2543 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2544 .maxlen = sizeof(int),
89c8b3a1 2545 .mode = 0200,
6d9f239a 2546 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2547 },
2548 {
1da177e4 2549 .procname = "gc_thresh",
9a7ec3a9 2550 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2551 .maxlen = sizeof(int),
2552 .mode = 0644,
6d9f239a 2553 .proc_handler = proc_dointvec,
1da177e4
LT
2554 },
2555 {
1da177e4 2556 .procname = "max_size",
4990509f 2557 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2558 .maxlen = sizeof(int),
2559 .mode = 0644,
6d9f239a 2560 .proc_handler = proc_dointvec,
1da177e4
LT
2561 },
2562 {
1da177e4 2563 .procname = "gc_min_interval",
4990509f 2564 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2565 .maxlen = sizeof(int),
2566 .mode = 0644,
6d9f239a 2567 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2568 },
2569 {
1da177e4 2570 .procname = "gc_timeout",
4990509f 2571 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
2572 .maxlen = sizeof(int),
2573 .mode = 0644,
6d9f239a 2574 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2575 },
2576 {
1da177e4 2577 .procname = "gc_interval",
4990509f 2578 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
2579 .maxlen = sizeof(int),
2580 .mode = 0644,
6d9f239a 2581 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2582 },
2583 {
1da177e4 2584 .procname = "gc_elasticity",
4990509f 2585 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
2586 .maxlen = sizeof(int),
2587 .mode = 0644,
6d9f239a 2588 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2589 },
2590 {
1da177e4 2591 .procname = "mtu_expires",
4990509f 2592 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
2593 .maxlen = sizeof(int),
2594 .mode = 0644,
6d9f239a 2595 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2596 },
2597 {
1da177e4 2598 .procname = "min_adv_mss",
4990509f 2599 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
2600 .maxlen = sizeof(int),
2601 .mode = 0644,
6d9f239a 2602 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
2603 },
2604 {
1da177e4 2605 .procname = "gc_min_interval_ms",
4990509f 2606 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
2607 .maxlen = sizeof(int),
2608 .mode = 0644,
6d9f239a 2609 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 2610 },
f8572d8f 2611 { }
1da177e4
LT
2612};
2613
2c8c1e72 2614struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
2615{
2616 struct ctl_table *table;
2617
2618 table = kmemdup(ipv6_route_table_template,
2619 sizeof(ipv6_route_table_template),
2620 GFP_KERNEL);
5ee09105
YH
2621
2622 if (table) {
2623 table[0].data = &net->ipv6.sysctl.flush_delay;
86393e52 2624 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
2625 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2626 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2627 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2628 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2629 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2630 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2631 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 2632 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5ee09105
YH
2633 }
2634
760f2d01
DL
2635 return table;
2636}
1da177e4
LT
2637#endif
2638
2c8c1e72 2639static int __net_init ip6_route_net_init(struct net *net)
cdb18761 2640{
633d424b 2641 int ret = -ENOMEM;
8ed67789 2642
86393e52
AD
2643 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2644 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 2645
8ed67789
DL
2646 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2647 sizeof(*net->ipv6.ip6_null_entry),
2648 GFP_KERNEL);
2649 if (!net->ipv6.ip6_null_entry)
f2fc6a54 2650 goto out_ip6_dst_ops;
8ed67789
DL
2651 net->ipv6.ip6_null_entry->u.dst.path =
2652 (struct dst_entry *)net->ipv6.ip6_null_entry;
86393e52 2653 net->ipv6.ip6_null_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2654
2655#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2656 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2657 sizeof(*net->ipv6.ip6_prohibit_entry),
2658 GFP_KERNEL);
68fffc67
PZ
2659 if (!net->ipv6.ip6_prohibit_entry)
2660 goto out_ip6_null_entry;
8ed67789
DL
2661 net->ipv6.ip6_prohibit_entry->u.dst.path =
2662 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
86393e52 2663 net->ipv6.ip6_prohibit_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2664
2665 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2666 sizeof(*net->ipv6.ip6_blk_hole_entry),
2667 GFP_KERNEL);
68fffc67
PZ
2668 if (!net->ipv6.ip6_blk_hole_entry)
2669 goto out_ip6_prohibit_entry;
8ed67789
DL
2670 net->ipv6.ip6_blk_hole_entry->u.dst.path =
2671 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
86393e52 2672 net->ipv6.ip6_blk_hole_entry->u.dst.ops = &net->ipv6.ip6_dst_ops;
8ed67789
DL
2673#endif
2674
b339a47c
PZ
2675 net->ipv6.sysctl.flush_delay = 0;
2676 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2677 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2678 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2679 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2680 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2681 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2682 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2683
cdb18761
DL
2684#ifdef CONFIG_PROC_FS
2685 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2686 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2687#endif
6891a346
BT
2688 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2689
8ed67789
DL
2690 ret = 0;
2691out:
2692 return ret;
f2fc6a54 2693
68fffc67
PZ
2694#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2695out_ip6_prohibit_entry:
2696 kfree(net->ipv6.ip6_prohibit_entry);
2697out_ip6_null_entry:
2698 kfree(net->ipv6.ip6_null_entry);
2699#endif
f2fc6a54 2700out_ip6_dst_ops:
f2fc6a54 2701 goto out;
cdb18761
DL
2702}
2703
2c8c1e72 2704static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761
DL
2705{
2706#ifdef CONFIG_PROC_FS
2707 proc_net_remove(net, "ipv6_route");
2708 proc_net_remove(net, "rt6_stats");
2709#endif
8ed67789
DL
2710 kfree(net->ipv6.ip6_null_entry);
2711#ifdef CONFIG_IPV6_MULTIPLE_TABLES
2712 kfree(net->ipv6.ip6_prohibit_entry);
2713 kfree(net->ipv6.ip6_blk_hole_entry);
2714#endif
cdb18761
DL
2715}
2716
2717static struct pernet_operations ip6_route_net_ops = {
2718 .init = ip6_route_net_init,
2719 .exit = ip6_route_net_exit,
2720};
2721
8ed67789
DL
2722static struct notifier_block ip6_route_dev_notifier = {
2723 .notifier_call = ip6_route_dev_notify,
2724 .priority = 0,
2725};
2726
433d49c3 2727int __init ip6_route_init(void)
1da177e4 2728{
433d49c3
DL
2729 int ret;
2730
9a7ec3a9
DL
2731 ret = -ENOMEM;
2732 ip6_dst_ops_template.kmem_cachep =
e5d679f3 2733 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 2734 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 2735 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 2736 goto out;
14e50e57 2737
8ed67789
DL
2738 ret = register_pernet_subsys(&ip6_route_net_ops);
2739 if (ret)
bdb3289f 2740 goto out_kmem_cache;
bdb3289f 2741
5dc121e9
AE
2742 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2743
8ed67789
DL
2744 /* Registering of the loopback is done before this portion of code,
2745 * the loopback reference in rt6_info will not be taken, do it
2746 * manually for init_net */
2747 init_net.ipv6.ip6_null_entry->u.dst.dev = init_net.loopback_dev;
2748 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2749 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2750 init_net.ipv6.ip6_prohibit_entry->u.dst.dev = init_net.loopback_dev;
2751 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2752 init_net.ipv6.ip6_blk_hole_entry->u.dst.dev = init_net.loopback_dev;
2753 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2754 #endif
433d49c3
DL
2755 ret = fib6_init();
2756 if (ret)
8ed67789 2757 goto out_register_subsys;
433d49c3 2758
433d49c3
DL
2759 ret = xfrm6_init();
2760 if (ret)
cdb18761 2761 goto out_fib6_init;
c35b7e72 2762
433d49c3
DL
2763 ret = fib6_rules_init();
2764 if (ret)
2765 goto xfrm6_init;
7e5449c2 2766
433d49c3
DL
2767 ret = -ENOBUFS;
2768 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2769 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2770 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2771 goto fib6_rules_init;
c127ea2c 2772
8ed67789 2773 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761
DL
2774 if (ret)
2775 goto fib6_rules_init;
8ed67789 2776
433d49c3
DL
2777out:
2778 return ret;
2779
2780fib6_rules_init:
433d49c3
DL
2781 fib6_rules_cleanup();
2782xfrm6_init:
433d49c3 2783 xfrm6_fini();
433d49c3 2784out_fib6_init:
433d49c3 2785 fib6_gc_cleanup();
8ed67789
DL
2786out_register_subsys:
2787 unregister_pernet_subsys(&ip6_route_net_ops);
433d49c3 2788out_kmem_cache:
f2fc6a54 2789 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 2790 goto out;
1da177e4
LT
2791}
2792
2793void ip6_route_cleanup(void)
2794{
8ed67789 2795 unregister_netdevice_notifier(&ip6_route_dev_notifier);
101367c2 2796 fib6_rules_cleanup();
1da177e4 2797 xfrm6_fini();
1da177e4 2798 fib6_gc_cleanup();
8ed67789 2799 unregister_pernet_subsys(&ip6_route_net_ops);
f2fc6a54 2800 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 2801}