]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/route.c
ipv6: Set FLOWI_FLAG_KNOWN_NH at flowi6_flags
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
51ebd318 60#include <net/nexthop.h>
1da177e4
LT
61
62#include <asm/uaccess.h>
63
64#ifdef CONFIG_SYSCTL
65#include <linux/sysctl.h>
66#endif
67
/*
 * Outcome of the neighbour check performed while scoring a route.
 * All failure codes are negative; the single success code is positive.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() drops the route */
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores 0, asks for round-robin */
	RT6_NUD_SUCCEED		= 1,
};
74
1716a961 75static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 76 const struct in6_addr *dest);
1da177e4 77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 78static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 79static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
569d3645 84static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
85
86static int ip6_pkt_discard(struct sk_buff *skb);
aad88724 87static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
7150aede 88static int ip6_pkt_prohibit(struct sk_buff *skb);
aad88724 89static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
1da177e4 90static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
91static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb);
4b32b5ad 95static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 96static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 97
70ceb4f5 98#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 99static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
100 const struct in6_addr *prefix, int prefixlen,
101 const struct in6_addr *gwaddr, int ifindex,
95c96174 102 unsigned int pref);
efa2cea0 103static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
106#endif
107
06582540
DM
108static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
109{
4b32b5ad 110 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 111
4b32b5ad
MKL
112 if (rt->rt6i_flags & RTF_CACHE)
113 return NULL;
114 else
3b471175 115 return dst_cow_metrics_generic(dst, old);
06582540
DM
116}
117
f894cbf8
DM
118static inline const void *choose_neigh_daddr(struct rt6_info *rt,
119 struct sk_buff *skb,
120 const void *daddr)
39232973
DM
121{
122 struct in6_addr *p = &rt->rt6i_gateway;
123
a7563f34 124 if (!ipv6_addr_any(p))
39232973 125 return (const void *) p;
f894cbf8
DM
126 else if (skb)
127 return &ipv6_hdr(skb)->daddr;
39232973
DM
128 return daddr;
129}
130
f894cbf8
DM
131static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
132 struct sk_buff *skb,
133 const void *daddr)
d3aaeb38 134{
39232973
DM
135 struct rt6_info *rt = (struct rt6_info *) dst;
136 struct neighbour *n;
137
f894cbf8 138 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 139 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
140 if (n)
141 return n;
142 return neigh_create(&nd_tbl, daddr, dst->dev);
143}
144
9a7ec3a9 145static struct dst_ops ip6_dst_ops_template = {
1da177e4 146 .family = AF_INET6,
1da177e4
LT
147 .gc = ip6_dst_gc,
148 .gc_thresh = 1024,
149 .check = ip6_dst_check,
0dbaee3b 150 .default_advmss = ip6_default_advmss,
ebb762f2 151 .mtu = ip6_mtu,
06582540 152 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
153 .destroy = ip6_dst_destroy,
154 .ifdown = ip6_dst_ifdown,
155 .negative_advice = ip6_negative_advice,
156 .link_failure = ip6_link_failure,
157 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 158 .redirect = rt6_do_redirect,
1ac06e03 159 .local_out = __ip6_local_out,
d3aaeb38 160 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
161};
162
ebb762f2 163static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 164{
618f9bc7
SK
165 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
166
167 return mtu ? : dst->dev->mtu;
ec831ea7
RD
168}
169
6700c270
DM
170static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
171 struct sk_buff *skb, u32 mtu)
14e50e57
DM
172{
173}
174
6700c270
DM
/* .redirect callback for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
179
0972ddb2
HB
180static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
181 unsigned long old)
182{
183 return NULL;
184}
185
14e50e57
DM
186static struct dst_ops ip6_dst_blackhole_ops = {
187 .family = AF_INET6,
14e50e57
DM
188 .destroy = ip6_dst_destroy,
189 .check = ip6_dst_check,
ebb762f2 190 .mtu = ip6_blackhole_mtu,
214f45c9 191 .default_advmss = ip6_default_advmss,
14e50e57 192 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 193 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 194 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 195 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
196};
197
62fa8a84 198static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 199 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
200};
201
fb0af4c7 202static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
203 .dst = {
204 .__refcnt = ATOMIC_INIT(1),
205 .__use = 1,
2c20cbd7 206 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 207 .error = -ENETUNREACH,
d8d1f30b
CG
208 .input = ip6_pkt_discard,
209 .output = ip6_pkt_discard_out,
1da177e4
LT
210 },
211 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 212 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
213 .rt6i_metric = ~(u32) 0,
214 .rt6i_ref = ATOMIC_INIT(1),
215};
216
101367c2
TG
217#ifdef CONFIG_IPV6_MULTIPLE_TABLES
218
fb0af4c7 219static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
220 .dst = {
221 .__refcnt = ATOMIC_INIT(1),
222 .__use = 1,
2c20cbd7 223 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 224 .error = -EACCES,
d8d1f30b
CG
225 .input = ip6_pkt_prohibit,
226 .output = ip6_pkt_prohibit_out,
101367c2
TG
227 },
228 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 229 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
230 .rt6i_metric = ~(u32) 0,
231 .rt6i_ref = ATOMIC_INIT(1),
232};
233
fb0af4c7 234static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
235 .dst = {
236 .__refcnt = ATOMIC_INIT(1),
237 .__use = 1,
2c20cbd7 238 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 239 .error = -EINVAL,
d8d1f30b 240 .input = dst_discard,
aad88724 241 .output = dst_discard_sk,
101367c2
TG
242 },
243 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 244 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
245 .rt6i_metric = ~(u32) 0,
246 .rt6i_ref = ATOMIC_INIT(1),
247};
248
249#endif
250
1da177e4 251/* allocate dst with ip6_dst_ops */
97bab73f 252static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 253 struct net_device *dev,
8b96d22d
DM
254 int flags,
255 struct fib6_table *table)
1da177e4 256{
97bab73f 257 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 258 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 259
97bab73f 260 if (rt) {
8104891b
SK
261 struct dst_entry *dst = &rt->dst;
262
263 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
51ebd318 264 INIT_LIST_HEAD(&rt->rt6i_siblings);
97bab73f 265 }
cf911662 266 return rt;
1da177e4
LT
267}
268
269static void ip6_dst_destroy(struct dst_entry *dst)
270{
271 struct rt6_info *rt = (struct rt6_info *)dst;
272 struct inet6_dev *idev = rt->rt6i_idev;
ecd98837 273 struct dst_entry *from = dst->from;
1da177e4 274
4b32b5ad 275 dst_destroy_metrics_generic(dst);
8e2ec639 276
38308473 277 if (idev) {
1da177e4
LT
278 rt->rt6i_idev = NULL;
279 in6_dev_put(idev);
1ab1457c 280 }
1716a961 281
ecd98837
YH
282 dst->from = NULL;
283 dst_release(from);
b3419363
DM
284}
285
1da177e4
LT
286static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
287 int how)
288{
289 struct rt6_info *rt = (struct rt6_info *)dst;
290 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 291 struct net_device *loopback_dev =
c346dca1 292 dev_net(dev)->loopback_dev;
1da177e4 293
97cac082
DM
294 if (dev != loopback_dev) {
295 if (idev && idev->dev == dev) {
296 struct inet6_dev *loopback_idev =
297 in6_dev_get(loopback_dev);
298 if (loopback_idev) {
299 rt->rt6i_idev = loopback_idev;
300 in6_dev_put(idev);
301 }
302 }
1da177e4
LT
303 }
304}
305
a50feda5 306static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 307{
1716a961
G
308 if (rt->rt6i_flags & RTF_EXPIRES) {
309 if (time_after(jiffies, rt->dst.expires))
a50feda5 310 return true;
1716a961 311 } else if (rt->dst.from) {
3fd91fb3 312 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 313 }
a50feda5 314 return false;
1da177e4
LT
315}
316
51ebd318
ND
317/* Multipath route selection:
318 * Hash based function using packet header and flowlabel.
319 * Adapted from fib_info_hashfn()
320 */
321static int rt6_info_hash_nhsfn(unsigned int candidate_count,
322 const struct flowi6 *fl6)
323{
324 unsigned int val = fl6->flowi6_proto;
325
c08977bb
YH
326 val ^= ipv6_addr_hash(&fl6->daddr);
327 val ^= ipv6_addr_hash(&fl6->saddr);
51ebd318
ND
328
329 /* Work only if this not encapsulated */
330 switch (fl6->flowi6_proto) {
331 case IPPROTO_UDP:
332 case IPPROTO_TCP:
333 case IPPROTO_SCTP:
b3ce5ae1
ND
334 val ^= (__force u16)fl6->fl6_sport;
335 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
336 break;
337
338 case IPPROTO_ICMPV6:
b3ce5ae1
ND
339 val ^= (__force u16)fl6->fl6_icmp_type;
340 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
341 break;
342 }
343 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 344 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
345
346 /* Perhaps, we need to tune, this function? */
347 val = val ^ (val >> 7) ^ (val >> 12);
348 return val % candidate_count;
349}
350
351static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
352 struct flowi6 *fl6, int oif,
353 int strict)
51ebd318
ND
354{
355 struct rt6_info *sibling, *next_sibling;
356 int route_choosen;
357
358 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
359 /* Don't change the route, if route_choosen == 0
360 * (siblings does not include ourself)
361 */
362 if (route_choosen)
363 list_for_each_entry_safe(sibling, next_sibling,
364 &match->rt6i_siblings, rt6i_siblings) {
365 route_choosen--;
366 if (route_choosen == 0) {
52bd4c0c
ND
367 if (rt6_score_route(sibling, oif, strict) < 0)
368 break;
51ebd318
ND
369 match = sibling;
370 break;
371 }
372 }
373 return match;
374}
375
1da177e4 376/*
c71099ac 377 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
378 */
379
8ed67789
DL
380static inline struct rt6_info *rt6_device_match(struct net *net,
381 struct rt6_info *rt,
b71d1d42 382 const struct in6_addr *saddr,
1da177e4 383 int oif,
d420895e 384 int flags)
1da177e4
LT
385{
386 struct rt6_info *local = NULL;
387 struct rt6_info *sprt;
388
dd3abc4e
YH
389 if (!oif && ipv6_addr_any(saddr))
390 goto out;
391
d8d1f30b 392 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 393 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
394
395 if (oif) {
1da177e4
LT
396 if (dev->ifindex == oif)
397 return sprt;
398 if (dev->flags & IFF_LOOPBACK) {
38308473 399 if (!sprt->rt6i_idev ||
1da177e4 400 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 401 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 402 continue;
1ab1457c 403 if (local && (!oif ||
1da177e4
LT
404 local->rt6i_idev->dev->ifindex == oif))
405 continue;
406 }
407 local = sprt;
408 }
dd3abc4e
YH
409 } else {
410 if (ipv6_chk_addr(net, saddr, dev,
411 flags & RT6_LOOKUP_F_IFACE))
412 return sprt;
1da177e4 413 }
dd3abc4e 414 }
1da177e4 415
dd3abc4e 416 if (oif) {
1da177e4
LT
417 if (local)
418 return local;
419
d420895e 420 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 421 return net->ipv6.ip6_null_entry;
1da177e4 422 }
dd3abc4e 423out:
1da177e4
LT
424 return rt;
425}
426
27097255 427#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
428struct __rt6_probe_work {
429 struct work_struct work;
430 struct in6_addr target;
431 struct net_device *dev;
432};
433
434static void rt6_probe_deferred(struct work_struct *w)
435{
436 struct in6_addr mcaddr;
437 struct __rt6_probe_work *work =
438 container_of(w, struct __rt6_probe_work, work);
439
440 addrconf_addr_solict_mult(&work->target, &mcaddr);
441 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
442 dev_put(work->dev);
662f5533 443 kfree(work);
c2f17e82
HFS
444}
445
27097255
YH
446static void rt6_probe(struct rt6_info *rt)
447{
f2c31e32 448 struct neighbour *neigh;
27097255
YH
449 /*
450 * Okay, this does not seem to be appropriate
451 * for now, however, we need to check if it
452 * is really so; aka Router Reachability Probing.
453 *
454 * Router Reachability Probe MUST be rate-limited
455 * to no more than one per minute.
456 */
2152caea 457 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 458 return;
2152caea
YH
459 rcu_read_lock_bh();
460 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
461 if (neigh) {
462 write_lock(&neigh->lock);
463 if (neigh->nud_state & NUD_VALID)
464 goto out;
7ff74a59 465 }
2152caea
YH
466
467 if (!neigh ||
52e16356 468 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
c2f17e82 469 struct __rt6_probe_work *work;
27097255 470
c2f17e82
HFS
471 work = kmalloc(sizeof(*work), GFP_ATOMIC);
472
473 if (neigh && work)
7e980569 474 __neigh_set_probe_once(neigh);
c2f17e82
HFS
475
476 if (neigh)
2152caea
YH
477 write_unlock(&neigh->lock);
478
c2f17e82
HFS
479 if (work) {
480 INIT_WORK(&work->work, rt6_probe_deferred);
481 work->target = rt->rt6i_gateway;
482 dev_hold(rt->dst.dev);
483 work->dev = rt->dst.dev;
484 schedule_work(&work->work);
485 }
f2c31e32 486 } else {
2152caea
YH
487out:
488 write_unlock(&neigh->lock);
f2c31e32 489 }
2152caea 490 rcu_read_unlock_bh();
27097255
YH
491}
492#else
/* Without CONFIG_IPV6_ROUTER_PREF router probing is compiled out. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
496#endif
497
1da177e4 498/*
554cfb7e 499 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 500 */
b6f99a21 501static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 502{
d1918542 503 struct net_device *dev = rt->dst.dev;
161980f4 504 if (!oif || dev->ifindex == oif)
554cfb7e 505 return 2;
161980f4
DM
506 if ((dev->flags & IFF_LOOPBACK) &&
507 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
508 return 1;
509 return 0;
554cfb7e 510}
1da177e4 511
afc154e9 512static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 513{
f2c31e32 514 struct neighbour *neigh;
afc154e9 515 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 516
4d0c5911
YH
517 if (rt->rt6i_flags & RTF_NONEXTHOP ||
518 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 519 return RT6_NUD_SUCCEED;
145a3621
YH
520
521 rcu_read_lock_bh();
522 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
523 if (neigh) {
524 read_lock(&neigh->lock);
554cfb7e 525 if (neigh->nud_state & NUD_VALID)
afc154e9 526 ret = RT6_NUD_SUCCEED;
398bcbeb 527#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 528 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 529 ret = RT6_NUD_SUCCEED;
7e980569
JB
530 else
531 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 532#endif
145a3621 533 read_unlock(&neigh->lock);
afc154e9
HFS
534 } else {
535 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 536 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 537 }
145a3621
YH
538 rcu_read_unlock_bh();
539
a5a81f0b 540 return ret;
1da177e4
LT
541}
542
554cfb7e
YH
543static int rt6_score_route(struct rt6_info *rt, int oif,
544 int strict)
1da177e4 545{
a5a81f0b 546 int m;
1ab1457c 547
4d0c5911 548 m = rt6_check_dev(rt, oif);
77d16f45 549 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 550 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
551#ifdef CONFIG_IPV6_ROUTER_PREF
552 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
553#endif
afc154e9
HFS
554 if (strict & RT6_LOOKUP_F_REACHABLE) {
555 int n = rt6_check_neigh(rt);
556 if (n < 0)
557 return n;
558 }
554cfb7e
YH
559 return m;
560}
561
f11e6659 562static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
563 int *mpri, struct rt6_info *match,
564 bool *do_rr)
554cfb7e 565{
f11e6659 566 int m;
afc154e9 567 bool match_do_rr = false;
f11e6659
DM
568
569 if (rt6_check_expired(rt))
570 goto out;
571
572 m = rt6_score_route(rt, oif, strict);
7e980569 573 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
574 match_do_rr = true;
575 m = 0; /* lowest valid score */
7e980569 576 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 577 goto out;
afc154e9
HFS
578 }
579
580 if (strict & RT6_LOOKUP_F_REACHABLE)
581 rt6_probe(rt);
f11e6659 582
7e980569 583 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 584 if (m > *mpri) {
afc154e9 585 *do_rr = match_do_rr;
f11e6659
DM
586 *mpri = m;
587 match = rt;
f11e6659 588 }
f11e6659
DM
589out:
590 return match;
591}
592
593static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
594 struct rt6_info *rr_head,
afc154e9
HFS
595 u32 metric, int oif, int strict,
596 bool *do_rr)
f11e6659 597{
9fbdcfaf 598 struct rt6_info *rt, *match, *cont;
554cfb7e 599 int mpri = -1;
1da177e4 600
f11e6659 601 match = NULL;
9fbdcfaf
SK
602 cont = NULL;
603 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
604 if (rt->rt6i_metric != metric) {
605 cont = rt;
606 break;
607 }
608
609 match = find_match(rt, oif, strict, &mpri, match, do_rr);
610 }
611
612 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
613 if (rt->rt6i_metric != metric) {
614 cont = rt;
615 break;
616 }
617
afc154e9 618 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
619 }
620
621 if (match || !cont)
622 return match;
623
624 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 625 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 626
f11e6659
DM
627 return match;
628}
1da177e4 629
f11e6659
DM
630static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
631{
632 struct rt6_info *match, *rt0;
8ed67789 633 struct net *net;
afc154e9 634 bool do_rr = false;
1da177e4 635
f11e6659
DM
636 rt0 = fn->rr_ptr;
637 if (!rt0)
638 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 639
afc154e9
HFS
640 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
641 &do_rr);
1da177e4 642
afc154e9 643 if (do_rr) {
d8d1f30b 644 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 645
554cfb7e 646 /* no entries matched; do round-robin */
f11e6659
DM
647 if (!next || next->rt6i_metric != rt0->rt6i_metric)
648 next = fn->leaf;
649
650 if (next != rt0)
651 fn->rr_ptr = next;
1da177e4 652 }
1da177e4 653
d1918542 654 net = dev_net(rt0->dst.dev);
a02cec21 655 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
656}
657
8b9df265
MKL
658static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
659{
660 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
661}
662
70ceb4f5
YH
663#ifdef CONFIG_IPV6_ROUTE_INFO
664int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 665 const struct in6_addr *gwaddr)
70ceb4f5 666{
c346dca1 667 struct net *net = dev_net(dev);
70ceb4f5
YH
668 struct route_info *rinfo = (struct route_info *) opt;
669 struct in6_addr prefix_buf, *prefix;
670 unsigned int pref;
4bed72e4 671 unsigned long lifetime;
70ceb4f5
YH
672 struct rt6_info *rt;
673
674 if (len < sizeof(struct route_info)) {
675 return -EINVAL;
676 }
677
678 /* Sanity check for prefix_len and length */
679 if (rinfo->length > 3) {
680 return -EINVAL;
681 } else if (rinfo->prefix_len > 128) {
682 return -EINVAL;
683 } else if (rinfo->prefix_len > 64) {
684 if (rinfo->length < 2) {
685 return -EINVAL;
686 }
687 } else if (rinfo->prefix_len > 0) {
688 if (rinfo->length < 1) {
689 return -EINVAL;
690 }
691 }
692
693 pref = rinfo->route_pref;
694 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 695 return -EINVAL;
70ceb4f5 696
4bed72e4 697 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
698
699 if (rinfo->length == 3)
700 prefix = (struct in6_addr *)rinfo->prefix;
701 else {
702 /* this function is safe */
703 ipv6_addr_prefix(&prefix_buf,
704 (struct in6_addr *)rinfo->prefix,
705 rinfo->prefix_len);
706 prefix = &prefix_buf;
707 }
708
f104a567
DJ
709 if (rinfo->prefix_len == 0)
710 rt = rt6_get_dflt_router(gwaddr, dev);
711 else
712 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
713 gwaddr, dev->ifindex);
70ceb4f5
YH
714
715 if (rt && !lifetime) {
e0a1ad73 716 ip6_del_rt(rt);
70ceb4f5
YH
717 rt = NULL;
718 }
719
720 if (!rt && lifetime)
efa2cea0 721 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
722 pref);
723 else if (rt)
724 rt->rt6i_flags = RTF_ROUTEINFO |
725 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
726
727 if (rt) {
1716a961
G
728 if (!addrconf_finite_timeout(lifetime))
729 rt6_clean_expires(rt);
730 else
731 rt6_set_expires(rt, jiffies + HZ * lifetime);
732
94e187c0 733 ip6_rt_put(rt);
70ceb4f5
YH
734 }
735 return 0;
736}
737#endif
738
a3c00e46
MKL
739static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
740 struct in6_addr *saddr)
741{
742 struct fib6_node *pn;
743 while (1) {
744 if (fn->fn_flags & RTN_TL_ROOT)
745 return NULL;
746 pn = fn->parent;
747 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
748 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
749 else
750 fn = pn;
751 if (fn->fn_flags & RTN_RTINFO)
752 return fn;
753 }
754}
c71099ac 755
8ed67789
DL
756static struct rt6_info *ip6_pol_route_lookup(struct net *net,
757 struct fib6_table *table,
4c9483b2 758 struct flowi6 *fl6, int flags)
1da177e4
LT
759{
760 struct fib6_node *fn;
761 struct rt6_info *rt;
762
c71099ac 763 read_lock_bh(&table->tb6_lock);
4c9483b2 764 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
765restart:
766 rt = fn->leaf;
4c9483b2 767 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 768 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 769 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
770 if (rt == net->ipv6.ip6_null_entry) {
771 fn = fib6_backtrack(fn, &fl6->saddr);
772 if (fn)
773 goto restart;
774 }
d8d1f30b 775 dst_use(&rt->dst, jiffies);
c71099ac 776 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
777 return rt;
778
779}
780
67ba4152 781struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
782 int flags)
783{
784 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
785}
786EXPORT_SYMBOL_GPL(ip6_route_lookup);
787
9acd9f3a
YH
788struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
789 const struct in6_addr *saddr, int oif, int strict)
c71099ac 790{
4c9483b2
DM
791 struct flowi6 fl6 = {
792 .flowi6_oif = oif,
793 .daddr = *daddr,
c71099ac
TG
794 };
795 struct dst_entry *dst;
77d16f45 796 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 797
adaa70bb 798 if (saddr) {
4c9483b2 799 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
800 flags |= RT6_LOOKUP_F_HAS_SADDR;
801 }
802
4c9483b2 803 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
804 if (dst->error == 0)
805 return (struct rt6_info *) dst;
806
807 dst_release(dst);
808
1da177e4
LT
809 return NULL;
810}
7159039a
YH
811EXPORT_SYMBOL(rt6_lookup);
812
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to the route, it may be destroyed.
 */
818
e5fd387a 819static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 820 struct mx6_config *mxc)
1da177e4
LT
821{
822 int err;
c71099ac 823 struct fib6_table *table;
1da177e4 824
c71099ac
TG
825 table = rt->rt6i_table;
826 write_lock_bh(&table->tb6_lock);
e715b6d3 827 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 828 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
829
830 return err;
831}
832
40e22e8f
TG
833int ip6_ins_rt(struct rt6_info *rt)
834{
e715b6d3
FW
835 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
836 struct mx6_config mxc = { .mx = NULL, };
837
838 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
839}
840
8b9df265
MKL
841static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
842 const struct in6_addr *daddr,
843 const struct in6_addr *saddr)
1da177e4 844{
1da177e4
LT
845 struct rt6_info *rt;
846
847 /*
848 * Clone the route.
849 */
850
21efcfa0 851 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
852
853 if (rt) {
1da177e4 854 rt->rt6i_flags |= RTF_CACHE;
1da177e4 855
8b9df265
MKL
856 if (!rt6_is_gw_or_nonexthop(ort)) {
857 if (ort->rt6i_dst.plen != 128 &&
858 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
859 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 860#ifdef CONFIG_IPV6_SUBTREES
8b9df265
MKL
861 if (rt->rt6i_src.plen && saddr) {
862 rt->rt6i_src.addr = *saddr;
863 rt->rt6i_src.plen = 128;
864 }
1da177e4 865#endif
8b9df265 866 }
95a9a5ba 867 }
1da177e4 868
95a9a5ba
YH
869 return rt;
870}
1da177e4 871
8ed67789 872static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 873 struct flowi6 *fl6, int flags)
1da177e4 874{
367efcb9 875 struct fib6_node *fn, *saved_fn;
45e4fd26 876 struct rt6_info *rt;
c71099ac 877 int strict = 0;
1da177e4 878
77d16f45 879 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
880 if (net->ipv6.devconf_all->forwarding == 0)
881 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 882
c71099ac 883 read_lock_bh(&table->tb6_lock);
1da177e4 884
4c9483b2 885 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 886 saved_fn = fn;
1da177e4 887
a3c00e46 888redo_rt6_select:
367efcb9 889 rt = rt6_select(fn, oif, strict);
52bd4c0c 890 if (rt->rt6i_nsiblings)
367efcb9 891 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
892 if (rt == net->ipv6.ip6_null_entry) {
893 fn = fib6_backtrack(fn, &fl6->saddr);
894 if (fn)
895 goto redo_rt6_select;
367efcb9
MKL
896 else if (strict & RT6_LOOKUP_F_REACHABLE) {
897 /* also consider unreachable route */
898 strict &= ~RT6_LOOKUP_F_REACHABLE;
899 fn = saved_fn;
900 goto redo_rt6_select;
367efcb9 901 }
a3c00e46
MKL
902 }
903
d8d1f30b 904 dst_hold(&rt->dst);
c71099ac 905 read_unlock_bh(&table->tb6_lock);
fb9de91e 906
4b32b5ad 907 rt6_dst_from_metrics_check(rt);
d8d1f30b
CG
908 rt->dst.lastuse = jiffies;
909 rt->dst.__use++;
c71099ac
TG
910
911 return rt;
1da177e4
LT
912}
913
8ed67789 914static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 915 struct flowi6 *fl6, int flags)
4acad72d 916{
4c9483b2 917 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
918}
919
72331bc0
SL
920static struct dst_entry *ip6_route_input_lookup(struct net *net,
921 struct net_device *dev,
922 struct flowi6 *fl6, int flags)
923{
924 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
925 flags |= RT6_LOOKUP_F_IFACE;
926
927 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
928}
929
c71099ac
TG
930void ip6_route_input(struct sk_buff *skb)
931{
b71d1d42 932 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 933 struct net *net = dev_net(skb->dev);
adaa70bb 934 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
935 struct flowi6 fl6 = {
936 .flowi6_iif = skb->dev->ifindex,
937 .daddr = iph->daddr,
938 .saddr = iph->saddr,
6502ca52 939 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
940 .flowi6_mark = skb->mark,
941 .flowi6_proto = iph->nexthdr,
c71099ac 942 };
adaa70bb 943
72331bc0 944 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
945}
946
8ed67789 947static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 948 struct flowi6 *fl6, int flags)
1da177e4 949{
4c9483b2 950 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
951}
952
67ba4152 953struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 954 struct flowi6 *fl6)
c71099ac
TG
955{
956 int flags = 0;
957
1fb9489b 958 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 959
4c9483b2 960 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 961 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 962
4c9483b2 963 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 964 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
965 else if (sk)
966 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 967
4c9483b2 968 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 969}
7159039a 970EXPORT_SYMBOL(ip6_route_output);
1da177e4 971
2774c131 972struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 973{
5c1e6aa3 974 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
975 struct dst_entry *new = NULL;
976
f5b0a874 977 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 978 if (rt) {
d8d1f30b 979 new = &rt->dst;
14e50e57 980
8104891b 981 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
8104891b 982
14e50e57 983 new->__use = 1;
352e512c 984 new->input = dst_discard;
aad88724 985 new->output = dst_discard_sk;
14e50e57 986
21efcfa0
ED
987 if (dst_metrics_read_only(&ort->dst))
988 new->_metrics = ort->dst._metrics;
989 else
990 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
991 rt->rt6i_idev = ort->rt6i_idev;
992 if (rt->rt6i_idev)
993 in6_dev_hold(rt->rt6i_idev);
14e50e57 994
4e3fd7a0 995 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961 996 rt->rt6i_flags = ort->rt6i_flags;
14e50e57
DM
997 rt->rt6i_metric = 0;
998
999 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1000#ifdef CONFIG_IPV6_SUBTREES
1001 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1002#endif
1003
1004 dst_free(new);
1005 }
1006
69ead7af
DM
1007 dst_release(dst_orig);
1008 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1009}
14e50e57 1010
1da177e4
LT
1011/*
1012 * Destination cache support functions
1013 */
1014
4b32b5ad
MKL
1015static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1016{
1017 if (rt->dst.from &&
1018 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1019 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1020}
1021
1da177e4
LT
1022static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1023{
1024 struct rt6_info *rt;
1025
1026 rt = (struct rt6_info *) dst;
1027
6f3118b5
ND
1028 /* All IPV6 dsts are created with ->obsolete set to the value
1029 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1030 * into this function always.
1031 */
e3bc10bd
HFS
1032 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1033 return NULL;
a4477c4d 1034
e3bc10bd
HFS
1035 if (rt6_check_expired(rt))
1036 return NULL;
1037
4b32b5ad
MKL
1038 rt6_dst_from_metrics_check(rt);
1039
e3bc10bd 1040 return dst;
1da177e4
LT
1041}
1042
1043static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1044{
1045 struct rt6_info *rt = (struct rt6_info *) dst;
1046
1047 if (rt) {
54c1a859
YH
1048 if (rt->rt6i_flags & RTF_CACHE) {
1049 if (rt6_check_expired(rt)) {
1050 ip6_del_rt(rt);
1051 dst = NULL;
1052 }
1053 } else {
1da177e4 1054 dst_release(dst);
54c1a859
YH
1055 dst = NULL;
1056 }
1da177e4 1057 }
54c1a859 1058 return dst;
1da177e4
LT
1059}
1060
1061static void ip6_link_failure(struct sk_buff *skb)
1062{
1063 struct rt6_info *rt;
1064
3ffe533c 1065 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1066
adf30907 1067 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1068 if (rt) {
1eb4f758
HFS
1069 if (rt->rt6i_flags & RTF_CACHE) {
1070 dst_hold(&rt->dst);
1071 if (ip6_del_rt(rt))
1072 dst_free(&rt->dst);
1073 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1074 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1075 }
1da177e4
LT
1076 }
1077}
1078
45e4fd26
MKL
1079static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1080{
1081 struct net *net = dev_net(rt->dst.dev);
1082
1083 rt->rt6i_flags |= RTF_MODIFIED;
1084 rt->rt6i_pmtu = mtu;
1085 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1086}
1087
1088static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1089 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1090{
67ba4152 1091 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1092
45e4fd26
MKL
1093 if (rt6->rt6i_flags & RTF_LOCAL)
1094 return;
81aded24 1095
45e4fd26
MKL
1096 dst_confirm(dst);
1097 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1098 if (mtu >= dst_mtu(dst))
1099 return;
9d289715 1100
45e4fd26
MKL
1101 if (rt6->rt6i_flags & RTF_CACHE) {
1102 rt6_do_update_pmtu(rt6, mtu);
1103 } else {
1104 const struct in6_addr *daddr, *saddr;
1105 struct rt6_info *nrt6;
1106
1107 if (iph) {
1108 daddr = &iph->daddr;
1109 saddr = &iph->saddr;
1110 } else if (sk) {
1111 daddr = &sk->sk_v6_daddr;
1112 saddr = &inet6_sk(sk)->saddr;
1113 } else {
1114 return;
1115 }
1116 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1117 if (nrt6) {
1118 rt6_do_update_pmtu(nrt6, mtu);
1119
1120 /* ip6_ins_rt(nrt6) will bump the
1121 * rt6->rt6i_node->fn_sernum
1122 * which will fail the next rt6_check() and
1123 * invalidate the sk->sk_dst_cache.
1124 */
1125 ip6_ins_rt(nrt6);
1126 }
1da177e4
LT
1127 }
1128}
1129
45e4fd26
MKL
1130static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1131 struct sk_buff *skb, u32 mtu)
1132{
1133 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1134}
1135
42ae66c8
DM
1136void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1137 int oif, u32 mark)
81aded24
DM
1138{
1139 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1140 struct dst_entry *dst;
1141 struct flowi6 fl6;
1142
1143 memset(&fl6, 0, sizeof(fl6));
1144 fl6.flowi6_oif = oif;
1b3c61dc 1145 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1146 fl6.daddr = iph->daddr;
1147 fl6.saddr = iph->saddr;
6502ca52 1148 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1149
1150 dst = ip6_route_output(net, NULL, &fl6);
1151 if (!dst->error)
45e4fd26 1152 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1153 dst_release(dst);
1154}
1155EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1156
1157void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1158{
1159 ip6_update_pmtu(skb, sock_net(sk), mtu,
1160 sk->sk_bound_dev_if, sk->sk_mark);
1161}
1162EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1163
b55b76b2
DJ
1164/* Handle redirects */
1165struct ip6rd_flowi {
1166 struct flowi6 fl6;
1167 struct in6_addr gateway;
1168};
1169
1170static struct rt6_info *__ip6_route_redirect(struct net *net,
1171 struct fib6_table *table,
1172 struct flowi6 *fl6,
1173 int flags)
1174{
1175 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1176 struct rt6_info *rt;
1177 struct fib6_node *fn;
1178
1179 /* Get the "current" route for this destination and
1180 * check if the redirect has come from approriate router.
1181 *
1182 * RFC 4861 specifies that redirects should only be
1183 * accepted if they come from the nexthop to the target.
1184 * Due to the way the routes are chosen, this notion
1185 * is a bit fuzzy and one might need to check all possible
1186 * routes.
1187 */
1188
1189 read_lock_bh(&table->tb6_lock);
1190 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1191restart:
1192 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1193 if (rt6_check_expired(rt))
1194 continue;
1195 if (rt->dst.error)
1196 break;
1197 if (!(rt->rt6i_flags & RTF_GATEWAY))
1198 continue;
1199 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1200 continue;
1201 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1202 continue;
1203 break;
1204 }
1205
1206 if (!rt)
1207 rt = net->ipv6.ip6_null_entry;
1208 else if (rt->dst.error) {
1209 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1210 goto out;
1211 }
1212
1213 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1214 fn = fib6_backtrack(fn, &fl6->saddr);
1215 if (fn)
1216 goto restart;
b55b76b2 1217 }
a3c00e46 1218
b0a1ba59 1219out:
b55b76b2
DJ
1220 dst_hold(&rt->dst);
1221
1222 read_unlock_bh(&table->tb6_lock);
1223
1224 return rt;
1225};
1226
1227static struct dst_entry *ip6_route_redirect(struct net *net,
1228 const struct flowi6 *fl6,
1229 const struct in6_addr *gateway)
1230{
1231 int flags = RT6_LOOKUP_F_HAS_SADDR;
1232 struct ip6rd_flowi rdfl;
1233
1234 rdfl.fl6 = *fl6;
1235 rdfl.gateway = *gateway;
1236
1237 return fib6_rule_lookup(net, &rdfl.fl6,
1238 flags, __ip6_route_redirect);
1239}
1240
3a5ad2ee
DM
1241void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1242{
1243 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1244 struct dst_entry *dst;
1245 struct flowi6 fl6;
1246
1247 memset(&fl6, 0, sizeof(fl6));
e374c618 1248 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1249 fl6.flowi6_oif = oif;
1250 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1251 fl6.daddr = iph->daddr;
1252 fl6.saddr = iph->saddr;
6502ca52 1253 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1254
b55b76b2
DJ
1255 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1256 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1257 dst_release(dst);
1258}
1259EXPORT_SYMBOL_GPL(ip6_redirect);
1260
c92a59ec
DJ
1261void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1262 u32 mark)
1263{
1264 const struct ipv6hdr *iph = ipv6_hdr(skb);
1265 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1266 struct dst_entry *dst;
1267 struct flowi6 fl6;
1268
1269 memset(&fl6, 0, sizeof(fl6));
e374c618 1270 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1271 fl6.flowi6_oif = oif;
1272 fl6.flowi6_mark = mark;
c92a59ec
DJ
1273 fl6.daddr = msg->dest;
1274 fl6.saddr = iph->daddr;
1275
b55b76b2
DJ
1276 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1277 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1278 dst_release(dst);
1279}
1280
3a5ad2ee
DM
1281void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1282{
1283 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1284}
1285EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1286
0dbaee3b 1287static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1288{
0dbaee3b
DM
1289 struct net_device *dev = dst->dev;
1290 unsigned int mtu = dst_mtu(dst);
1291 struct net *net = dev_net(dev);
1292
1da177e4
LT
1293 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1294
5578689a
DL
1295 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1296 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1297
1298 /*
1ab1457c
YH
1299 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1300 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1301 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1302 * rely only on pmtu discovery"
1303 */
1304 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1305 mtu = IPV6_MAXPLEN;
1306 return mtu;
1307}
1308
ebb762f2 1309static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1310{
4b32b5ad
MKL
1311 const struct rt6_info *rt = (const struct rt6_info *)dst;
1312 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1313 struct inet6_dev *idev;
618f9bc7 1314
4b32b5ad
MKL
1315 if (mtu)
1316 goto out;
1317
1318 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1319 if (mtu)
30f78d8e 1320 goto out;
618f9bc7
SK
1321
1322 mtu = IPV6_MIN_MTU;
d33e4553
DM
1323
1324 rcu_read_lock();
1325 idev = __in6_dev_get(dst->dev);
1326 if (idev)
1327 mtu = idev->cnf.mtu6;
1328 rcu_read_unlock();
1329
30f78d8e
ED
1330out:
1331 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1332}
1333
3b00944c
YH
/*
 * Head of the list (linked through dst->next) of dsts handed out by
 * icmp6_dst_alloc(), and the lock taken around every access to it.
 */
static DEFINE_SPINLOCK(icmp6_dst_lock);
static struct dst_entry *icmp6_dst_gc_list;
5d0bbeeb 1336
3b00944c 1337struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1338 struct flowi6 *fl6)
1da177e4 1339{
87a11578 1340 struct dst_entry *dst;
1da177e4
LT
1341 struct rt6_info *rt;
1342 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1343 struct net *net = dev_net(dev);
1da177e4 1344
38308473 1345 if (unlikely(!idev))
122bdf67 1346 return ERR_PTR(-ENODEV);
1da177e4 1347
8b96d22d 1348 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1349 if (unlikely(!rt)) {
1da177e4 1350 in6_dev_put(idev);
87a11578 1351 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1352 goto out;
1353 }
1354
8e2ec639
YZ
1355 rt->dst.flags |= DST_HOST;
1356 rt->dst.output = ip6_output;
d8d1f30b 1357 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1358 rt->rt6i_gateway = fl6->daddr;
87a11578 1359 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1360 rt->rt6i_dst.plen = 128;
1361 rt->rt6i_idev = idev;
14edd87d 1362 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1363
3b00944c 1364 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1365 rt->dst.next = icmp6_dst_gc_list;
1366 icmp6_dst_gc_list = &rt->dst;
3b00944c 1367 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1368
5578689a 1369 fib6_force_start_gc(net);
1da177e4 1370
87a11578
DM
1371 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1372
1da177e4 1373out:
87a11578 1374 return dst;
1da177e4
LT
1375}
1376
3d0f24a7 1377int icmp6_dst_gc(void)
1da177e4 1378{
e9476e95 1379 struct dst_entry *dst, **pprev;
3d0f24a7 1380 int more = 0;
1da177e4 1381
3b00944c
YH
1382 spin_lock_bh(&icmp6_dst_lock);
1383 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1384
1da177e4
LT
1385 while ((dst = *pprev) != NULL) {
1386 if (!atomic_read(&dst->__refcnt)) {
1387 *pprev = dst->next;
1388 dst_free(dst);
1da177e4
LT
1389 } else {
1390 pprev = &dst->next;
3d0f24a7 1391 ++more;
1da177e4
LT
1392 }
1393 }
1394
3b00944c 1395 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1396
3d0f24a7 1397 return more;
1da177e4
LT
1398}
1399
1e493d19
DM
1400static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1401 void *arg)
1402{
1403 struct dst_entry *dst, **pprev;
1404
1405 spin_lock_bh(&icmp6_dst_lock);
1406 pprev = &icmp6_dst_gc_list;
1407 while ((dst = *pprev) != NULL) {
1408 struct rt6_info *rt = (struct rt6_info *) dst;
1409 if (func(rt, arg)) {
1410 *pprev = dst->next;
1411 dst_free(dst);
1412 } else {
1413 pprev = &dst->next;
1414 }
1415 }
1416 spin_unlock_bh(&icmp6_dst_lock);
1417}
1418
569d3645 1419static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1420{
86393e52 1421 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1422 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1423 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1424 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1425 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1426 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1427 int entries;
7019b78e 1428
fc66f95c 1429 entries = dst_entries_get_fast(ops);
49a18d86 1430 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1431 entries <= rt_max_size)
1da177e4
LT
1432 goto out;
1433
6891a346 1434 net->ipv6.ip6_rt_gc_expire++;
14956643 1435 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1436 entries = dst_entries_get_slow(ops);
1437 if (entries < ops->gc_thresh)
7019b78e 1438 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1439out:
7019b78e 1440 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1441 return entries > rt_max_size;
1da177e4
LT
1442}
1443
e715b6d3
FW
1444static int ip6_convert_metrics(struct mx6_config *mxc,
1445 const struct fib6_config *cfg)
1446{
1447 struct nlattr *nla;
1448 int remaining;
1449 u32 *mp;
1450
63159f29 1451 if (!cfg->fc_mx)
e715b6d3
FW
1452 return 0;
1453
1454 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1455 if (unlikely(!mp))
1456 return -ENOMEM;
1457
1458 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1459 int type = nla_type(nla);
1460
1461 if (type) {
ea697639
DB
1462 u32 val;
1463
e715b6d3
FW
1464 if (unlikely(type > RTAX_MAX))
1465 goto err;
ea697639
DB
1466 if (type == RTAX_CC_ALGO) {
1467 char tmp[TCP_CA_NAME_MAX];
1468
1469 nla_strlcpy(tmp, nla, sizeof(tmp));
1470 val = tcp_ca_get_key_by_name(tmp);
1471 if (val == TCP_CA_UNSPEC)
1472 goto err;
1473 } else {
1474 val = nla_get_u32(nla);
1475 }
e715b6d3 1476
ea697639 1477 mp[type - 1] = val;
e715b6d3
FW
1478 __set_bit(type - 1, mxc->mx_valid);
1479 }
1480 }
1481
1482 mxc->mx = mp;
1483
1484 return 0;
1485 err:
1486 kfree(mp);
1487 return -EINVAL;
1488}
1da177e4 1489
86872cb5 1490int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1491{
1492 int err;
5578689a 1493 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1494 struct rt6_info *rt = NULL;
1495 struct net_device *dev = NULL;
1496 struct inet6_dev *idev = NULL;
c71099ac 1497 struct fib6_table *table;
e715b6d3 1498 struct mx6_config mxc = { .mx = NULL, };
1da177e4
LT
1499 int addr_type;
1500
86872cb5 1501 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1502 return -EINVAL;
1503#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1504 if (cfg->fc_src_len)
1da177e4
LT
1505 return -EINVAL;
1506#endif
86872cb5 1507 if (cfg->fc_ifindex) {
1da177e4 1508 err = -ENODEV;
5578689a 1509 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1510 if (!dev)
1511 goto out;
1512 idev = in6_dev_get(dev);
1513 if (!idev)
1514 goto out;
1515 }
1516
86872cb5
TG
1517 if (cfg->fc_metric == 0)
1518 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1519
d71314b4 1520 err = -ENOBUFS;
38308473
DM
1521 if (cfg->fc_nlinfo.nlh &&
1522 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1523 table = fib6_get_table(net, cfg->fc_table);
38308473 1524 if (!table) {
f3213831 1525 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1526 table = fib6_new_table(net, cfg->fc_table);
1527 }
1528 } else {
1529 table = fib6_new_table(net, cfg->fc_table);
1530 }
38308473
DM
1531
1532 if (!table)
c71099ac 1533 goto out;
c71099ac 1534
c88507fb 1535 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1da177e4 1536
38308473 1537 if (!rt) {
1da177e4
LT
1538 err = -ENOMEM;
1539 goto out;
1540 }
1541
1716a961
G
1542 if (cfg->fc_flags & RTF_EXPIRES)
1543 rt6_set_expires(rt, jiffies +
1544 clock_t_to_jiffies(cfg->fc_expires));
1545 else
1546 rt6_clean_expires(rt);
1da177e4 1547
86872cb5
TG
1548 if (cfg->fc_protocol == RTPROT_UNSPEC)
1549 cfg->fc_protocol = RTPROT_BOOT;
1550 rt->rt6i_protocol = cfg->fc_protocol;
1551
1552 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1553
1554 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1555 rt->dst.input = ip6_mc_input;
ab79ad14
1556 else if (cfg->fc_flags & RTF_LOCAL)
1557 rt->dst.input = ip6_input;
1da177e4 1558 else
d8d1f30b 1559 rt->dst.input = ip6_forward;
1da177e4 1560
d8d1f30b 1561 rt->dst.output = ip6_output;
1da177e4 1562
86872cb5
TG
1563 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1564 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1565 if (rt->rt6i_dst.plen == 128)
e5fd387a 1566 rt->dst.flags |= DST_HOST;
e5fd387a 1567
1da177e4 1568#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1569 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1570 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1571#endif
1572
86872cb5 1573 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1574
1575 /* We cannot add true routes via loopback here,
1576 they would result in kernel looping; promote them to reject routes
1577 */
86872cb5 1578 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1579 (dev && (dev->flags & IFF_LOOPBACK) &&
1580 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1581 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1582 /* hold loopback dev/idev if we haven't done so. */
5578689a 1583 if (dev != net->loopback_dev) {
1da177e4
LT
1584 if (dev) {
1585 dev_put(dev);
1586 in6_dev_put(idev);
1587 }
5578689a 1588 dev = net->loopback_dev;
1da177e4
LT
1589 dev_hold(dev);
1590 idev = in6_dev_get(dev);
1591 if (!idev) {
1592 err = -ENODEV;
1593 goto out;
1594 }
1595 }
1da177e4 1596 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1597 switch (cfg->fc_type) {
1598 case RTN_BLACKHOLE:
1599 rt->dst.error = -EINVAL;
aad88724 1600 rt->dst.output = dst_discard_sk;
7150aede 1601 rt->dst.input = dst_discard;
ef2c7d7b
ND
1602 break;
1603 case RTN_PROHIBIT:
1604 rt->dst.error = -EACCES;
7150aede
K
1605 rt->dst.output = ip6_pkt_prohibit_out;
1606 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1607 break;
b4949ab2 1608 case RTN_THROW:
ef2c7d7b 1609 default:
7150aede
K
1610 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1611 : -ENETUNREACH;
1612 rt->dst.output = ip6_pkt_discard_out;
1613 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1614 break;
1615 }
1da177e4
LT
1616 goto install_route;
1617 }
1618
86872cb5 1619 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1620 const struct in6_addr *gw_addr;
1da177e4
LT
1621 int gwa_type;
1622
86872cb5 1623 gw_addr = &cfg->fc_gateway;
48ed7b26
FW
1624
1625 /* if gw_addr is local we will fail to detect this in case
1626 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1627 * will return already-added prefix route via interface that
1628 * prefix route was assigned to, which might be non-loopback.
1629 */
1630 err = -EINVAL;
1631 if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0))
1632 goto out;
1633
4e3fd7a0 1634 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1635 gwa_type = ipv6_addr_type(gw_addr);
1636
1637 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1638 struct rt6_info *grt;
1639
1640 /* IPv6 strictly inhibits using not link-local
1641 addresses as nexthop address.
1642 Otherwise, router will not able to send redirects.
1643 It is very good, but in some (rare!) circumstances
1644 (SIT, PtP, NBMA NOARP links) it is handy to allow
1645 some exceptions. --ANK
1646 */
38308473 1647 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1648 goto out;
1649
5578689a 1650 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1651
1652 err = -EHOSTUNREACH;
38308473 1653 if (!grt)
1da177e4
LT
1654 goto out;
1655 if (dev) {
d1918542 1656 if (dev != grt->dst.dev) {
94e187c0 1657 ip6_rt_put(grt);
1da177e4
LT
1658 goto out;
1659 }
1660 } else {
d1918542 1661 dev = grt->dst.dev;
1da177e4
LT
1662 idev = grt->rt6i_idev;
1663 dev_hold(dev);
1664 in6_dev_hold(grt->rt6i_idev);
1665 }
38308473 1666 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1667 err = 0;
94e187c0 1668 ip6_rt_put(grt);
1da177e4
LT
1669
1670 if (err)
1671 goto out;
1672 }
1673 err = -EINVAL;
38308473 1674 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1675 goto out;
1676 }
1677
1678 err = -ENODEV;
38308473 1679 if (!dev)
1da177e4
LT
1680 goto out;
1681
c3968a85
DW
1682 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1683 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1684 err = -EINVAL;
1685 goto out;
1686 }
4e3fd7a0 1687 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1688 rt->rt6i_prefsrc.plen = 128;
1689 } else
1690 rt->rt6i_prefsrc.plen = 0;
1691
86872cb5 1692 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1693
1694install_route:
d8d1f30b 1695 rt->dst.dev = dev;
1da177e4 1696 rt->rt6i_idev = idev;
c71099ac 1697 rt->rt6i_table = table;
63152fc0 1698
c346dca1 1699 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1700
e715b6d3
FW
1701 err = ip6_convert_metrics(&mxc, cfg);
1702 if (err)
1703 goto out;
1da177e4 1704
e715b6d3
FW
1705 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1706
1707 kfree(mxc.mx);
1708 return err;
1da177e4
LT
1709out:
1710 if (dev)
1711 dev_put(dev);
1712 if (idev)
1713 in6_dev_put(idev);
1714 if (rt)
d8d1f30b 1715 dst_free(&rt->dst);
1da177e4
LT
1716 return err;
1717}
1718
86872cb5 1719static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1720{
1721 int err;
c71099ac 1722 struct fib6_table *table;
d1918542 1723 struct net *net = dev_net(rt->dst.dev);
1da177e4 1724
6825a26c
G
1725 if (rt == net->ipv6.ip6_null_entry) {
1726 err = -ENOENT;
1727 goto out;
1728 }
6c813a72 1729
c71099ac
TG
1730 table = rt->rt6i_table;
1731 write_lock_bh(&table->tb6_lock);
86872cb5 1732 err = fib6_del(rt, info);
c71099ac 1733 write_unlock_bh(&table->tb6_lock);
1da177e4 1734
6825a26c 1735out:
94e187c0 1736 ip6_rt_put(rt);
1da177e4
LT
1737 return err;
1738}
1739
e0a1ad73
TG
1740int ip6_del_rt(struct rt6_info *rt)
1741{
4d1169c1 1742 struct nl_info info = {
d1918542 1743 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1744 };
528c4ceb 1745 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1746}
1747
86872cb5 1748static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1749{
c71099ac 1750 struct fib6_table *table;
1da177e4
LT
1751 struct fib6_node *fn;
1752 struct rt6_info *rt;
1753 int err = -ESRCH;
1754
5578689a 1755 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1756 if (!table)
c71099ac
TG
1757 return err;
1758
1759 read_lock_bh(&table->tb6_lock);
1da177e4 1760
c71099ac 1761 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1762 &cfg->fc_dst, cfg->fc_dst_len,
1763 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1764
1da177e4 1765 if (fn) {
d8d1f30b 1766 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
1767 if ((rt->rt6i_flags & RTF_CACHE) &&
1768 !(cfg->fc_flags & RTF_CACHE))
1769 continue;
86872cb5 1770 if (cfg->fc_ifindex &&
d1918542
DM
1771 (!rt->dst.dev ||
1772 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1773 continue;
86872cb5
TG
1774 if (cfg->fc_flags & RTF_GATEWAY &&
1775 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1776 continue;
86872cb5 1777 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1778 continue;
d8d1f30b 1779 dst_hold(&rt->dst);
c71099ac 1780 read_unlock_bh(&table->tb6_lock);
1da177e4 1781
86872cb5 1782 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1783 }
1784 }
c71099ac 1785 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1786
1787 return err;
1788}
1789
6700c270 1790static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1791{
e8599ff4 1792 struct net *net = dev_net(skb->dev);
a6279458 1793 struct netevent_redirect netevent;
e8599ff4 1794 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
1795 struct ndisc_options ndopts;
1796 struct inet6_dev *in6_dev;
1797 struct neighbour *neigh;
71bcdba0 1798 struct rd_msg *msg;
6e157b6a
DM
1799 int optlen, on_link;
1800 u8 *lladdr;
e8599ff4 1801
29a3cad5 1802 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 1803 optlen -= sizeof(*msg);
e8599ff4
DM
1804
1805 if (optlen < 0) {
6e157b6a 1806 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1807 return;
1808 }
1809
71bcdba0 1810 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 1811
71bcdba0 1812 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 1813 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1814 return;
1815 }
1816
6e157b6a 1817 on_link = 0;
71bcdba0 1818 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 1819 on_link = 1;
71bcdba0 1820 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 1821 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1822 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1823 return;
1824 }
1825
1826 in6_dev = __in6_dev_get(skb->dev);
1827 if (!in6_dev)
1828 return;
1829 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1830 return;
1831
1832 /* RFC2461 8.1:
1833 * The IP source address of the Redirect MUST be the same as the current
1834 * first-hop router for the specified ICMP Destination Address.
1835 */
1836
71bcdba0 1837 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
1838 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1839 return;
1840 }
6e157b6a
DM
1841
1842 lladdr = NULL;
e8599ff4
DM
1843 if (ndopts.nd_opts_tgt_lladdr) {
1844 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1845 skb->dev);
1846 if (!lladdr) {
1847 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1848 return;
1849 }
1850 }
1851
6e157b6a
DM
1852 rt = (struct rt6_info *) dst;
1853 if (rt == net->ipv6.ip6_null_entry) {
1854 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1855 return;
6e157b6a 1856 }
e8599ff4 1857
6e157b6a
DM
1858 /* Redirect received -> path was valid.
1859 * Look, redirects are sent only in response to data packets,
1860 * so that this nexthop apparently is reachable. --ANK
1861 */
1862 dst_confirm(&rt->dst);
a6279458 1863
71bcdba0 1864 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
1865 if (!neigh)
1866 return;
a6279458 1867
1da177e4
LT
1868 /*
1869 * We have finally decided to accept it.
1870 */
1871
1ab1457c 1872 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1873 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1874 NEIGH_UPDATE_F_OVERRIDE|
1875 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1876 NEIGH_UPDATE_F_ISROUTER))
1877 );
1878
71bcdba0 1879 nrt = ip6_rt_copy(rt, &msg->dest);
38308473 1880 if (!nrt)
1da177e4
LT
1881 goto out;
1882
1883 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1884 if (on_link)
1885 nrt->rt6i_flags &= ~RTF_GATEWAY;
1886
4e3fd7a0 1887 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 1888
40e22e8f 1889 if (ip6_ins_rt(nrt))
1da177e4
LT
1890 goto out;
1891
d8d1f30b
CG
1892 netevent.old = &rt->dst;
1893 netevent.new = &nrt->dst;
71bcdba0 1894 netevent.daddr = &msg->dest;
60592833 1895 netevent.neigh = neigh;
8d71740c
TT
1896 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1897
38308473 1898 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1899 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1900 ip6_del_rt(rt);
1da177e4
LT
1901 }
1902
1903out:
e8599ff4 1904 neigh_release(neigh);
6e157b6a
DM
1905}
1906
1da177e4
LT
1907/*
1908 * Misc support functions
1909 */
1910
4b32b5ad
MKL
1911static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
1912{
1913 BUG_ON(from->dst.from);
1914
1915 rt->rt6i_flags &= ~RTF_EXPIRES;
1916 dst_hold(&from->dst);
1917 rt->dst.from = &from->dst;
1918 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
1919}
1920
1716a961 1921static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1922 const struct in6_addr *dest)
1da177e4 1923{
d1918542 1924 struct net *net = dev_net(ort->dst.dev);
4b32b5ad
MKL
1925 struct rt6_info *rt;
1926
1927 if (ort->rt6i_flags & RTF_CACHE)
1928 ort = (struct rt6_info *)ort->dst.from;
1929
1930 rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1931 ort->rt6i_table);
1da177e4
LT
1932
1933 if (rt) {
d8d1f30b
CG
1934 rt->dst.input = ort->dst.input;
1935 rt->dst.output = ort->dst.output;
8e2ec639 1936 rt->dst.flags |= DST_HOST;
d8d1f30b 1937
4e3fd7a0 1938 rt->rt6i_dst.addr = *dest;
8e2ec639 1939 rt->rt6i_dst.plen = 128;
d8d1f30b 1940 rt->dst.error = ort->dst.error;
1da177e4
LT
1941 rt->rt6i_idev = ort->rt6i_idev;
1942 if (rt->rt6i_idev)
1943 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1944 rt->dst.lastuse = jiffies;
2647a9b0 1945 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961 1946 rt->rt6i_flags = ort->rt6i_flags;
24f5b855 1947 rt6_set_from(rt, ort);
1da177e4
LT
1948 rt->rt6i_metric = 0;
1949
1da177e4
LT
1950#ifdef CONFIG_IPV6_SUBTREES
1951 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1952#endif
0f6c6392 1953 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1954 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1955 }
1956 return rt;
1957}
1958
70ceb4f5 1959#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1960static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1961 const struct in6_addr *prefix, int prefixlen,
1962 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1963{
1964 struct fib6_node *fn;
1965 struct rt6_info *rt = NULL;
c71099ac
TG
1966 struct fib6_table *table;
1967
efa2cea0 1968 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1969 if (!table)
c71099ac 1970 return NULL;
70ceb4f5 1971
5744dd9b 1972 read_lock_bh(&table->tb6_lock);
67ba4152 1973 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
1974 if (!fn)
1975 goto out;
1976
d8d1f30b 1977 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 1978 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
1979 continue;
1980 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1981 continue;
1982 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1983 continue;
d8d1f30b 1984 dst_hold(&rt->dst);
70ceb4f5
YH
1985 break;
1986 }
1987out:
5744dd9b 1988 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
1989 return rt;
1990}
1991
efa2cea0 1992static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
1993 const struct in6_addr *prefix, int prefixlen,
1994 const struct in6_addr *gwaddr, int ifindex,
95c96174 1995 unsigned int pref)
70ceb4f5 1996{
86872cb5
TG
1997 struct fib6_config cfg = {
1998 .fc_table = RT6_TABLE_INFO,
238fc7ea 1999 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2000 .fc_ifindex = ifindex,
2001 .fc_dst_len = prefixlen,
2002 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2003 RTF_UP | RTF_PREF(pref),
15e47304 2004 .fc_nlinfo.portid = 0,
efa2cea0
DL
2005 .fc_nlinfo.nlh = NULL,
2006 .fc_nlinfo.nl_net = net,
86872cb5
TG
2007 };
2008
4e3fd7a0
AD
2009 cfg.fc_dst = *prefix;
2010 cfg.fc_gateway = *gwaddr;
70ceb4f5 2011
e317da96
YH
2012 /* We should treat it as a default route if prefix length is 0. */
2013 if (!prefixlen)
86872cb5 2014 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2015
86872cb5 2016 ip6_route_add(&cfg);
70ceb4f5 2017
efa2cea0 2018 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2019}
2020#endif
2021
b71d1d42 2022struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2023{
1da177e4 2024 struct rt6_info *rt;
c71099ac 2025 struct fib6_table *table;
1da177e4 2026
c346dca1 2027 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2028 if (!table)
c71099ac 2029 return NULL;
1da177e4 2030
5744dd9b 2031 read_lock_bh(&table->tb6_lock);
67ba4152 2032 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2033 if (dev == rt->dst.dev &&
045927ff 2034 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2035 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2036 break;
2037 }
2038 if (rt)
d8d1f30b 2039 dst_hold(&rt->dst);
5744dd9b 2040 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2041 return rt;
2042}
2043
b71d1d42 2044struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2045 struct net_device *dev,
2046 unsigned int pref)
1da177e4 2047{
86872cb5
TG
2048 struct fib6_config cfg = {
2049 .fc_table = RT6_TABLE_DFLT,
238fc7ea 2050 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2051 .fc_ifindex = dev->ifindex,
2052 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2053 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2054 .fc_nlinfo.portid = 0,
5578689a 2055 .fc_nlinfo.nlh = NULL,
c346dca1 2056 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2057 };
1da177e4 2058
4e3fd7a0 2059 cfg.fc_gateway = *gwaddr;
1da177e4 2060
86872cb5 2061 ip6_route_add(&cfg);
1da177e4 2062
1da177e4
LT
2063 return rt6_get_dflt_router(gwaddr, dev);
2064}
2065
7b4da532 2066void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2067{
2068 struct rt6_info *rt;
c71099ac
TG
2069 struct fib6_table *table;
2070
2071 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2072 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2073 if (!table)
c71099ac 2074 return;
1da177e4
LT
2075
2076restart:
c71099ac 2077 read_lock_bh(&table->tb6_lock);
d8d1f30b 2078 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2079 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2080 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2081 dst_hold(&rt->dst);
c71099ac 2082 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2083 ip6_del_rt(rt);
1da177e4
LT
2084 goto restart;
2085 }
2086 }
c71099ac 2087 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2088}
2089
5578689a
DL
2090static void rtmsg_to_fib6_config(struct net *net,
2091 struct in6_rtmsg *rtmsg,
86872cb5
TG
2092 struct fib6_config *cfg)
2093{
2094 memset(cfg, 0, sizeof(*cfg));
2095
2096 cfg->fc_table = RT6_TABLE_MAIN;
2097 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2098 cfg->fc_metric = rtmsg->rtmsg_metric;
2099 cfg->fc_expires = rtmsg->rtmsg_info;
2100 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2101 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2102 cfg->fc_flags = rtmsg->rtmsg_flags;
2103
5578689a 2104 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2105
4e3fd7a0
AD
2106 cfg->fc_dst = rtmsg->rtmsg_dst;
2107 cfg->fc_src = rtmsg->rtmsg_src;
2108 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2109}
2110
5578689a 2111int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2112{
86872cb5 2113 struct fib6_config cfg;
1da177e4
LT
2114 struct in6_rtmsg rtmsg;
2115 int err;
2116
67ba4152 2117 switch (cmd) {
1da177e4
LT
2118 case SIOCADDRT: /* Add a route */
2119 case SIOCDELRT: /* Delete a route */
af31f412 2120 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2121 return -EPERM;
2122 err = copy_from_user(&rtmsg, arg,
2123 sizeof(struct in6_rtmsg));
2124 if (err)
2125 return -EFAULT;
86872cb5 2126
5578689a 2127 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2128
1da177e4
LT
2129 rtnl_lock();
2130 switch (cmd) {
2131 case SIOCADDRT:
86872cb5 2132 err = ip6_route_add(&cfg);
1da177e4
LT
2133 break;
2134 case SIOCDELRT:
86872cb5 2135 err = ip6_route_del(&cfg);
1da177e4
LT
2136 break;
2137 default:
2138 err = -EINVAL;
2139 }
2140 rtnl_unlock();
2141
2142 return err;
3ff50b79 2143 }
1da177e4
LT
2144
2145 return -EINVAL;
2146}
2147
2148/*
2149 * Drop the packet on the floor
2150 */
2151
d5fdd6ba 2152static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2153{
612f09e8 2154 int type;
adf30907 2155 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2156 switch (ipstats_mib_noroutes) {
2157 case IPSTATS_MIB_INNOROUTES:
0660e03f 2158 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2159 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2160 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2161 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2162 break;
2163 }
2164 /* FALLTHROUGH */
2165 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2166 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2167 ipstats_mib_noroutes);
612f09e8
YH
2168 break;
2169 }
3ffe533c 2170 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2171 kfree_skb(skb);
2172 return 0;
2173}
2174
9ce8ade0
TG
2175static int ip6_pkt_discard(struct sk_buff *skb)
2176{
612f09e8 2177 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2178}
2179
aad88724 2180static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
1da177e4 2181{
adf30907 2182 skb->dev = skb_dst(skb)->dev;
612f09e8 2183 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2184}
2185
9ce8ade0
TG
2186static int ip6_pkt_prohibit(struct sk_buff *skb)
2187{
612f09e8 2188 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2189}
2190
aad88724 2191static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
9ce8ade0 2192{
adf30907 2193 skb->dev = skb_dst(skb)->dev;
612f09e8 2194 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2195}
2196
1da177e4
LT
2197/*
2198 * Allocate a dst for local (unicast / anycast) address.
2199 */
2200
2201struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2202 const struct in6_addr *addr,
8f031519 2203 bool anycast)
1da177e4 2204{
c346dca1 2205 struct net *net = dev_net(idev->dev);
a3300ef4
HFS
2206 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2207 DST_NOCOUNT, NULL);
2208 if (!rt)
1da177e4
LT
2209 return ERR_PTR(-ENOMEM);
2210
1da177e4
LT
2211 in6_dev_hold(idev);
2212
11d53b49 2213 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2214 rt->dst.input = ip6_input;
2215 rt->dst.output = ip6_output;
1da177e4 2216 rt->rt6i_idev = idev;
1da177e4
LT
2217
2218 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2219 if (anycast)
2220 rt->rt6i_flags |= RTF_ANYCAST;
2221 else
1da177e4 2222 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2223
550bab42 2224 rt->rt6i_gateway = *addr;
4e3fd7a0 2225 rt->rt6i_dst.addr = *addr;
1da177e4 2226 rt->rt6i_dst.plen = 128;
5578689a 2227 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2228
d8d1f30b 2229 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2230
2231 return rt;
2232}
2233
c3968a85
DW
2234int ip6_route_get_saddr(struct net *net,
2235 struct rt6_info *rt,
b71d1d42 2236 const struct in6_addr *daddr,
c3968a85
DW
2237 unsigned int prefs,
2238 struct in6_addr *saddr)
2239{
e16e888b
MS
2240 struct inet6_dev *idev =
2241 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2242 int err = 0;
e16e888b 2243 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2244 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2245 else
2246 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2247 daddr, prefs, saddr);
2248 return err;
2249}
2250
/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL matches any */
	struct net *net;	/* namespace, used to skip its ip6_null_entry */
	struct in6_addr *addr;	/* address being removed */
};
2257
2258static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2259{
2260 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2261 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2262 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2263
d1918542 2264 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2265 rt != net->ipv6.ip6_null_entry &&
2266 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2267 /* remove prefsrc entry */
2268 rt->rt6i_prefsrc.plen = 0;
2269 }
2270 return 0;
2271}
2272
2273void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2274{
2275 struct net *net = dev_net(ifp->idev->dev);
2276 struct arg_dev_net_ip adni = {
2277 .dev = ifp->idev->dev,
2278 .net = net,
2279 .addr = &ifp->addr,
2280 };
0c3584d5 2281 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2282}
2283
be7a010d
DJ
2284#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2285#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2286
2287/* Remove routers and update dst entries when gateway turn into host. */
2288static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2289{
2290 struct in6_addr *gateway = (struct in6_addr *)arg;
2291
2292 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2293 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2294 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2295 return -1;
2296 }
2297 return 0;
2298}
2299
2300void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2301{
2302 fib6_clean_all(net, fib6_clean_tohost, gateway);
2303}
2304
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any */
	struct net *net;	/* namespace, used to skip its ip6_null_entry */
};
2309
1da177e4
LT
2310static int fib6_ifdown(struct rt6_info *rt, void *arg)
2311{
bc3ef660 2312 const struct arg_dev_net *adn = arg;
2313 const struct net_device *dev = adn->dev;
8ed67789 2314
d1918542 2315 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2316 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2317 return -1;
c159d30c 2318
1da177e4
LT
2319 return 0;
2320}
2321
f3db4851 2322void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2323{
8ed67789
DL
2324 struct arg_dev_net adn = {
2325 .dev = dev,
2326 .net = net,
2327 };
2328
0c3584d5 2329 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2330 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2331}
2332
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2337
2338static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2339{
2340 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2341 struct inet6_dev *idev;
2342
2343 /* In IPv6 pmtu discovery is not optional,
2344 so that RTAX_MTU lock cannot disable it.
2345 We still use this lock to block changes
2346 caused by addrconf/ndisc.
2347 */
2348
2349 idev = __in6_dev_get(arg->dev);
38308473 2350 if (!idev)
1da177e4
LT
2351 return 0;
2352
2353 /* For administrative MTU increase, there is no way to discover
2354 IPv6 PMTU increase, so PMTU increase should be updated here.
2355 Since RFC 1981 doesn't include administrative MTU increase
2356 update PMTU increase is a MUST. (i.e. jumbo frame)
2357 */
2358 /*
2359 If new MTU is less than route PMTU, this new MTU will be the
2360 lowest MTU in the path, update the route PMTU to reflect PMTU
2361 decreases; if new MTU is greater than route PMTU, and the
2362 old MTU is the lowest MTU in the path, update the route PMTU
2363 to reflect the increase. In this case if the other nodes' MTU
2364 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2365 PMTU discouvery.
2366 */
d1918542 2367 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2368 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2369 if (rt->rt6i_flags & RTF_CACHE) {
2370 /* For RTF_CACHE with rt6i_pmtu == 0
2371 * (i.e. a redirected route),
2372 * the metrics of its rt->dst.from has already
2373 * been updated.
2374 */
2375 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2376 rt->rt6i_pmtu = arg->mtu;
2377 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2378 (dst_mtu(&rt->dst) < arg->mtu &&
2379 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2380 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2381 }
566cfd8f 2382 }
1da177e4
LT
2383 return 0;
2384}
2385
95c96174 2386void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2387{
c71099ac
TG
2388 struct rt6_mtu_change_arg arg = {
2389 .dev = dev,
2390 .mtu = mtu,
2391 };
1da177e4 2392
0c3584d5 2393 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2394}
2395
ef7c79ed 2396static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2397 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2398 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2399 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2400 [RTA_PRIORITY] = { .type = NLA_U32 },
2401 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2402 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2403 [RTA_PREF] = { .type = NLA_U8 },
86872cb5
TG
2404};
2405
2406static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2407 struct fib6_config *cfg)
1da177e4 2408{
86872cb5
TG
2409 struct rtmsg *rtm;
2410 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2411 unsigned int pref;
86872cb5 2412 int err;
1da177e4 2413
86872cb5
TG
2414 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2415 if (err < 0)
2416 goto errout;
1da177e4 2417
86872cb5
TG
2418 err = -EINVAL;
2419 rtm = nlmsg_data(nlh);
2420 memset(cfg, 0, sizeof(*cfg));
2421
2422 cfg->fc_table = rtm->rtm_table;
2423 cfg->fc_dst_len = rtm->rtm_dst_len;
2424 cfg->fc_src_len = rtm->rtm_src_len;
2425 cfg->fc_flags = RTF_UP;
2426 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2427 cfg->fc_type = rtm->rtm_type;
86872cb5 2428
ef2c7d7b
ND
2429 if (rtm->rtm_type == RTN_UNREACHABLE ||
2430 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2431 rtm->rtm_type == RTN_PROHIBIT ||
2432 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2433 cfg->fc_flags |= RTF_REJECT;
2434
ab79ad14
2435 if (rtm->rtm_type == RTN_LOCAL)
2436 cfg->fc_flags |= RTF_LOCAL;
2437
1f56a01f
MKL
2438 if (rtm->rtm_flags & RTM_F_CLONED)
2439 cfg->fc_flags |= RTF_CACHE;
2440
15e47304 2441 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2442 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2443 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2444
2445 if (tb[RTA_GATEWAY]) {
67b61f6c 2446 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2447 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2448 }
86872cb5
TG
2449
2450 if (tb[RTA_DST]) {
2451 int plen = (rtm->rtm_dst_len + 7) >> 3;
2452
2453 if (nla_len(tb[RTA_DST]) < plen)
2454 goto errout;
2455
2456 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2457 }
86872cb5
TG
2458
2459 if (tb[RTA_SRC]) {
2460 int plen = (rtm->rtm_src_len + 7) >> 3;
2461
2462 if (nla_len(tb[RTA_SRC]) < plen)
2463 goto errout;
2464
2465 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2466 }
86872cb5 2467
c3968a85 2468 if (tb[RTA_PREFSRC])
67b61f6c 2469 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2470
86872cb5
TG
2471 if (tb[RTA_OIF])
2472 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2473
2474 if (tb[RTA_PRIORITY])
2475 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2476
2477 if (tb[RTA_METRICS]) {
2478 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2479 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2480 }
86872cb5
TG
2481
2482 if (tb[RTA_TABLE])
2483 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2484
51ebd318
ND
2485 if (tb[RTA_MULTIPATH]) {
2486 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2487 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2488 }
2489
c78ba6d6
LR
2490 if (tb[RTA_PREF]) {
2491 pref = nla_get_u8(tb[RTA_PREF]);
2492 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2493 pref != ICMPV6_ROUTER_PREF_HIGH)
2494 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2495 cfg->fc_flags |= RTF_PREF(pref);
2496 }
2497
86872cb5
TG
2498 err = 0;
2499errout:
2500 return err;
1da177e4
LT
2501}
2502
51ebd318
ND
2503static int ip6_route_multipath(struct fib6_config *cfg, int add)
2504{
2505 struct fib6_config r_cfg;
2506 struct rtnexthop *rtnh;
2507 int remaining;
2508 int attrlen;
2509 int err = 0, last_err = 0;
2510
35f1b4e9 2511 remaining = cfg->fc_mp_len;
51ebd318
ND
2512beginning:
2513 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318
ND
2514
2515 /* Parse a Multipath Entry */
2516 while (rtnh_ok(rtnh, remaining)) {
2517 memcpy(&r_cfg, cfg, sizeof(*cfg));
2518 if (rtnh->rtnh_ifindex)
2519 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2520
2521 attrlen = rtnh_attrlen(rtnh);
2522 if (attrlen > 0) {
2523 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2524
2525 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2526 if (nla) {
67b61f6c 2527 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2528 r_cfg.fc_flags |= RTF_GATEWAY;
2529 }
2530 }
2531 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2532 if (err) {
2533 last_err = err;
2534 /* If we are trying to remove a route, do not stop the
2535 * loop when ip6_route_del() fails (because next hop is
2536 * already gone), we should try to remove all next hops.
2537 */
2538 if (add) {
2539 /* If add fails, we should try to delete all
2540 * next hops that have been already added.
2541 */
2542 add = 0;
35f1b4e9 2543 remaining = cfg->fc_mp_len - remaining;
51ebd318
ND
2544 goto beginning;
2545 }
2546 }
1a72418b 2547 /* Because each route is added like a single route we remove
27596472
MK
2548 * these flags after the first nexthop: if there is a collision,
2549 * we have already failed to add the first nexthop:
2550 * fib6_add_rt2node() has rejected it; when replacing, old
2551 * nexthops have been replaced by first new, the rest should
2552 * be added to it.
1a72418b 2553 */
27596472
MK
2554 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2555 NLM_F_REPLACE);
51ebd318
ND
2556 rtnh = rtnh_next(rtnh, &remaining);
2557 }
2558
2559 return last_err;
2560}
2561
67ba4152 2562static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2563{
86872cb5
TG
2564 struct fib6_config cfg;
2565 int err;
1da177e4 2566
86872cb5
TG
2567 err = rtm_to_fib6_config(skb, nlh, &cfg);
2568 if (err < 0)
2569 return err;
2570
51ebd318
ND
2571 if (cfg.fc_mp)
2572 return ip6_route_multipath(&cfg, 0);
2573 else
2574 return ip6_route_del(&cfg);
1da177e4
LT
2575}
2576
67ba4152 2577static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2578{
86872cb5
TG
2579 struct fib6_config cfg;
2580 int err;
1da177e4 2581
86872cb5
TG
2582 err = rtm_to_fib6_config(skb, nlh, &cfg);
2583 if (err < 0)
2584 return err;
2585
51ebd318
ND
2586 if (cfg.fc_mp)
2587 return ip6_route_multipath(&cfg, 1);
2588 else
2589 return ip6_route_add(&cfg);
1da177e4
LT
2590}
2591
339bf98f
TG
2592static inline size_t rt6_nlmsg_size(void)
2593{
2594 return NLMSG_ALIGN(sizeof(struct rtmsg))
2595 + nla_total_size(16) /* RTA_SRC */
2596 + nla_total_size(16) /* RTA_DST */
2597 + nla_total_size(16) /* RTA_GATEWAY */
2598 + nla_total_size(16) /* RTA_PREFSRC */
2599 + nla_total_size(4) /* RTA_TABLE */
2600 + nla_total_size(4) /* RTA_IIF */
2601 + nla_total_size(4) /* RTA_OIF */
2602 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2603 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 2604 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6
LR
2605 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2606 + nla_total_size(1); /* RTA_PREF */
339bf98f
TG
2607}
2608
191cd582
BH
2609static int rt6_fill_node(struct net *net,
2610 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2611 struct in6_addr *dst, struct in6_addr *src,
15e47304 2612 int iif, int type, u32 portid, u32 seq,
7bc570c8 2613 int prefix, int nowait, unsigned int flags)
1da177e4 2614{
4b32b5ad 2615 u32 metrics[RTAX_MAX];
1da177e4 2616 struct rtmsg *rtm;
2d7202bf 2617 struct nlmsghdr *nlh;
e3703b3d 2618 long expires;
9e762a4a 2619 u32 table;
1da177e4
LT
2620
2621 if (prefix) { /* user wants prefix routes only */
2622 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2623 /* success since this is not a prefix route */
2624 return 1;
2625 }
2626 }
2627
15e47304 2628 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2629 if (!nlh)
26932566 2630 return -EMSGSIZE;
2d7202bf
TG
2631
2632 rtm = nlmsg_data(nlh);
1da177e4
LT
2633 rtm->rtm_family = AF_INET6;
2634 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2635 rtm->rtm_src_len = rt->rt6i_src.plen;
2636 rtm->rtm_tos = 0;
c71099ac 2637 if (rt->rt6i_table)
9e762a4a 2638 table = rt->rt6i_table->tb6_id;
c71099ac 2639 else
9e762a4a
PM
2640 table = RT6_TABLE_UNSPEC;
2641 rtm->rtm_table = table;
c78679e8
DM
2642 if (nla_put_u32(skb, RTA_TABLE, table))
2643 goto nla_put_failure;
ef2c7d7b
ND
2644 if (rt->rt6i_flags & RTF_REJECT) {
2645 switch (rt->dst.error) {
2646 case -EINVAL:
2647 rtm->rtm_type = RTN_BLACKHOLE;
2648 break;
2649 case -EACCES:
2650 rtm->rtm_type = RTN_PROHIBIT;
2651 break;
b4949ab2
ND
2652 case -EAGAIN:
2653 rtm->rtm_type = RTN_THROW;
2654 break;
ef2c7d7b
ND
2655 default:
2656 rtm->rtm_type = RTN_UNREACHABLE;
2657 break;
2658 }
2659 }
38308473 2660 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2661 rtm->rtm_type = RTN_LOCAL;
d1918542 2662 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2663 rtm->rtm_type = RTN_LOCAL;
2664 else
2665 rtm->rtm_type = RTN_UNICAST;
2666 rtm->rtm_flags = 0;
2667 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2668 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2669 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2670 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2671 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2672 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2673 rtm->rtm_protocol = RTPROT_RA;
2674 else
2675 rtm->rtm_protocol = RTPROT_KERNEL;
2676 }
1da177e4 2677
38308473 2678 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2679 rtm->rtm_flags |= RTM_F_CLONED;
2680
2681 if (dst) {
930345ea 2682 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 2683 goto nla_put_failure;
1ab1457c 2684 rtm->rtm_dst_len = 128;
1da177e4 2685 } else if (rtm->rtm_dst_len)
930345ea 2686 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 2687 goto nla_put_failure;
1da177e4
LT
2688#ifdef CONFIG_IPV6_SUBTREES
2689 if (src) {
930345ea 2690 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 2691 goto nla_put_failure;
1ab1457c 2692 rtm->rtm_src_len = 128;
c78679e8 2693 } else if (rtm->rtm_src_len &&
930345ea 2694 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 2695 goto nla_put_failure;
1da177e4 2696#endif
7bc570c8
YH
2697 if (iif) {
2698#ifdef CONFIG_IPV6_MROUTE
2699 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2700 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2701 if (err <= 0) {
2702 if (!nowait) {
2703 if (err == 0)
2704 return 0;
2705 goto nla_put_failure;
2706 } else {
2707 if (err == -EMSGSIZE)
2708 goto nla_put_failure;
2709 }
2710 }
2711 } else
2712#endif
c78679e8
DM
2713 if (nla_put_u32(skb, RTA_IIF, iif))
2714 goto nla_put_failure;
7bc570c8 2715 } else if (dst) {
1da177e4 2716 struct in6_addr saddr_buf;
c78679e8 2717 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 2718 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2719 goto nla_put_failure;
1da177e4 2720 }
2d7202bf 2721
c3968a85
DW
2722 if (rt->rt6i_prefsrc.plen) {
2723 struct in6_addr saddr_buf;
4e3fd7a0 2724 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 2725 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2726 goto nla_put_failure;
c3968a85
DW
2727 }
2728
4b32b5ad
MKL
2729 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
2730 if (rt->rt6i_pmtu)
2731 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
2732 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
2733 goto nla_put_failure;
2734
dd0cbf29 2735 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 2736 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 2737 goto nla_put_failure;
94f826b8 2738 }
2d7202bf 2739
c78679e8
DM
2740 if (rt->dst.dev &&
2741 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2742 goto nla_put_failure;
2743 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2744 goto nla_put_failure;
8253947e
LW
2745
2746 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2747
87a50699 2748 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2749 goto nla_put_failure;
2d7202bf 2750
c78ba6d6
LR
2751 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2752 goto nla_put_failure;
2753
053c095a
JB
2754 nlmsg_end(skb, nlh);
2755 return 0;
2d7202bf
TG
2756
2757nla_put_failure:
26932566
PM
2758 nlmsg_cancel(skb, nlh);
2759 return -EMSGSIZE;
1da177e4
LT
2760}
2761
1b43af54 2762int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2763{
2764 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2765 int prefix;
2766
2d7202bf
TG
2767 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2768 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2769 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2770 } else
2771 prefix = 0;
2772
191cd582
BH
2773 return rt6_fill_node(arg->net,
2774 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2775 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2776 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2777}
2778
67ba4152 2779static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 2780{
3b1e0a65 2781 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2782 struct nlattr *tb[RTA_MAX+1];
2783 struct rt6_info *rt;
1da177e4 2784 struct sk_buff *skb;
ab364a6f 2785 struct rtmsg *rtm;
4c9483b2 2786 struct flowi6 fl6;
72331bc0 2787 int err, iif = 0, oif = 0;
1da177e4 2788
ab364a6f
TG
2789 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2790 if (err < 0)
2791 goto errout;
1da177e4 2792
ab364a6f 2793 err = -EINVAL;
4c9483b2 2794 memset(&fl6, 0, sizeof(fl6));
1da177e4 2795
ab364a6f
TG
2796 if (tb[RTA_SRC]) {
2797 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2798 goto errout;
2799
4e3fd7a0 2800 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2801 }
2802
2803 if (tb[RTA_DST]) {
2804 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2805 goto errout;
2806
4e3fd7a0 2807 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2808 }
2809
2810 if (tb[RTA_IIF])
2811 iif = nla_get_u32(tb[RTA_IIF]);
2812
2813 if (tb[RTA_OIF])
72331bc0 2814 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 2815
2e47b291
LC
2816 if (tb[RTA_MARK])
2817 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2818
1da177e4
LT
2819 if (iif) {
2820 struct net_device *dev;
72331bc0
SL
2821 int flags = 0;
2822
5578689a 2823 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2824 if (!dev) {
2825 err = -ENODEV;
ab364a6f 2826 goto errout;
1da177e4 2827 }
72331bc0
SL
2828
2829 fl6.flowi6_iif = iif;
2830
2831 if (!ipv6_addr_any(&fl6.saddr))
2832 flags |= RT6_LOOKUP_F_HAS_SADDR;
2833
2834 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2835 flags);
2836 } else {
2837 fl6.flowi6_oif = oif;
2838
2839 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2840 }
2841
ab364a6f 2842 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2843 if (!skb) {
94e187c0 2844 ip6_rt_put(rt);
ab364a6f
TG
2845 err = -ENOBUFS;
2846 goto errout;
2847 }
1da177e4 2848
ab364a6f
TG
2849 /* Reserve room for dummy headers, this skb can pass
2850 through good chunk of routing engine.
2851 */
459a98ed 2852 skb_reset_mac_header(skb);
ab364a6f 2853 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2854
d8d1f30b 2855 skb_dst_set(skb, &rt->dst);
1da177e4 2856
4c9483b2 2857 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2858 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2859 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2860 if (err < 0) {
ab364a6f
TG
2861 kfree_skb(skb);
2862 goto errout;
1da177e4
LT
2863 }
2864
15e47304 2865 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2866errout:
1da177e4 2867 return err;
1da177e4
LT
2868}
2869
86872cb5 2870void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2871{
2872 struct sk_buff *skb;
5578689a 2873 struct net *net = info->nl_net;
528c4ceb
DL
2874 u32 seq;
2875 int err;
2876
2877 err = -ENOBUFS;
38308473 2878 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2879
339bf98f 2880 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2881 if (!skb)
21713ebc
TG
2882 goto errout;
2883
191cd582 2884 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2885 event, info->portid, seq, 0, 0, 0);
26932566
PM
2886 if (err < 0) {
2887 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2888 WARN_ON(err == -EMSGSIZE);
2889 kfree_skb(skb);
2890 goto errout;
2891 }
15e47304 2892 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2893 info->nlh, gfp_any());
2894 return;
21713ebc
TG
2895errout:
2896 if (err < 0)
5578689a 2897 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2898}
2899
8ed67789 2900static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 2901 unsigned long event, void *ptr)
8ed67789 2902{
351638e7 2903 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 2904 struct net *net = dev_net(dev);
8ed67789
DL
2905
2906 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2907 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2908 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2909#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2910 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2911 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2912 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2913 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2914#endif
2915 }
2916
2917 return NOTIFY_OK;
2918}
2919
1da177e4
LT
2920/*
2921 * /proc
2922 */
2923
2924#ifdef CONFIG_PROC_FS
2925
33120b30
AD
2926static const struct file_operations ipv6_route_proc_fops = {
2927 .owner = THIS_MODULE,
2928 .open = ipv6_route_open,
2929 .read = seq_read,
2930 .llseek = seq_lseek,
8d2ca1d7 2931 .release = seq_release_net,
33120b30
AD
2932};
2933
1da177e4
LT
2934static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2935{
69ddb805 2936 struct net *net = (struct net *)seq->private;
1da177e4 2937 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2938 net->ipv6.rt6_stats->fib_nodes,
2939 net->ipv6.rt6_stats->fib_route_nodes,
2940 net->ipv6.rt6_stats->fib_rt_alloc,
2941 net->ipv6.rt6_stats->fib_rt_entries,
2942 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2943 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2944 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2945
2946 return 0;
2947}
2948
2949static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2950{
de05c557 2951 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2952}
2953
9a32144e 2954static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2955 .owner = THIS_MODULE,
2956 .open = rt6_stats_seq_open,
2957 .read = seq_read,
2958 .llseek = seq_lseek,
b6fcbdb4 2959 .release = single_release_net,
1da177e4
LT
2960};
2961#endif /* CONFIG_PROC_FS */
2962
2963#ifdef CONFIG_SYSCTL
2964
1da177e4 2965static
fe2c6338 2966int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
2967 void __user *buffer, size_t *lenp, loff_t *ppos)
2968{
c486da34
LAG
2969 struct net *net;
2970 int delay;
2971 if (!write)
1da177e4 2972 return -EINVAL;
c486da34
LAG
2973
2974 net = (struct net *)ctl->extra1;
2975 delay = net->ipv6.sysctl.flush_delay;
2976 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 2977 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 2978 return 0;
1da177e4
LT
2979}
2980
fe2c6338 2981struct ctl_table ipv6_route_table_template[] = {
1ab1457c 2982 {
1da177e4 2983 .procname = "flush",
4990509f 2984 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2985 .maxlen = sizeof(int),
89c8b3a1 2986 .mode = 0200,
6d9f239a 2987 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2988 },
2989 {
1da177e4 2990 .procname = "gc_thresh",
9a7ec3a9 2991 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
2992 .maxlen = sizeof(int),
2993 .mode = 0644,
6d9f239a 2994 .proc_handler = proc_dointvec,
1da177e4
LT
2995 },
2996 {
1da177e4 2997 .procname = "max_size",
4990509f 2998 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
2999 .maxlen = sizeof(int),
3000 .mode = 0644,
6d9f239a 3001 .proc_handler = proc_dointvec,
1da177e4
LT
3002 },
3003 {
1da177e4 3004 .procname = "gc_min_interval",
4990509f 3005 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3006 .maxlen = sizeof(int),
3007 .mode = 0644,
6d9f239a 3008 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3009 },
3010 {
1da177e4 3011 .procname = "gc_timeout",
4990509f 3012 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3013 .maxlen = sizeof(int),
3014 .mode = 0644,
6d9f239a 3015 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3016 },
3017 {
1da177e4 3018 .procname = "gc_interval",
4990509f 3019 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3020 .maxlen = sizeof(int),
3021 .mode = 0644,
6d9f239a 3022 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3023 },
3024 {
1da177e4 3025 .procname = "gc_elasticity",
4990509f 3026 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3027 .maxlen = sizeof(int),
3028 .mode = 0644,
f3d3f616 3029 .proc_handler = proc_dointvec,
1da177e4
LT
3030 },
3031 {
1da177e4 3032 .procname = "mtu_expires",
4990509f 3033 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3034 .maxlen = sizeof(int),
3035 .mode = 0644,
6d9f239a 3036 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3037 },
3038 {
1da177e4 3039 .procname = "min_adv_mss",
4990509f 3040 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3041 .maxlen = sizeof(int),
3042 .mode = 0644,
f3d3f616 3043 .proc_handler = proc_dointvec,
1da177e4
LT
3044 },
3045 {
1da177e4 3046 .procname = "gc_min_interval_ms",
4990509f 3047 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3048 .maxlen = sizeof(int),
3049 .mode = 0644,
6d9f239a 3050 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3051 },
f8572d8f 3052 { }
1da177e4
LT
3053};
3054
2c8c1e72 3055struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3056{
3057 struct ctl_table *table;
3058
3059 table = kmemdup(ipv6_route_table_template,
3060 sizeof(ipv6_route_table_template),
3061 GFP_KERNEL);
5ee09105
YH
3062
3063 if (table) {
3064 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3065 table[0].extra1 = net;
86393e52 3066 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3067 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3068 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3069 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3070 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3071 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3072 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3073 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3074 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3075
3076 /* Don't export sysctls to unprivileged users */
3077 if (net->user_ns != &init_user_ns)
3078 table[0].procname = NULL;
5ee09105
YH
3079 }
3080
760f2d01
DL
3081 return table;
3082}
1da177e4
LT
3083#endif
3084
2c8c1e72 3085static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3086{
633d424b 3087 int ret = -ENOMEM;
8ed67789 3088
86393e52
AD
3089 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3090 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3091
fc66f95c
ED
3092 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3093 goto out_ip6_dst_ops;
3094
8ed67789
DL
3095 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3096 sizeof(*net->ipv6.ip6_null_entry),
3097 GFP_KERNEL);
3098 if (!net->ipv6.ip6_null_entry)
fc66f95c 3099 goto out_ip6_dst_entries;
d8d1f30b 3100 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3101 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3102 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3103 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3104 ip6_template_metrics, true);
8ed67789
DL
3105
3106#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3107 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3108 sizeof(*net->ipv6.ip6_prohibit_entry),
3109 GFP_KERNEL);
68fffc67
PZ
3110 if (!net->ipv6.ip6_prohibit_entry)
3111 goto out_ip6_null_entry;
d8d1f30b 3112 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3113 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3114 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3115 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3116 ip6_template_metrics, true);
8ed67789
DL
3117
3118 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3119 sizeof(*net->ipv6.ip6_blk_hole_entry),
3120 GFP_KERNEL);
68fffc67
PZ
3121 if (!net->ipv6.ip6_blk_hole_entry)
3122 goto out_ip6_prohibit_entry;
d8d1f30b 3123 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3124 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3125 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3126 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3127 ip6_template_metrics, true);
8ed67789
DL
3128#endif
3129
b339a47c
PZ
3130 net->ipv6.sysctl.flush_delay = 0;
3131 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3132 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3133 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3134 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3135 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3136 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3137 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3138
6891a346
BT
3139 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3140
8ed67789
DL
3141 ret = 0;
3142out:
3143 return ret;
f2fc6a54 3144
68fffc67
PZ
3145#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3146out_ip6_prohibit_entry:
3147 kfree(net->ipv6.ip6_prohibit_entry);
3148out_ip6_null_entry:
3149 kfree(net->ipv6.ip6_null_entry);
3150#endif
fc66f95c
ED
3151out_ip6_dst_entries:
3152 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3153out_ip6_dst_ops:
f2fc6a54 3154 goto out;
cdb18761
DL
3155}
3156
2c8c1e72 3157static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3158{
8ed67789
DL
3159 kfree(net->ipv6.ip6_null_entry);
3160#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3161 kfree(net->ipv6.ip6_prohibit_entry);
3162 kfree(net->ipv6.ip6_blk_hole_entry);
3163#endif
41bb78b4 3164 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3165}
3166
d189634e
TG
3167static int __net_init ip6_route_net_init_late(struct net *net)
3168{
3169#ifdef CONFIG_PROC_FS
d4beaa66
G
3170 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3171 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3172#endif
3173 return 0;
3174}
3175
3176static void __net_exit ip6_route_net_exit_late(struct net *net)
3177{
3178#ifdef CONFIG_PROC_FS
ece31ffd
G
3179 remove_proc_entry("ipv6_route", net->proc_net);
3180 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3181#endif
3182}
3183
cdb18761
DL
3184static struct pernet_operations ip6_route_net_ops = {
3185 .init = ip6_route_net_init,
3186 .exit = ip6_route_net_exit,
3187};
3188
c3426b47
DM
3189static int __net_init ipv6_inetpeer_init(struct net *net)
3190{
3191 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3192
3193 if (!bp)
3194 return -ENOMEM;
3195 inet_peer_base_init(bp);
3196 net->ipv6.peers = bp;
3197 return 0;
3198}
3199
3200static void __net_exit ipv6_inetpeer_exit(struct net *net)
3201{
3202 struct inet_peer_base *bp = net->ipv6.peers;
3203
3204 net->ipv6.peers = NULL;
56a6b248 3205 inetpeer_invalidate_tree(bp);
c3426b47
DM
3206 kfree(bp);
3207}
3208
2b823f72 3209static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3210 .init = ipv6_inetpeer_init,
3211 .exit = ipv6_inetpeer_exit,
3212};
3213
d189634e
TG
3214static struct pernet_operations ip6_route_net_late_ops = {
3215 .init = ip6_route_net_init_late,
3216 .exit = ip6_route_net_exit_late,
3217};
3218
8ed67789
DL
3219static struct notifier_block ip6_route_dev_notifier = {
3220 .notifier_call = ip6_route_dev_notify,
3221 .priority = 0,
3222};
3223
433d49c3 3224int __init ip6_route_init(void)
1da177e4 3225{
433d49c3
DL
3226 int ret;
3227
9a7ec3a9
DL
3228 ret = -ENOMEM;
3229 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3230 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3231 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3232 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3233 goto out;
14e50e57 3234
fc66f95c 3235 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3236 if (ret)
bdb3289f 3237 goto out_kmem_cache;
bdb3289f 3238
c3426b47
DM
3239 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3240 if (ret)
e8803b6c 3241 goto out_dst_entries;
2a0c451a 3242
7e52b33b
DM
3243 ret = register_pernet_subsys(&ip6_route_net_ops);
3244 if (ret)
3245 goto out_register_inetpeer;
c3426b47 3246
5dc121e9
AE
3247 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3248
8ed67789
DL
3249 /* Registering of the loopback is done before this portion of code,
3250 * the loopback reference in rt6_info will not be taken, do it
3251 * manually for init_net */
d8d1f30b 3252 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3253 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3254 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3255 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3256 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3257 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3258 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3259 #endif
e8803b6c 3260 ret = fib6_init();
433d49c3 3261 if (ret)
8ed67789 3262 goto out_register_subsys;
433d49c3 3263
433d49c3
DL
3264 ret = xfrm6_init();
3265 if (ret)
e8803b6c 3266 goto out_fib6_init;
c35b7e72 3267
433d49c3
DL
3268 ret = fib6_rules_init();
3269 if (ret)
3270 goto xfrm6_init;
7e5449c2 3271
d189634e
TG
3272 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3273 if (ret)
3274 goto fib6_rules_init;
3275
433d49c3 3276 ret = -ENOBUFS;
c7ac8679
GR
3277 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3278 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3279 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3280 goto out_register_late_subsys;
c127ea2c 3281
8ed67789 3282 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3283 if (ret)
d189634e 3284 goto out_register_late_subsys;
8ed67789 3285
433d49c3
DL
3286out:
3287 return ret;
3288
d189634e
TG
3289out_register_late_subsys:
3290 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3291fib6_rules_init:
433d49c3
DL
3292 fib6_rules_cleanup();
3293xfrm6_init:
433d49c3 3294 xfrm6_fini();
2a0c451a
TG
3295out_fib6_init:
3296 fib6_gc_cleanup();
8ed67789
DL
3297out_register_subsys:
3298 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3299out_register_inetpeer:
3300 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3301out_dst_entries:
3302 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3303out_kmem_cache:
f2fc6a54 3304 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3305 goto out;
1da177e4
LT
3306}
3307
3308void ip6_route_cleanup(void)
3309{
8ed67789 3310 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3311 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3312 fib6_rules_cleanup();
1da177e4 3313 xfrm6_fini();
1da177e4 3314 fib6_gc_cleanup();
c3426b47 3315 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3316 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3317 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3318 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3319}