]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv6/route.c
ipv6: Stop /128 route from disappearing after pmtu update
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
57#include <net/xfrm.h>
8d71740c 58#include <net/netevent.h>
21713ebc 59#include <net/netlink.h>
51ebd318 60#include <net/nexthop.h>
1da177e4
LT
61
62#include <asm/uaccess.h>
63
64#ifdef CONFIG_SYSCTL
65#include <linux/sysctl.h>
66#endif
67
/*
 * Result of checking the neighbour entry behind a route.  The failure
 * codes are negative so that rt6_score_route() can return them in place
 * of a (non-negative) score.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
74
1716a961 75static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 76 const struct in6_addr *dest);
1da177e4 77static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 78static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 79static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
80static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81static void ip6_dst_destroy(struct dst_entry *);
82static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
569d3645 84static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
85
86static int ip6_pkt_discard(struct sk_buff *skb);
aad88724 87static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
7150aede 88static int ip6_pkt_prohibit(struct sk_buff *skb);
aad88724 89static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
1da177e4 90static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
91static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb);
52bd4c0c 95static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 96
70ceb4f5 97#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 98static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
99 const struct in6_addr *prefix, int prefixlen,
100 const struct in6_addr *gwaddr, int ifindex,
95c96174 101 unsigned int pref);
efa2cea0 102static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
103 const struct in6_addr *prefix, int prefixlen,
104 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
105#endif
106
e8243534 107static void rt6_bind_peer(struct rt6_info *rt, int create)
108{
109 struct inet_peer_base *base;
110 struct inet_peer *peer;
111
112 base = inetpeer_base_ptr(rt->_rt6i_peer);
113 if (!base)
114 return;
115
116 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117 if (peer) {
118 if (!rt6_set_peer(rt, peer))
119 inet_putpeer(peer);
120 }
121}
122
123static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124{
125 if (rt6_has_peer(rt))
126 return rt6_peer_ptr(rt);
127
128 rt6_bind_peer(rt, create);
129 return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130}
131
/* Get the peer for @rt, asking for it to be created if it is missing. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
136
06582540
DM
137static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138{
139 struct rt6_info *rt = (struct rt6_info *) dst;
140 struct inet_peer *peer;
141 u32 *p = NULL;
142
8e2ec639 143 if (!(rt->dst.flags & DST_HOST))
3b471175 144 return dst_cow_metrics_generic(dst, old);
8e2ec639 145
fbfe95a4 146 peer = rt6_get_peer_create(rt);
06582540
DM
147 if (peer) {
148 u32 *old_p = __DST_METRICS_PTR(old);
149 unsigned long prev, new;
150
151 p = peer->metrics;
e5fd387a
MK
152 if (inet_metrics_new(peer) ||
153 (old & DST_METRICS_FORCE_OVERWRITE))
06582540
DM
154 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
155
156 new = (unsigned long) p;
157 prev = cmpxchg(&dst->_metrics, old, new);
158
159 if (prev != old) {
160 p = __DST_METRICS_PTR(prev);
161 if (prev & DST_METRICS_READ_ONLY)
162 p = NULL;
163 }
164 }
165 return p;
166}
167
f894cbf8
DM
168static inline const void *choose_neigh_daddr(struct rt6_info *rt,
169 struct sk_buff *skb,
170 const void *daddr)
39232973
DM
171{
172 struct in6_addr *p = &rt->rt6i_gateway;
173
a7563f34 174 if (!ipv6_addr_any(p))
39232973 175 return (const void *) p;
f894cbf8
DM
176 else if (skb)
177 return &ipv6_hdr(skb)->daddr;
39232973
DM
178 return daddr;
179}
180
f894cbf8
DM
181static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
182 struct sk_buff *skb,
183 const void *daddr)
d3aaeb38 184{
39232973
DM
185 struct rt6_info *rt = (struct rt6_info *) dst;
186 struct neighbour *n;
187
f894cbf8 188 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 189 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
190 if (n)
191 return n;
192 return neigh_create(&nd_tbl, daddr, dst->dev);
193}
194
9a7ec3a9 195static struct dst_ops ip6_dst_ops_template = {
1da177e4 196 .family = AF_INET6,
1da177e4
LT
197 .gc = ip6_dst_gc,
198 .gc_thresh = 1024,
199 .check = ip6_dst_check,
0dbaee3b 200 .default_advmss = ip6_default_advmss,
ebb762f2 201 .mtu = ip6_mtu,
06582540 202 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
203 .destroy = ip6_dst_destroy,
204 .ifdown = ip6_dst_ifdown,
205 .negative_advice = ip6_negative_advice,
206 .link_failure = ip6_link_failure,
207 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 208 .redirect = rt6_do_redirect,
1ac06e03 209 .local_out = __ip6_local_out,
d3aaeb38 210 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
211};
212
ebb762f2 213static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 214{
618f9bc7
SK
215 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
216
217 return mtu ? : dst->dev->mtu;
ec831ea7
RD
218}
219
6700c270
DM
220static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
221 struct sk_buff *skb, u32 mtu)
14e50e57
DM
222{
223}
224
/* dst_ops->redirect for blackhole routes: deliberately does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* Redirects are ignored for blackhole dsts. */
}
229
0972ddb2
HB
230static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
231 unsigned long old)
232{
233 return NULL;
234}
235
14e50e57
DM
236static struct dst_ops ip6_dst_blackhole_ops = {
237 .family = AF_INET6,
14e50e57
DM
238 .destroy = ip6_dst_destroy,
239 .check = ip6_dst_check,
ebb762f2 240 .mtu = ip6_blackhole_mtu,
214f45c9 241 .default_advmss = ip6_default_advmss,
14e50e57 242 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 243 .redirect = ip6_rt_blackhole_redirect,
0972ddb2 244 .cow_metrics = ip6_rt_blackhole_cow_metrics,
d3aaeb38 245 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
246};
247
62fa8a84 248static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 249 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
250};
251
fb0af4c7 252static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
253 .dst = {
254 .__refcnt = ATOMIC_INIT(1),
255 .__use = 1,
2c20cbd7 256 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 257 .error = -ENETUNREACH,
d8d1f30b
CG
258 .input = ip6_pkt_discard,
259 .output = ip6_pkt_discard_out,
1da177e4
LT
260 },
261 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 262 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
263 .rt6i_metric = ~(u32) 0,
264 .rt6i_ref = ATOMIC_INIT(1),
265};
266
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/*
 * Template for the "prohibit" reject route: dst error is -EACCES and
 * packets go to the ip6_pkt_prohibit handlers.
 */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

/*
 * Template for the "blackhole" reject route: dst error is -EINVAL and
 * packets are handed to the generic dst discard handlers.
 */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_sk,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol	= RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
300
1da177e4 301/* allocate dst with ip6_dst_ops */
97bab73f 302static inline struct rt6_info *ip6_dst_alloc(struct net *net,
957c665f 303 struct net_device *dev,
8b96d22d
DM
304 int flags,
305 struct fib6_table *table)
1da177e4 306{
97bab73f 307 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 308 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 309
97bab73f 310 if (rt) {
8104891b
SK
311 struct dst_entry *dst = &rt->dst;
312
313 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
8b96d22d 314 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
51ebd318 315 INIT_LIST_HEAD(&rt->rt6i_siblings);
97bab73f 316 }
cf911662 317 return rt;
1da177e4
LT
318}
319
320static void ip6_dst_destroy(struct dst_entry *dst)
321{
322 struct rt6_info *rt = (struct rt6_info *)dst;
323 struct inet6_dev *idev = rt->rt6i_idev;
ecd98837 324 struct dst_entry *from = dst->from;
1da177e4 325
8e2ec639
YZ
326 if (!(rt->dst.flags & DST_HOST))
327 dst_destroy_metrics_generic(dst);
328
38308473 329 if (idev) {
1da177e4
LT
330 rt->rt6i_idev = NULL;
331 in6_dev_put(idev);
1ab1457c 332 }
1716a961 333
ecd98837
YH
334 dst->from = NULL;
335 dst_release(from);
1716a961 336
97bab73f
DM
337 if (rt6_has_peer(rt)) {
338 struct inet_peer *peer = rt6_peer_ptr(rt);
b3419363
DM
339 inet_putpeer(peer);
340 }
341}
342
1da177e4
LT
343static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
344 int how)
345{
346 struct rt6_info *rt = (struct rt6_info *)dst;
347 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 348 struct net_device *loopback_dev =
c346dca1 349 dev_net(dev)->loopback_dev;
1da177e4 350
97cac082
DM
351 if (dev != loopback_dev) {
352 if (idev && idev->dev == dev) {
353 struct inet6_dev *loopback_idev =
354 in6_dev_get(loopback_dev);
355 if (loopback_idev) {
356 rt->rt6i_idev = loopback_idev;
357 in6_dev_put(idev);
358 }
359 }
1da177e4
LT
360 }
361}
362
a50feda5 363static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 364{
1716a961
G
365 if (rt->rt6i_flags & RTF_EXPIRES) {
366 if (time_after(jiffies, rt->dst.expires))
a50feda5 367 return true;
1716a961 368 } else if (rt->dst.from) {
3fd91fb3 369 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 370 }
a50feda5 371 return false;
1da177e4
LT
372}
373
51ebd318
ND
374/* Multipath route selection:
375 * Hash based function using packet header and flowlabel.
376 * Adapted from fib_info_hashfn()
377 */
378static int rt6_info_hash_nhsfn(unsigned int candidate_count,
379 const struct flowi6 *fl6)
380{
381 unsigned int val = fl6->flowi6_proto;
382
c08977bb
YH
383 val ^= ipv6_addr_hash(&fl6->daddr);
384 val ^= ipv6_addr_hash(&fl6->saddr);
51ebd318
ND
385
386 /* Work only if this not encapsulated */
387 switch (fl6->flowi6_proto) {
388 case IPPROTO_UDP:
389 case IPPROTO_TCP:
390 case IPPROTO_SCTP:
b3ce5ae1
ND
391 val ^= (__force u16)fl6->fl6_sport;
392 val ^= (__force u16)fl6->fl6_dport;
51ebd318
ND
393 break;
394
395 case IPPROTO_ICMPV6:
b3ce5ae1
ND
396 val ^= (__force u16)fl6->fl6_icmp_type;
397 val ^= (__force u16)fl6->fl6_icmp_code;
51ebd318
ND
398 break;
399 }
400 /* RFC6438 recommands to use flowlabel */
b3ce5ae1 401 val ^= (__force u32)fl6->flowlabel;
51ebd318
ND
402
403 /* Perhaps, we need to tune, this function? */
404 val = val ^ (val >> 7) ^ (val >> 12);
405 return val % candidate_count;
406}
407
408static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
409 struct flowi6 *fl6, int oif,
410 int strict)
51ebd318
ND
411{
412 struct rt6_info *sibling, *next_sibling;
413 int route_choosen;
414
415 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
416 /* Don't change the route, if route_choosen == 0
417 * (siblings does not include ourself)
418 */
419 if (route_choosen)
420 list_for_each_entry_safe(sibling, next_sibling,
421 &match->rt6i_siblings, rt6i_siblings) {
422 route_choosen--;
423 if (route_choosen == 0) {
52bd4c0c
ND
424 if (rt6_score_route(sibling, oif, strict) < 0)
425 break;
51ebd318
ND
426 match = sibling;
427 break;
428 }
429 }
430 return match;
431}
432
1da177e4 433/*
c71099ac 434 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
435 */
436
8ed67789
DL
437static inline struct rt6_info *rt6_device_match(struct net *net,
438 struct rt6_info *rt,
b71d1d42 439 const struct in6_addr *saddr,
1da177e4 440 int oif,
d420895e 441 int flags)
1da177e4
LT
442{
443 struct rt6_info *local = NULL;
444 struct rt6_info *sprt;
445
dd3abc4e
YH
446 if (!oif && ipv6_addr_any(saddr))
447 goto out;
448
d8d1f30b 449 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 450 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
451
452 if (oif) {
1da177e4
LT
453 if (dev->ifindex == oif)
454 return sprt;
455 if (dev->flags & IFF_LOOPBACK) {
38308473 456 if (!sprt->rt6i_idev ||
1da177e4 457 sprt->rt6i_idev->dev->ifindex != oif) {
d420895e 458 if (flags & RT6_LOOKUP_F_IFACE && oif)
1da177e4 459 continue;
1ab1457c 460 if (local && (!oif ||
1da177e4
LT
461 local->rt6i_idev->dev->ifindex == oif))
462 continue;
463 }
464 local = sprt;
465 }
dd3abc4e
YH
466 } else {
467 if (ipv6_chk_addr(net, saddr, dev,
468 flags & RT6_LOOKUP_F_IFACE))
469 return sprt;
1da177e4 470 }
dd3abc4e 471 }
1da177e4 472
dd3abc4e 473 if (oif) {
1da177e4
LT
474 if (local)
475 return local;
476
d420895e 477 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 478 return net->ipv6.ip6_null_entry;
1da177e4 479 }
dd3abc4e 480out:
1da177e4
LT
481 return rt;
482}
483
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred neighbour solicitation request queued by rt6_probe(). */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;
	struct net_device *dev;
};

/*
 * Work handler: send a neighbour solicitation for the stored target to
 * its solicited-node multicast address, then drop the device reference
 * taken by rt6_probe() and free the work item.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct __rt6_probe_work *req =
		container_of(w, struct __rt6_probe_work, work);
	struct in6_addr mcaddr;

	addrconf_addr_solict_mult(&req->target, &mcaddr);
	ndisc_send_ns(req->dev, NULL, &req->target, &mcaddr, NULL);
	dev_put(req->dev);
	kfree(req);
}

/*
 * Router Reachability Probing for a gateway route.
 *
 * Nothing is done when the gateway's neighbour entry is NUD_VALID or was
 * updated within rtr_probe_interval.  Otherwise (including when there is
 * no neighbour entry at all) a solicitation is scheduled through the
 * workqueue.  Router Reachability Probes MUST be rate-limited to no more
 * than one per minute.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;

	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		write_lock(&neigh->lock);
		if ((neigh->nud_state & NUD_VALID) ||
		    !time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
			write_unlock(&neigh->lock);
			goto unlock_rcu;
		}
	}

	work = kmalloc(sizeof(*work), GFP_ATOMIC);

	if (neigh) {
		/* Only mark the neighbour if the probe will really be sent. */
		if (work)
			__neigh_set_probe_once(neigh);
		write_unlock(&neigh->lock);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

unlock_rcu:
	rcu_read_unlock_bh();
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF there is no router probing. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
554
1da177e4 555/*
554cfb7e 556 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 557 */
b6f99a21 558static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 559{
d1918542 560 struct net_device *dev = rt->dst.dev;
161980f4 561 if (!oif || dev->ifindex == oif)
554cfb7e 562 return 2;
161980f4
DM
563 if ((dev->flags & IFF_LOOPBACK) &&
564 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
565 return 1;
566 return 0;
554cfb7e 567}
1da177e4 568
afc154e9 569static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 570{
f2c31e32 571 struct neighbour *neigh;
afc154e9 572 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 573
4d0c5911
YH
574 if (rt->rt6i_flags & RTF_NONEXTHOP ||
575 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 576 return RT6_NUD_SUCCEED;
145a3621
YH
577
578 rcu_read_lock_bh();
579 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
580 if (neigh) {
581 read_lock(&neigh->lock);
554cfb7e 582 if (neigh->nud_state & NUD_VALID)
afc154e9 583 ret = RT6_NUD_SUCCEED;
398bcbeb 584#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 585 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 586 ret = RT6_NUD_SUCCEED;
7e980569
JB
587 else
588 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 589#endif
145a3621 590 read_unlock(&neigh->lock);
afc154e9
HFS
591 } else {
592 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 593 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 594 }
145a3621
YH
595 rcu_read_unlock_bh();
596
a5a81f0b 597 return ret;
1da177e4
LT
598}
599
554cfb7e
YH
600static int rt6_score_route(struct rt6_info *rt, int oif,
601 int strict)
1da177e4 602{
a5a81f0b 603 int m;
1ab1457c 604
4d0c5911 605 m = rt6_check_dev(rt, oif);
77d16f45 606 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 607 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
608#ifdef CONFIG_IPV6_ROUTER_PREF
609 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
610#endif
afc154e9
HFS
611 if (strict & RT6_LOOKUP_F_REACHABLE) {
612 int n = rt6_check_neigh(rt);
613 if (n < 0)
614 return n;
615 }
554cfb7e
YH
616 return m;
617}
618
f11e6659 619static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
620 int *mpri, struct rt6_info *match,
621 bool *do_rr)
554cfb7e 622{
f11e6659 623 int m;
afc154e9 624 bool match_do_rr = false;
f11e6659
DM
625
626 if (rt6_check_expired(rt))
627 goto out;
628
629 m = rt6_score_route(rt, oif, strict);
7e980569 630 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
631 match_do_rr = true;
632 m = 0; /* lowest valid score */
7e980569 633 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 634 goto out;
afc154e9
HFS
635 }
636
637 if (strict & RT6_LOOKUP_F_REACHABLE)
638 rt6_probe(rt);
f11e6659 639
7e980569 640 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 641 if (m > *mpri) {
afc154e9 642 *do_rr = match_do_rr;
f11e6659
DM
643 *mpri = m;
644 match = rt;
f11e6659 645 }
f11e6659
DM
646out:
647 return match;
648}
649
650static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
651 struct rt6_info *rr_head,
afc154e9
HFS
652 u32 metric, int oif, int strict,
653 bool *do_rr)
f11e6659 654{
9fbdcfaf 655 struct rt6_info *rt, *match, *cont;
554cfb7e 656 int mpri = -1;
1da177e4 657
f11e6659 658 match = NULL;
9fbdcfaf
SK
659 cont = NULL;
660 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
661 if (rt->rt6i_metric != metric) {
662 cont = rt;
663 break;
664 }
665
666 match = find_match(rt, oif, strict, &mpri, match, do_rr);
667 }
668
669 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
670 if (rt->rt6i_metric != metric) {
671 cont = rt;
672 break;
673 }
674
afc154e9 675 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
676 }
677
678 if (match || !cont)
679 return match;
680
681 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 682 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 683
f11e6659
DM
684 return match;
685}
1da177e4 686
f11e6659
DM
687static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
688{
689 struct rt6_info *match, *rt0;
8ed67789 690 struct net *net;
afc154e9 691 bool do_rr = false;
1da177e4 692
f11e6659
DM
693 rt0 = fn->rr_ptr;
694 if (!rt0)
695 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 696
afc154e9
HFS
697 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
698 &do_rr);
1da177e4 699
afc154e9 700 if (do_rr) {
d8d1f30b 701 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 702
554cfb7e 703 /* no entries matched; do round-robin */
f11e6659
DM
704 if (!next || next->rt6i_metric != rt0->rt6i_metric)
705 next = fn->leaf;
706
707 if (next != rt0)
708 fn->rr_ptr = next;
1da177e4 709 }
1da177e4 710
d1918542 711 net = dev_net(rt0->dst.dev);
a02cec21 712 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
713}
714
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information option (@opt, @len bytes) received on @dev
 * from router @gwaddr.
 *
 * After validating the option, the matching route (the default router
 * entry for a zero-length prefix, otherwise the route-info entry) is
 * deleted when the lifetime is zero, created when it does not exist yet,
 * or has its preference refreshed; its expiry is then set from the
 * lifetime.
 *
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct route_info *rinfo = (struct route_info *) opt;
	struct net *net = dev_net(dev);
	struct in6_addr prefix_buf, *prefix;
	unsigned long lifetime;
	unsigned int pref;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3 || rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64 && rinfo->length < 2)
		return -EINVAL;
	if (rinfo->prefix_len > 0 && rinfo->length < 1)
		return -EINVAL;

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex);

	/* A zero lifetime withdraws an existing route. */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev->ifindex, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (!rt)
		return 0;

	if (addrconf_finite_timeout(lifetime))
		rt6_set_expires(rt, jiffies + HZ * lifetime);
	else
		rt6_clean_expires(rt);

	ip6_rt_put(rt);
	return 0;
}
#endif
790
a3c00e46
MKL
791static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
792 struct in6_addr *saddr)
793{
794 struct fib6_node *pn;
795 while (1) {
796 if (fn->fn_flags & RTN_TL_ROOT)
797 return NULL;
798 pn = fn->parent;
799 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
800 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
801 else
802 fn = pn;
803 if (fn->fn_flags & RTN_RTINFO)
804 return fn;
805 }
806}
c71099ac 807
8ed67789
DL
808static struct rt6_info *ip6_pol_route_lookup(struct net *net,
809 struct fib6_table *table,
4c9483b2 810 struct flowi6 *fl6, int flags)
1da177e4
LT
811{
812 struct fib6_node *fn;
813 struct rt6_info *rt;
814
c71099ac 815 read_lock_bh(&table->tb6_lock);
4c9483b2 816 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
817restart:
818 rt = fn->leaf;
4c9483b2 819 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 820 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 821 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
822 if (rt == net->ipv6.ip6_null_entry) {
823 fn = fib6_backtrack(fn, &fl6->saddr);
824 if (fn)
825 goto restart;
826 }
d8d1f30b 827 dst_use(&rt->dst, jiffies);
c71099ac 828 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
829 return rt;
830
831}
832
67ba4152 833struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
834 int flags)
835{
836 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
837}
838EXPORT_SYMBOL_GPL(ip6_route_lookup);
839
9acd9f3a
YH
840struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
841 const struct in6_addr *saddr, int oif, int strict)
c71099ac 842{
4c9483b2
DM
843 struct flowi6 fl6 = {
844 .flowi6_oif = oif,
845 .daddr = *daddr,
c71099ac
TG
846 };
847 struct dst_entry *dst;
77d16f45 848 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 849
adaa70bb 850 if (saddr) {
4c9483b2 851 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
852 flags |= RT6_LOOKUP_F_HAS_SADDR;
853 }
854
4c9483b2 855 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
856 if (dst->error == 0)
857 return (struct rt6_info *) dst;
858
859 dst_release(dst);
860
1da177e4
LT
861 return NULL;
862}
7159039a
YH
863EXPORT_SYMBOL(rt6_lookup);
864
c71099ac 865/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
866 It takes new route entry, the addition fails by any reason the
867 route is freed. In any case, if caller does not hold it, it may
868 be destroyed.
869 */
870
e5fd387a 871static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 872 struct mx6_config *mxc)
1da177e4
LT
873{
874 int err;
c71099ac 875 struct fib6_table *table;
1da177e4 876
c71099ac
TG
877 table = rt->rt6i_table;
878 write_lock_bh(&table->tb6_lock);
e715b6d3 879 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 880 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
881
882 return err;
883}
884
40e22e8f
TG
885int ip6_ins_rt(struct rt6_info *rt)
886{
e715b6d3
FW
887 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
888 struct mx6_config mxc = { .mx = NULL, };
889
890 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
891}
892
1716a961 893static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
21efcfa0 894 const struct in6_addr *daddr,
b71d1d42 895 const struct in6_addr *saddr)
1da177e4 896{
1da177e4
LT
897 struct rt6_info *rt;
898
899 /*
900 * Clone the route.
901 */
902
21efcfa0 903 rt = ip6_rt_copy(ort, daddr);
1da177e4
LT
904
905 if (rt) {
249a3630
DJ
906 if (ort->rt6i_dst.plen != 128 &&
907 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
908 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 909
1da177e4 910 rt->rt6i_flags |= RTF_CACHE;
1da177e4
LT
911
912#ifdef CONFIG_IPV6_SUBTREES
913 if (rt->rt6i_src.plen && saddr) {
4e3fd7a0 914 rt->rt6i_src.addr = *saddr;
1da177e4
LT
915 rt->rt6i_src.plen = 128;
916 }
917#endif
95a9a5ba 918 }
1da177e4 919
95a9a5ba
YH
920 return rt;
921}
1da177e4 922
21efcfa0
ED
923static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
924 const struct in6_addr *daddr)
299d9939 925{
21efcfa0
ED
926 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
927
887c95cc 928 if (rt)
299d9939 929 rt->rt6i_flags |= RTF_CACHE;
299d9939
YH
930 return rt;
931}
932
8ed67789 933static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 934 struct flowi6 *fl6, int flags)
1da177e4 935{
367efcb9 936 struct fib6_node *fn, *saved_fn;
519fbd87 937 struct rt6_info *rt, *nrt;
c71099ac 938 int strict = 0;
1da177e4 939 int attempts = 3;
519fbd87 940 int err;
1da177e4 941
77d16f45 942 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
943 if (net->ipv6.devconf_all->forwarding == 0)
944 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 945
a3c00e46 946redo_fib6_lookup_lock:
c71099ac 947 read_lock_bh(&table->tb6_lock);
1da177e4 948
4c9483b2 949 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 950 saved_fn = fn;
1da177e4 951
a3c00e46 952redo_rt6_select:
367efcb9 953 rt = rt6_select(fn, oif, strict);
52bd4c0c 954 if (rt->rt6i_nsiblings)
367efcb9 955 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
956 if (rt == net->ipv6.ip6_null_entry) {
957 fn = fib6_backtrack(fn, &fl6->saddr);
958 if (fn)
959 goto redo_rt6_select;
367efcb9
MKL
960 else if (strict & RT6_LOOKUP_F_REACHABLE) {
961 /* also consider unreachable route */
962 strict &= ~RT6_LOOKUP_F_REACHABLE;
963 fn = saved_fn;
964 goto redo_rt6_select;
965 } else {
966 dst_hold(&rt->dst);
967 read_unlock_bh(&table->tb6_lock);
968 goto out2;
969 }
a3c00e46
MKL
970 }
971
d8d1f30b 972 dst_hold(&rt->dst);
c71099ac 973 read_unlock_bh(&table->tb6_lock);
fb9de91e 974
94c77bb4
MKL
975 if (rt->rt6i_flags & RTF_CACHE)
976 goto out2;
977
c440f160 978 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
4c9483b2 979 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
653437d0 980 else if (!(rt->dst.flags & DST_HOST) || !(rt->dst.flags & RTF_LOCAL))
4c9483b2 981 nrt = rt6_alloc_clone(rt, &fl6->daddr);
7343ff31
DM
982 else
983 goto out2;
e40cf353 984
94e187c0 985 ip6_rt_put(rt);
8ed67789 986 rt = nrt ? : net->ipv6.ip6_null_entry;
1da177e4 987
d8d1f30b 988 dst_hold(&rt->dst);
519fbd87 989 if (nrt) {
40e22e8f 990 err = ip6_ins_rt(nrt);
519fbd87 991 if (!err)
1da177e4 992 goto out2;
1da177e4 993 }
1da177e4 994
519fbd87
YH
995 if (--attempts <= 0)
996 goto out2;
997
998 /*
c71099ac 999 * Race condition! In the gap, when table->tb6_lock was
519fbd87
YH
1000 * released someone could insert this route. Relookup.
1001 */
94e187c0 1002 ip6_rt_put(rt);
a3c00e46 1003 goto redo_fib6_lookup_lock;
519fbd87 1004
1da177e4 1005out2:
d8d1f30b
CG
1006 rt->dst.lastuse = jiffies;
1007 rt->dst.__use++;
c71099ac
TG
1008
1009 return rt;
1da177e4
LT
1010}
1011
8ed67789 1012static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1013 struct flowi6 *fl6, int flags)
4acad72d 1014{
4c9483b2 1015 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1016}
1017
72331bc0
SL
1018static struct dst_entry *ip6_route_input_lookup(struct net *net,
1019 struct net_device *dev,
1020 struct flowi6 *fl6, int flags)
1021{
1022 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1023 flags |= RT6_LOOKUP_F_IFACE;
1024
1025 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1026}
1027
c71099ac
TG
1028void ip6_route_input(struct sk_buff *skb)
1029{
b71d1d42 1030 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1031 struct net *net = dev_net(skb->dev);
adaa70bb 1032 int flags = RT6_LOOKUP_F_HAS_SADDR;
4c9483b2
DM
1033 struct flowi6 fl6 = {
1034 .flowi6_iif = skb->dev->ifindex,
1035 .daddr = iph->daddr,
1036 .saddr = iph->saddr,
6502ca52 1037 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1038 .flowi6_mark = skb->mark,
1039 .flowi6_proto = iph->nexthdr,
c71099ac 1040 };
adaa70bb 1041
72331bc0 1042 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1043}
1044
8ed67789 1045static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1046 struct flowi6 *fl6, int flags)
1da177e4 1047{
4c9483b2 1048 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1049}
1050
67ba4152 1051struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1052 struct flowi6 *fl6)
c71099ac
TG
1053{
1054 int flags = 0;
1055
1fb9489b 1056 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1057
4c9483b2 1058 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
77d16f45 1059 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1060
4c9483b2 1061 if (!ipv6_addr_any(&fl6->saddr))
adaa70bb 1062 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1063 else if (sk)
1064 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1065
4c9483b2 1066 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1067}
7159039a 1068EXPORT_SYMBOL(ip6_route_output);
1da177e4 1069
2774c131 1070struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1071{
5c1e6aa3 1072 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1073 struct dst_entry *new = NULL;
1074
f5b0a874 1075 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1076 if (rt) {
d8d1f30b 1077 new = &rt->dst;
14e50e57 1078
8104891b
SK
1079 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1080 rt6_init_peer(rt, net->ipv6.peers);
1081
14e50e57 1082 new->__use = 1;
352e512c 1083 new->input = dst_discard;
aad88724 1084 new->output = dst_discard_sk;
14e50e57 1085
21efcfa0
ED
1086 if (dst_metrics_read_only(&ort->dst))
1087 new->_metrics = ort->dst._metrics;
1088 else
1089 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1090 rt->rt6i_idev = ort->rt6i_idev;
1091 if (rt->rt6i_idev)
1092 in6_dev_hold(rt->rt6i_idev);
14e50e57 1093
4e3fd7a0 1094 rt->rt6i_gateway = ort->rt6i_gateway;
1716a961 1095 rt->rt6i_flags = ort->rt6i_flags;
14e50e57
DM
1096 rt->rt6i_metric = 0;
1097
1098 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1099#ifdef CONFIG_IPV6_SUBTREES
1100 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1101#endif
1102
1103 dst_free(new);
1104 }
1105
69ead7af
DM
1106 dst_release(dst_orig);
1107 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1108}
14e50e57 1109
1da177e4
LT
1110/*
1111 * Destination cache support functions
1112 */
1113
1114static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1115{
1116 struct rt6_info *rt;
1117
1118 rt = (struct rt6_info *) dst;
1119
6f3118b5
ND
1120 /* All IPV6 dsts are created with ->obsolete set to the value
1121 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1122 * into this function always.
1123 */
e3bc10bd
HFS
1124 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1125 return NULL;
a4477c4d 1126
e3bc10bd
HFS
1127 if (rt6_check_expired(rt))
1128 return NULL;
1129
1130 return dst;
1da177e4
LT
1131}
1132
1133static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1134{
1135 struct rt6_info *rt = (struct rt6_info *) dst;
1136
1137 if (rt) {
54c1a859
YH
1138 if (rt->rt6i_flags & RTF_CACHE) {
1139 if (rt6_check_expired(rt)) {
1140 ip6_del_rt(rt);
1141 dst = NULL;
1142 }
1143 } else {
1da177e4 1144 dst_release(dst);
54c1a859
YH
1145 dst = NULL;
1146 }
1da177e4 1147 }
54c1a859 1148 return dst;
1da177e4
LT
1149}
1150
1151static void ip6_link_failure(struct sk_buff *skb)
1152{
1153 struct rt6_info *rt;
1154
3ffe533c 1155 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1156
adf30907 1157 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1158 if (rt) {
1eb4f758
HFS
1159 if (rt->rt6i_flags & RTF_CACHE) {
1160 dst_hold(&rt->dst);
1161 if (ip6_del_rt(rt))
1162 dst_free(&rt->dst);
1163 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1164 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1165 }
1da177e4
LT
1166 }
1167}
1168
6700c270
DM
1169static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1170 struct sk_buff *skb, u32 mtu)
1da177e4 1171{
67ba4152 1172 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1173
81aded24 1174 dst_confirm(dst);
653437d0 1175 if (mtu < dst_mtu(dst) && (rt6->rt6i_flags & RTF_CACHE)) {
81aded24
DM
1176 struct net *net = dev_net(dst->dev);
1177
1da177e4 1178 rt6->rt6i_flags |= RTF_MODIFIED;
9d289715 1179 if (mtu < IPV6_MIN_MTU)
1da177e4 1180 mtu = IPV6_MIN_MTU;
9d289715 1181
defb3519 1182 dst_metric_set(dst, RTAX_MTU, mtu);
81aded24 1183 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1da177e4
LT
1184 }
1185}
1186
42ae66c8
DM
1187void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1188 int oif, u32 mark)
81aded24
DM
1189{
1190 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1191 struct dst_entry *dst;
1192 struct flowi6 fl6;
1193
1194 memset(&fl6, 0, sizeof(fl6));
1195 fl6.flowi6_oif = oif;
1b3c61dc 1196 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1197 fl6.daddr = iph->daddr;
1198 fl6.saddr = iph->saddr;
6502ca52 1199 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1200
1201 dst = ip6_route_output(net, NULL, &fl6);
1202 if (!dst->error)
6700c270 1203 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
81aded24
DM
1204 dst_release(dst);
1205}
1206EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1207
1208void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1209{
1210 ip6_update_pmtu(skb, sock_net(sk), mtu,
1211 sk->sk_bound_dev_if, sk->sk_mark);
1212}
1213EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1214
b55b76b2
DJ
1215/* Handle redirects */
/*
 * Flow key used for redirect lookups: the regular flowi6 plus the address
 * of the router that sent the redirect.
 *
 * fl6 must remain the first member: ip6_route_redirect() passes &rdfl.fl6
 * to fib6_rule_lookup() and __ip6_route_redirect() casts that flowi6
 * pointer back to struct ip6rd_flowi to reach ->gateway.
 */
1216struct ip6rd_flowi {
1217 struct flowi6 fl6;
1218 struct in6_addr gateway;
1219};
1220
1221static struct rt6_info *__ip6_route_redirect(struct net *net,
1222 struct fib6_table *table,
1223 struct flowi6 *fl6,
1224 int flags)
1225{
1226 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1227 struct rt6_info *rt;
1228 struct fib6_node *fn;
1229
1230 /* Get the "current" route for this destination and
1231 * check if the redirect has come from approriate router.
1232 *
1233 * RFC 4861 specifies that redirects should only be
1234 * accepted if they come from the nexthop to the target.
1235 * Due to the way the routes are chosen, this notion
1236 * is a bit fuzzy and one might need to check all possible
1237 * routes.
1238 */
1239
1240 read_lock_bh(&table->tb6_lock);
1241 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1242restart:
1243 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1244 if (rt6_check_expired(rt))
1245 continue;
1246 if (rt->dst.error)
1247 break;
1248 if (!(rt->rt6i_flags & RTF_GATEWAY))
1249 continue;
1250 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1251 continue;
1252 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1253 continue;
1254 break;
1255 }
1256
1257 if (!rt)
1258 rt = net->ipv6.ip6_null_entry;
1259 else if (rt->dst.error) {
1260 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1261 goto out;
1262 }
1263
1264 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1265 fn = fib6_backtrack(fn, &fl6->saddr);
1266 if (fn)
1267 goto restart;
b55b76b2 1268 }
a3c00e46 1269
b0a1ba59 1270out:
b55b76b2
DJ
1271 dst_hold(&rt->dst);
1272
1273 read_unlock_bh(&table->tb6_lock);
1274
1275 return rt;
1276};
1277
1278static struct dst_entry *ip6_route_redirect(struct net *net,
1279 const struct flowi6 *fl6,
1280 const struct in6_addr *gateway)
1281{
1282 int flags = RT6_LOOKUP_F_HAS_SADDR;
1283 struct ip6rd_flowi rdfl;
1284
1285 rdfl.fl6 = *fl6;
1286 rdfl.gateway = *gateway;
1287
1288 return fib6_rule_lookup(net, &rdfl.fl6,
1289 flags, __ip6_route_redirect);
1290}
1291
3a5ad2ee
DM
1292void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1293{
1294 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1295 struct dst_entry *dst;
1296 struct flowi6 fl6;
1297
1298 memset(&fl6, 0, sizeof(fl6));
e374c618 1299 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1300 fl6.flowi6_oif = oif;
1301 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1302 fl6.daddr = iph->daddr;
1303 fl6.saddr = iph->saddr;
6502ca52 1304 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1305
b55b76b2
DJ
1306 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1307 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1308 dst_release(dst);
1309}
1310EXPORT_SYMBOL_GPL(ip6_redirect);
1311
c92a59ec
DJ
1312void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1313 u32 mark)
1314{
1315 const struct ipv6hdr *iph = ipv6_hdr(skb);
1316 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1317 struct dst_entry *dst;
1318 struct flowi6 fl6;
1319
1320 memset(&fl6, 0, sizeof(fl6));
e374c618 1321 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1322 fl6.flowi6_oif = oif;
1323 fl6.flowi6_mark = mark;
c92a59ec
DJ
1324 fl6.daddr = msg->dest;
1325 fl6.saddr = iph->daddr;
1326
b55b76b2
DJ
1327 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1328 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1329 dst_release(dst);
1330}
1331
3a5ad2ee
DM
1332void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1333{
1334 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1335}
1336EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1337
0dbaee3b 1338static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1339{
0dbaee3b
DM
1340 struct net_device *dev = dst->dev;
1341 unsigned int mtu = dst_mtu(dst);
1342 struct net *net = dev_net(dev);
1343
1da177e4
LT
1344 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1345
5578689a
DL
1346 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1347 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1348
1349 /*
1ab1457c
YH
1350 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1351 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1352 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1353 * rely only on pmtu discovery"
1354 */
1355 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1356 mtu = IPV6_MAXPLEN;
1357 return mtu;
1358}
1359
ebb762f2 1360static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1361{
d33e4553 1362 struct inet6_dev *idev;
618f9bc7
SK
1363 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1364
1365 if (mtu)
30f78d8e 1366 goto out;
618f9bc7
SK
1367
1368 mtu = IPV6_MIN_MTU;
d33e4553
DM
1369
1370 rcu_read_lock();
1371 idev = __in6_dev_get(dst->dev);
1372 if (idev)
1373 mtu = idev->cnf.mtu6;
1374 rcu_read_unlock();
1375
30f78d8e
ED
1376out:
1377 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1378}
1379
3b00944c
YH
/*
 * Singly linked list (chained through dst->next) of the dst entries
 * handed out by icmp6_dst_alloc().  icmp6_dst_gc() and icmp6_clean_all()
 * unlink and free entries from it.  Every access in this file is made
 * with icmp6_dst_lock held via spin_lock_bh().
 */
1380static struct dst_entry *icmp6_dst_gc_list;
1381static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1382
3b00944c 1383struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1384 struct flowi6 *fl6)
1da177e4 1385{
87a11578 1386 struct dst_entry *dst;
1da177e4
LT
1387 struct rt6_info *rt;
1388 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1389 struct net *net = dev_net(dev);
1da177e4 1390
38308473 1391 if (unlikely(!idev))
122bdf67 1392 return ERR_PTR(-ENODEV);
1da177e4 1393
8b96d22d 1394 rt = ip6_dst_alloc(net, dev, 0, NULL);
38308473 1395 if (unlikely(!rt)) {
1da177e4 1396 in6_dev_put(idev);
87a11578 1397 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1398 goto out;
1399 }
1400
8e2ec639
YZ
1401 rt->dst.flags |= DST_HOST;
1402 rt->dst.output = ip6_output;
d8d1f30b 1403 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1404 rt->rt6i_gateway = fl6->daddr;
87a11578 1405 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1406 rt->rt6i_dst.plen = 128;
1407 rt->rt6i_idev = idev;
14edd87d 1408 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1409
3b00944c 1410 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1411 rt->dst.next = icmp6_dst_gc_list;
1412 icmp6_dst_gc_list = &rt->dst;
3b00944c 1413 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1414
5578689a 1415 fib6_force_start_gc(net);
1da177e4 1416
87a11578
DM
1417 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1418
1da177e4 1419out:
87a11578 1420 return dst;
1da177e4
LT
1421}
1422
3d0f24a7 1423int icmp6_dst_gc(void)
1da177e4 1424{
e9476e95 1425 struct dst_entry *dst, **pprev;
3d0f24a7 1426 int more = 0;
1da177e4 1427
3b00944c
YH
1428 spin_lock_bh(&icmp6_dst_lock);
1429 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1430
1da177e4
LT
1431 while ((dst = *pprev) != NULL) {
1432 if (!atomic_read(&dst->__refcnt)) {
1433 *pprev = dst->next;
1434 dst_free(dst);
1da177e4
LT
1435 } else {
1436 pprev = &dst->next;
3d0f24a7 1437 ++more;
1da177e4
LT
1438 }
1439 }
1440
3b00944c 1441 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1442
3d0f24a7 1443 return more;
1da177e4
LT
1444}
1445
1e493d19
DM
1446static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1447 void *arg)
1448{
1449 struct dst_entry *dst, **pprev;
1450
1451 spin_lock_bh(&icmp6_dst_lock);
1452 pprev = &icmp6_dst_gc_list;
1453 while ((dst = *pprev) != NULL) {
1454 struct rt6_info *rt = (struct rt6_info *) dst;
1455 if (func(rt, arg)) {
1456 *pprev = dst->next;
1457 dst_free(dst);
1458 } else {
1459 pprev = &dst->next;
1460 }
1461 }
1462 spin_unlock_bh(&icmp6_dst_lock);
1463}
1464
569d3645 1465static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1466{
86393e52 1467 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1468 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1469 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1470 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1471 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1472 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1473 int entries;
7019b78e 1474
fc66f95c 1475 entries = dst_entries_get_fast(ops);
49a18d86 1476 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1477 entries <= rt_max_size)
1da177e4
LT
1478 goto out;
1479
6891a346 1480 net->ipv6.ip6_rt_gc_expire++;
14956643 1481 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1482 entries = dst_entries_get_slow(ops);
1483 if (entries < ops->gc_thresh)
7019b78e 1484 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1485out:
7019b78e 1486 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1487 return entries > rt_max_size;
1da177e4
LT
1488}
1489
e715b6d3
FW
1490static int ip6_convert_metrics(struct mx6_config *mxc,
1491 const struct fib6_config *cfg)
1492{
1493 struct nlattr *nla;
1494 int remaining;
1495 u32 *mp;
1496
63159f29 1497 if (!cfg->fc_mx)
e715b6d3
FW
1498 return 0;
1499
1500 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1501 if (unlikely(!mp))
1502 return -ENOMEM;
1503
1504 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1505 int type = nla_type(nla);
1506
1507 if (type) {
ea697639
DB
1508 u32 val;
1509
e715b6d3
FW
1510 if (unlikely(type > RTAX_MAX))
1511 goto err;
ea697639
DB
1512 if (type == RTAX_CC_ALGO) {
1513 char tmp[TCP_CA_NAME_MAX];
1514
1515 nla_strlcpy(tmp, nla, sizeof(tmp));
1516 val = tcp_ca_get_key_by_name(tmp);
1517 if (val == TCP_CA_UNSPEC)
1518 goto err;
1519 } else {
1520 val = nla_get_u32(nla);
1521 }
e715b6d3 1522
ea697639 1523 mp[type - 1] = val;
e715b6d3
FW
1524 __set_bit(type - 1, mxc->mx_valid);
1525 }
1526 }
1527
1528 mxc->mx = mp;
1529
1530 return 0;
1531 err:
1532 kfree(mp);
1533 return -EINVAL;
1534}
1da177e4 1535
86872cb5 1536int ip6_route_add(struct fib6_config *cfg)
1da177e4
LT
1537{
1538 int err;
5578689a 1539 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1540 struct rt6_info *rt = NULL;
1541 struct net_device *dev = NULL;
1542 struct inet6_dev *idev = NULL;
c71099ac 1543 struct fib6_table *table;
e715b6d3 1544 struct mx6_config mxc = { .mx = NULL, };
1da177e4
LT
1545 int addr_type;
1546
86872cb5 1547 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1da177e4
LT
1548 return -EINVAL;
1549#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1550 if (cfg->fc_src_len)
1da177e4
LT
1551 return -EINVAL;
1552#endif
86872cb5 1553 if (cfg->fc_ifindex) {
1da177e4 1554 err = -ENODEV;
5578689a 1555 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1556 if (!dev)
1557 goto out;
1558 idev = in6_dev_get(dev);
1559 if (!idev)
1560 goto out;
1561 }
1562
86872cb5
TG
1563 if (cfg->fc_metric == 0)
1564 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1565
d71314b4 1566 err = -ENOBUFS;
38308473
DM
1567 if (cfg->fc_nlinfo.nlh &&
1568 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1569 table = fib6_get_table(net, cfg->fc_table);
38308473 1570 if (!table) {
f3213831 1571 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1572 table = fib6_new_table(net, cfg->fc_table);
1573 }
1574 } else {
1575 table = fib6_new_table(net, cfg->fc_table);
1576 }
38308473
DM
1577
1578 if (!table)
c71099ac 1579 goto out;
c71099ac 1580
c88507fb 1581 rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
1da177e4 1582
38308473 1583 if (!rt) {
1da177e4
LT
1584 err = -ENOMEM;
1585 goto out;
1586 }
1587
1716a961
G
1588 if (cfg->fc_flags & RTF_EXPIRES)
1589 rt6_set_expires(rt, jiffies +
1590 clock_t_to_jiffies(cfg->fc_expires));
1591 else
1592 rt6_clean_expires(rt);
1da177e4 1593
86872cb5
TG
1594 if (cfg->fc_protocol == RTPROT_UNSPEC)
1595 cfg->fc_protocol = RTPROT_BOOT;
1596 rt->rt6i_protocol = cfg->fc_protocol;
1597
1598 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1599
1600 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1601 rt->dst.input = ip6_mc_input;
ab79ad14
1602 else if (cfg->fc_flags & RTF_LOCAL)
1603 rt->dst.input = ip6_input;
1da177e4 1604 else
d8d1f30b 1605 rt->dst.input = ip6_forward;
1da177e4 1606
d8d1f30b 1607 rt->dst.output = ip6_output;
1da177e4 1608
86872cb5
TG
1609 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1610 rt->rt6i_dst.plen = cfg->fc_dst_len;
e5fd387a
MK
1611 if (rt->rt6i_dst.plen == 128) {
1612 rt->dst.flags |= DST_HOST;
1613 dst_metrics_set_force_overwrite(&rt->dst);
8e2ec639 1614 }
e5fd387a 1615
1da177e4 1616#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1617 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1618 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1619#endif
1620
86872cb5 1621 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1622
1623 /* We cannot add true routes via loopback here,
1624 they would result in kernel looping; promote them to reject routes
1625 */
86872cb5 1626 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1627 (dev && (dev->flags & IFF_LOOPBACK) &&
1628 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1629 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1630 /* hold loopback dev/idev if we haven't done so. */
5578689a 1631 if (dev != net->loopback_dev) {
1da177e4
LT
1632 if (dev) {
1633 dev_put(dev);
1634 in6_dev_put(idev);
1635 }
5578689a 1636 dev = net->loopback_dev;
1da177e4
LT
1637 dev_hold(dev);
1638 idev = in6_dev_get(dev);
1639 if (!idev) {
1640 err = -ENODEV;
1641 goto out;
1642 }
1643 }
1da177e4 1644 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1645 switch (cfg->fc_type) {
1646 case RTN_BLACKHOLE:
1647 rt->dst.error = -EINVAL;
aad88724 1648 rt->dst.output = dst_discard_sk;
7150aede 1649 rt->dst.input = dst_discard;
ef2c7d7b
ND
1650 break;
1651 case RTN_PROHIBIT:
1652 rt->dst.error = -EACCES;
7150aede
K
1653 rt->dst.output = ip6_pkt_prohibit_out;
1654 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1655 break;
b4949ab2 1656 case RTN_THROW:
ef2c7d7b 1657 default:
7150aede
K
1658 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1659 : -ENETUNREACH;
1660 rt->dst.output = ip6_pkt_discard_out;
1661 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1662 break;
1663 }
1da177e4
LT
1664 goto install_route;
1665 }
1666
86872cb5 1667 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1668 const struct in6_addr *gw_addr;
1da177e4
LT
1669 int gwa_type;
1670
86872cb5 1671 gw_addr = &cfg->fc_gateway;
4e3fd7a0 1672 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1673 gwa_type = ipv6_addr_type(gw_addr);
1674
1675 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1676 struct rt6_info *grt;
1677
1678 /* IPv6 strictly inhibits using not link-local
1679 addresses as nexthop address.
1680 Otherwise, router will not able to send redirects.
1681 It is very good, but in some (rare!) circumstances
1682 (SIT, PtP, NBMA NOARP links) it is handy to allow
1683 some exceptions. --ANK
1684 */
1685 err = -EINVAL;
38308473 1686 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1687 goto out;
1688
5578689a 1689 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1690
1691 err = -EHOSTUNREACH;
38308473 1692 if (!grt)
1da177e4
LT
1693 goto out;
1694 if (dev) {
d1918542 1695 if (dev != grt->dst.dev) {
94e187c0 1696 ip6_rt_put(grt);
1da177e4
LT
1697 goto out;
1698 }
1699 } else {
d1918542 1700 dev = grt->dst.dev;
1da177e4
LT
1701 idev = grt->rt6i_idev;
1702 dev_hold(dev);
1703 in6_dev_hold(grt->rt6i_idev);
1704 }
38308473 1705 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1706 err = 0;
94e187c0 1707 ip6_rt_put(grt);
1da177e4
LT
1708
1709 if (err)
1710 goto out;
1711 }
1712 err = -EINVAL;
38308473 1713 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1714 goto out;
1715 }
1716
1717 err = -ENODEV;
38308473 1718 if (!dev)
1da177e4
LT
1719 goto out;
1720
c3968a85
DW
1721 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1722 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1723 err = -EINVAL;
1724 goto out;
1725 }
4e3fd7a0 1726 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1727 rt->rt6i_prefsrc.plen = 128;
1728 } else
1729 rt->rt6i_prefsrc.plen = 0;
1730
86872cb5 1731 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1732
1733install_route:
d8d1f30b 1734 rt->dst.dev = dev;
1da177e4 1735 rt->rt6i_idev = idev;
c71099ac 1736 rt->rt6i_table = table;
63152fc0 1737
c346dca1 1738 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1739
e715b6d3
FW
1740 err = ip6_convert_metrics(&mxc, cfg);
1741 if (err)
1742 goto out;
1da177e4 1743
e715b6d3
FW
1744 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1745
1746 kfree(mxc.mx);
1747 return err;
1da177e4
LT
1748out:
1749 if (dev)
1750 dev_put(dev);
1751 if (idev)
1752 in6_dev_put(idev);
1753 if (rt)
d8d1f30b 1754 dst_free(&rt->dst);
1da177e4
LT
1755 return err;
1756}
1757
86872cb5 1758static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
1759{
1760 int err;
c71099ac 1761 struct fib6_table *table;
d1918542 1762 struct net *net = dev_net(rt->dst.dev);
1da177e4 1763
6825a26c
G
1764 if (rt == net->ipv6.ip6_null_entry) {
1765 err = -ENOENT;
1766 goto out;
1767 }
6c813a72 1768
c71099ac
TG
1769 table = rt->rt6i_table;
1770 write_lock_bh(&table->tb6_lock);
86872cb5 1771 err = fib6_del(rt, info);
c71099ac 1772 write_unlock_bh(&table->tb6_lock);
1da177e4 1773
6825a26c 1774out:
94e187c0 1775 ip6_rt_put(rt);
1da177e4
LT
1776 return err;
1777}
1778
e0a1ad73
TG
1779int ip6_del_rt(struct rt6_info *rt)
1780{
4d1169c1 1781 struct nl_info info = {
d1918542 1782 .nl_net = dev_net(rt->dst.dev),
4d1169c1 1783 };
528c4ceb 1784 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
1785}
1786
86872cb5 1787static int ip6_route_del(struct fib6_config *cfg)
1da177e4 1788{
c71099ac 1789 struct fib6_table *table;
1da177e4
LT
1790 struct fib6_node *fn;
1791 struct rt6_info *rt;
1792 int err = -ESRCH;
1793
5578689a 1794 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 1795 if (!table)
c71099ac
TG
1796 return err;
1797
1798 read_lock_bh(&table->tb6_lock);
1da177e4 1799
c71099ac 1800 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
1801 &cfg->fc_dst, cfg->fc_dst_len,
1802 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 1803
1da177e4 1804 if (fn) {
d8d1f30b 1805 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
1806 if ((rt->rt6i_flags & RTF_CACHE) &&
1807 !(cfg->fc_flags & RTF_CACHE))
1808 continue;
86872cb5 1809 if (cfg->fc_ifindex &&
d1918542
DM
1810 (!rt->dst.dev ||
1811 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 1812 continue;
86872cb5
TG
1813 if (cfg->fc_flags & RTF_GATEWAY &&
1814 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 1815 continue;
86872cb5 1816 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 1817 continue;
d8d1f30b 1818 dst_hold(&rt->dst);
c71099ac 1819 read_unlock_bh(&table->tb6_lock);
1da177e4 1820
86872cb5 1821 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
1822 }
1823 }
c71099ac 1824 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
1825
1826 return err;
1827}
1828
6700c270 1829static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 1830{
e8599ff4 1831 struct net *net = dev_net(skb->dev);
a6279458 1832 struct netevent_redirect netevent;
e8599ff4 1833 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
1834 struct ndisc_options ndopts;
1835 struct inet6_dev *in6_dev;
1836 struct neighbour *neigh;
71bcdba0 1837 struct rd_msg *msg;
6e157b6a
DM
1838 int optlen, on_link;
1839 u8 *lladdr;
e8599ff4 1840
29a3cad5 1841 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 1842 optlen -= sizeof(*msg);
e8599ff4
DM
1843
1844 if (optlen < 0) {
6e157b6a 1845 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
1846 return;
1847 }
1848
71bcdba0 1849 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 1850
71bcdba0 1851 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 1852 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
1853 return;
1854 }
1855
6e157b6a 1856 on_link = 0;
71bcdba0 1857 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 1858 on_link = 1;
71bcdba0 1859 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 1860 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 1861 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
1862 return;
1863 }
1864
1865 in6_dev = __in6_dev_get(skb->dev);
1866 if (!in6_dev)
1867 return;
1868 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1869 return;
1870
1871 /* RFC2461 8.1:
1872 * The IP source address of the Redirect MUST be the same as the current
1873 * first-hop router for the specified ICMP Destination Address.
1874 */
1875
71bcdba0 1876 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
1877 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1878 return;
1879 }
6e157b6a
DM
1880
1881 lladdr = NULL;
e8599ff4
DM
1882 if (ndopts.nd_opts_tgt_lladdr) {
1883 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1884 skb->dev);
1885 if (!lladdr) {
1886 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1887 return;
1888 }
1889 }
1890
6e157b6a
DM
1891 rt = (struct rt6_info *) dst;
1892 if (rt == net->ipv6.ip6_null_entry) {
1893 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 1894 return;
6e157b6a 1895 }
e8599ff4 1896
6e157b6a
DM
1897 /* Redirect received -> path was valid.
1898 * Look, redirects are sent only in response to data packets,
1899 * so that this nexthop apparently is reachable. --ANK
1900 */
1901 dst_confirm(&rt->dst);
a6279458 1902
71bcdba0 1903 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
1904 if (!neigh)
1905 return;
a6279458 1906
1da177e4
LT
1907 /*
1908 * We have finally decided to accept it.
1909 */
1910
1ab1457c 1911 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
1912 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1913 NEIGH_UPDATE_F_OVERRIDE|
1914 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1915 NEIGH_UPDATE_F_ISROUTER))
1916 );
1917
71bcdba0 1918 nrt = ip6_rt_copy(rt, &msg->dest);
38308473 1919 if (!nrt)
1da177e4
LT
1920 goto out;
1921
1922 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1923 if (on_link)
1924 nrt->rt6i_flags &= ~RTF_GATEWAY;
1925
4e3fd7a0 1926 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 1927
40e22e8f 1928 if (ip6_ins_rt(nrt))
1da177e4
LT
1929 goto out;
1930
d8d1f30b
CG
1931 netevent.old = &rt->dst;
1932 netevent.new = &nrt->dst;
71bcdba0 1933 netevent.daddr = &msg->dest;
60592833 1934 netevent.neigh = neigh;
8d71740c
TT
1935 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1936
38308473 1937 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 1938 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 1939 ip6_del_rt(rt);
1da177e4
LT
1940 }
1941
1942out:
e8599ff4 1943 neigh_release(neigh);
6e157b6a
DM
1944}
1945
1da177e4
LT
1946/*
1947 * Misc support functions
1948 */
1949
1716a961 1950static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
21efcfa0 1951 const struct in6_addr *dest)
1da177e4 1952{
d1918542 1953 struct net *net = dev_net(ort->dst.dev);
8b96d22d
DM
1954 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1955 ort->rt6i_table);
1da177e4
LT
1956
1957 if (rt) {
d8d1f30b
CG
1958 rt->dst.input = ort->dst.input;
1959 rt->dst.output = ort->dst.output;
8e2ec639 1960 rt->dst.flags |= DST_HOST;
d8d1f30b 1961
4e3fd7a0 1962 rt->rt6i_dst.addr = *dest;
8e2ec639 1963 rt->rt6i_dst.plen = 128;
defb3519 1964 dst_copy_metrics(&rt->dst, &ort->dst);
d8d1f30b 1965 rt->dst.error = ort->dst.error;
1da177e4
LT
1966 rt->rt6i_idev = ort->rt6i_idev;
1967 if (rt->rt6i_idev)
1968 in6_dev_hold(rt->rt6i_idev);
d8d1f30b 1969 rt->dst.lastuse = jiffies;
1da177e4 1970
550bab42
JA
1971 if (ort->rt6i_flags & RTF_GATEWAY)
1972 rt->rt6i_gateway = ort->rt6i_gateway;
1973 else
1974 rt->rt6i_gateway = *dest;
1716a961 1975 rt->rt6i_flags = ort->rt6i_flags;
24f5b855 1976 rt6_set_from(rt, ort);
1da177e4
LT
1977 rt->rt6i_metric = 0;
1978
1da177e4
LT
1979#ifdef CONFIG_IPV6_SUBTREES
1980 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1981#endif
0f6c6392 1982 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
c71099ac 1983 rt->rt6i_table = ort->rt6i_table;
1da177e4
LT
1984 }
1985 return rt;
1986}
1987
70ceb4f5 1988#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 1989static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
1990 const struct in6_addr *prefix, int prefixlen,
1991 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
1992{
1993 struct fib6_node *fn;
1994 struct rt6_info *rt = NULL;
c71099ac
TG
1995 struct fib6_table *table;
1996
efa2cea0 1997 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 1998 if (!table)
c71099ac 1999 return NULL;
70ceb4f5 2000
5744dd9b 2001 read_lock_bh(&table->tb6_lock);
67ba4152 2002 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2003 if (!fn)
2004 goto out;
2005
d8d1f30b 2006 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2007 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2008 continue;
2009 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2010 continue;
2011 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2012 continue;
d8d1f30b 2013 dst_hold(&rt->dst);
70ceb4f5
YH
2014 break;
2015 }
2016out:
5744dd9b 2017 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2018 return rt;
2019}
2020
efa2cea0 2021static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2022 const struct in6_addr *prefix, int prefixlen,
2023 const struct in6_addr *gwaddr, int ifindex,
95c96174 2024 unsigned int pref)
70ceb4f5 2025{
86872cb5
TG
2026 struct fib6_config cfg = {
2027 .fc_table = RT6_TABLE_INFO,
238fc7ea 2028 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2029 .fc_ifindex = ifindex,
2030 .fc_dst_len = prefixlen,
2031 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2032 RTF_UP | RTF_PREF(pref),
15e47304 2033 .fc_nlinfo.portid = 0,
efa2cea0
DL
2034 .fc_nlinfo.nlh = NULL,
2035 .fc_nlinfo.nl_net = net,
86872cb5
TG
2036 };
2037
4e3fd7a0
AD
2038 cfg.fc_dst = *prefix;
2039 cfg.fc_gateway = *gwaddr;
70ceb4f5 2040
e317da96
YH
2041 /* We should treat it as a default route if prefix length is 0. */
2042 if (!prefixlen)
86872cb5 2043 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2044
86872cb5 2045 ip6_route_add(&cfg);
70ceb4f5 2046
efa2cea0 2047 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2048}
2049#endif
2050
b71d1d42 2051struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2052{
1da177e4 2053 struct rt6_info *rt;
c71099ac 2054 struct fib6_table *table;
1da177e4 2055
c346dca1 2056 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2057 if (!table)
c71099ac 2058 return NULL;
1da177e4 2059
5744dd9b 2060 read_lock_bh(&table->tb6_lock);
67ba4152 2061 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2062 if (dev == rt->dst.dev &&
045927ff 2063 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2064 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2065 break;
2066 }
2067 if (rt)
d8d1f30b 2068 dst_hold(&rt->dst);
5744dd9b 2069 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2070 return rt;
2071}
2072
b71d1d42 2073struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2074 struct net_device *dev,
2075 unsigned int pref)
1da177e4 2076{
86872cb5
TG
2077 struct fib6_config cfg = {
2078 .fc_table = RT6_TABLE_DFLT,
238fc7ea 2079 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2080 .fc_ifindex = dev->ifindex,
2081 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2082 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2083 .fc_nlinfo.portid = 0,
5578689a 2084 .fc_nlinfo.nlh = NULL,
c346dca1 2085 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2086 };
1da177e4 2087
4e3fd7a0 2088 cfg.fc_gateway = *gwaddr;
1da177e4 2089
86872cb5 2090 ip6_route_add(&cfg);
1da177e4 2091
1da177e4
LT
2092 return rt6_get_dflt_router(gwaddr, dev);
2093}
2094
7b4da532 2095void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2096{
2097 struct rt6_info *rt;
c71099ac
TG
2098 struct fib6_table *table;
2099
2100 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2101 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2102 if (!table)
c71099ac 2103 return;
1da177e4
LT
2104
2105restart:
c71099ac 2106 read_lock_bh(&table->tb6_lock);
d8d1f30b 2107 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2108 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2109 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2110 dst_hold(&rt->dst);
c71099ac 2111 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2112 ip6_del_rt(rt);
1da177e4
LT
2113 goto restart;
2114 }
2115 }
c71099ac 2116 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2117}
2118
5578689a
DL
2119static void rtmsg_to_fib6_config(struct net *net,
2120 struct in6_rtmsg *rtmsg,
86872cb5
TG
2121 struct fib6_config *cfg)
2122{
2123 memset(cfg, 0, sizeof(*cfg));
2124
2125 cfg->fc_table = RT6_TABLE_MAIN;
2126 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2127 cfg->fc_metric = rtmsg->rtmsg_metric;
2128 cfg->fc_expires = rtmsg->rtmsg_info;
2129 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2130 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2131 cfg->fc_flags = rtmsg->rtmsg_flags;
2132
5578689a 2133 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2134
4e3fd7a0
AD
2135 cfg->fc_dst = rtmsg->rtmsg_dst;
2136 cfg->fc_src = rtmsg->rtmsg_src;
2137 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2138}
2139
5578689a 2140int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2141{
86872cb5 2142 struct fib6_config cfg;
1da177e4
LT
2143 struct in6_rtmsg rtmsg;
2144 int err;
2145
67ba4152 2146 switch (cmd) {
1da177e4
LT
2147 case SIOCADDRT: /* Add a route */
2148 case SIOCDELRT: /* Delete a route */
af31f412 2149 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2150 return -EPERM;
2151 err = copy_from_user(&rtmsg, arg,
2152 sizeof(struct in6_rtmsg));
2153 if (err)
2154 return -EFAULT;
86872cb5 2155
5578689a 2156 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2157
1da177e4
LT
2158 rtnl_lock();
2159 switch (cmd) {
2160 case SIOCADDRT:
86872cb5 2161 err = ip6_route_add(&cfg);
1da177e4
LT
2162 break;
2163 case SIOCDELRT:
86872cb5 2164 err = ip6_route_del(&cfg);
1da177e4
LT
2165 break;
2166 default:
2167 err = -EINVAL;
2168 }
2169 rtnl_unlock();
2170
2171 return err;
3ff50b79 2172 }
1da177e4
LT
2173
2174 return -EINVAL;
2175}
2176
2177/*
2178 * Drop the packet on the floor
2179 */
2180
d5fdd6ba 2181static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2182{
612f09e8 2183 int type;
adf30907 2184 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2185 switch (ipstats_mib_noroutes) {
2186 case IPSTATS_MIB_INNOROUTES:
0660e03f 2187 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2188 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2189 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2190 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2191 break;
2192 }
2193 /* FALLTHROUGH */
2194 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2195 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2196 ipstats_mib_noroutes);
612f09e8
YH
2197 break;
2198 }
3ffe533c 2199 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2200 kfree_skb(skb);
2201 return 0;
2202}
2203
9ce8ade0
TG
2204static int ip6_pkt_discard(struct sk_buff *skb)
2205{
612f09e8 2206 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2207}
2208
aad88724 2209static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
1da177e4 2210{
adf30907 2211 skb->dev = skb_dst(skb)->dev;
612f09e8 2212 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2213}
2214
9ce8ade0
TG
2215static int ip6_pkt_prohibit(struct sk_buff *skb)
2216{
612f09e8 2217 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2218}
2219
aad88724 2220static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
9ce8ade0 2221{
adf30907 2222 skb->dev = skb_dst(skb)->dev;
612f09e8 2223 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2224}
2225
1da177e4
LT
2226/*
2227 * Allocate a dst for local (unicast / anycast) address.
2228 */
2229
2230struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2231 const struct in6_addr *addr,
8f031519 2232 bool anycast)
1da177e4 2233{
c346dca1 2234 struct net *net = dev_net(idev->dev);
a3300ef4
HFS
2235 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2236 DST_NOCOUNT, NULL);
2237 if (!rt)
1da177e4
LT
2238 return ERR_PTR(-ENOMEM);
2239
1da177e4
LT
2240 in6_dev_hold(idev);
2241
11d53b49 2242 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2243 rt->dst.input = ip6_input;
2244 rt->dst.output = ip6_output;
1da177e4 2245 rt->rt6i_idev = idev;
1da177e4
LT
2246
2247 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2248 if (anycast)
2249 rt->rt6i_flags |= RTF_ANYCAST;
2250 else
1da177e4 2251 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2252
550bab42 2253 rt->rt6i_gateway = *addr;
4e3fd7a0 2254 rt->rt6i_dst.addr = *addr;
1da177e4 2255 rt->rt6i_dst.plen = 128;
5578689a 2256 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
1da177e4 2257
d8d1f30b 2258 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2259
2260 return rt;
2261}
2262
c3968a85
DW
2263int ip6_route_get_saddr(struct net *net,
2264 struct rt6_info *rt,
b71d1d42 2265 const struct in6_addr *daddr,
c3968a85
DW
2266 unsigned int prefs,
2267 struct in6_addr *saddr)
2268{
67ba4152 2269 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt);
c3968a85
DW
2270 int err = 0;
2271 if (rt->rt6i_prefsrc.plen)
4e3fd7a0 2272 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2273 else
2274 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2275 daddr, prefs, saddr);
2276 return err;
2277}
2278
2279/* remove deleted ip from prefsrc entries */
2280struct arg_dev_net_ip {
2281 struct net_device *dev;
2282 struct net *net;
2283 struct in6_addr *addr;
2284};
2285
2286static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2287{
2288 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2289 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2290 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2291
d1918542 2292 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2293 rt != net->ipv6.ip6_null_entry &&
2294 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2295 /* remove prefsrc entry */
2296 rt->rt6i_prefsrc.plen = 0;
2297 }
2298 return 0;
2299}
2300
2301void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2302{
2303 struct net *net = dev_net(ifp->idev->dev);
2304 struct arg_dev_net_ip adni = {
2305 .dev = ifp->idev->dev,
2306 .net = net,
2307 .addr = &ifp->addr,
2308 };
0c3584d5 2309 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2310}
2311
be7a010d
DJ
2312#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2313#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2314
2315/* Remove routers and update dst entries when gateway turn into host. */
2316static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2317{
2318 struct in6_addr *gateway = (struct in6_addr *)arg;
2319
2320 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2321 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2322 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2323 return -1;
2324 }
2325 return 0;
2326}
2327
2328void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2329{
2330 fib6_clean_all(net, fib6_clean_tohost, gateway);
2331}
2332
8ed67789
DL
2333struct arg_dev_net {
2334 struct net_device *dev;
2335 struct net *net;
2336};
2337
1da177e4
LT
2338static int fib6_ifdown(struct rt6_info *rt, void *arg)
2339{
bc3ef660 2340 const struct arg_dev_net *adn = arg;
2341 const struct net_device *dev = adn->dev;
8ed67789 2342
d1918542 2343 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2344 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2345 return -1;
c159d30c 2346
1da177e4
LT
2347 return 0;
2348}
2349
f3db4851 2350void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2351{
8ed67789
DL
2352 struct arg_dev_net adn = {
2353 .dev = dev,
2354 .net = net,
2355 };
2356
0c3584d5 2357 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2358 icmp6_clean_all(fib6_ifdown, &adn);
1da177e4
LT
2359}
2360
95c96174 2361struct rt6_mtu_change_arg {
1da177e4 2362 struct net_device *dev;
95c96174 2363 unsigned int mtu;
1da177e4
LT
2364};
2365
2366static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2367{
2368 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2369 struct inet6_dev *idev;
2370
2371 /* In IPv6 pmtu discovery is not optional,
2372 so that RTAX_MTU lock cannot disable it.
2373 We still use this lock to block changes
2374 caused by addrconf/ndisc.
2375 */
2376
2377 idev = __in6_dev_get(arg->dev);
38308473 2378 if (!idev)
1da177e4
LT
2379 return 0;
2380
2381 /* For administrative MTU increase, there is no way to discover
2382 IPv6 PMTU increase, so PMTU increase should be updated here.
2383 Since RFC 1981 doesn't include administrative MTU increase
2384 update PMTU increase is a MUST. (i.e. jumbo frame)
2385 */
2386 /*
2387 If new MTU is less than route PMTU, this new MTU will be the
2388 lowest MTU in the path, update the route PMTU to reflect PMTU
2389 decreases; if new MTU is greater than route PMTU, and the
2390 old MTU is the lowest MTU in the path, update the route PMTU
2391 to reflect the increase. In this case if the other nodes' MTU
2392 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2393 PMTU discouvery.
2394 */
d1918542 2395 if (rt->dst.dev == arg->dev &&
d8d1f30b
CG
2396 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2397 (dst_mtu(&rt->dst) >= arg->mtu ||
2398 (dst_mtu(&rt->dst) < arg->mtu &&
2399 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
defb3519 2400 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
566cfd8f 2401 }
1da177e4
LT
2402 return 0;
2403}
2404
95c96174 2405void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2406{
c71099ac
TG
2407 struct rt6_mtu_change_arg arg = {
2408 .dev = dev,
2409 .mtu = mtu,
2410 };
1da177e4 2411
0c3584d5 2412 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2413}
2414
ef7c79ed 2415static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2416 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2417 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2418 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2419 [RTA_PRIORITY] = { .type = NLA_U32 },
2420 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2421 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2422 [RTA_PREF] = { .type = NLA_U8 },
86872cb5
TG
2423};
2424
2425static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2426 struct fib6_config *cfg)
1da177e4 2427{
86872cb5
TG
2428 struct rtmsg *rtm;
2429 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2430 unsigned int pref;
86872cb5 2431 int err;
1da177e4 2432
86872cb5
TG
2433 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2434 if (err < 0)
2435 goto errout;
1da177e4 2436
86872cb5
TG
2437 err = -EINVAL;
2438 rtm = nlmsg_data(nlh);
2439 memset(cfg, 0, sizeof(*cfg));
2440
2441 cfg->fc_table = rtm->rtm_table;
2442 cfg->fc_dst_len = rtm->rtm_dst_len;
2443 cfg->fc_src_len = rtm->rtm_src_len;
2444 cfg->fc_flags = RTF_UP;
2445 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2446 cfg->fc_type = rtm->rtm_type;
86872cb5 2447
ef2c7d7b
ND
2448 if (rtm->rtm_type == RTN_UNREACHABLE ||
2449 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2450 rtm->rtm_type == RTN_PROHIBIT ||
2451 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2452 cfg->fc_flags |= RTF_REJECT;
2453
ab79ad14
2454 if (rtm->rtm_type == RTN_LOCAL)
2455 cfg->fc_flags |= RTF_LOCAL;
2456
1f56a01f
MKL
2457 if (rtm->rtm_flags & RTM_F_CLONED)
2458 cfg->fc_flags |= RTF_CACHE;
2459
15e47304 2460 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2461 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2462 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2463
2464 if (tb[RTA_GATEWAY]) {
67b61f6c 2465 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2466 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2467 }
86872cb5
TG
2468
2469 if (tb[RTA_DST]) {
2470 int plen = (rtm->rtm_dst_len + 7) >> 3;
2471
2472 if (nla_len(tb[RTA_DST]) < plen)
2473 goto errout;
2474
2475 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2476 }
86872cb5
TG
2477
2478 if (tb[RTA_SRC]) {
2479 int plen = (rtm->rtm_src_len + 7) >> 3;
2480
2481 if (nla_len(tb[RTA_SRC]) < plen)
2482 goto errout;
2483
2484 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2485 }
86872cb5 2486
c3968a85 2487 if (tb[RTA_PREFSRC])
67b61f6c 2488 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2489
86872cb5
TG
2490 if (tb[RTA_OIF])
2491 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2492
2493 if (tb[RTA_PRIORITY])
2494 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2495
2496 if (tb[RTA_METRICS]) {
2497 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2498 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2499 }
86872cb5
TG
2500
2501 if (tb[RTA_TABLE])
2502 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2503
51ebd318
ND
2504 if (tb[RTA_MULTIPATH]) {
2505 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2506 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2507 }
2508
c78ba6d6
LR
2509 if (tb[RTA_PREF]) {
2510 pref = nla_get_u8(tb[RTA_PREF]);
2511 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2512 pref != ICMPV6_ROUTER_PREF_HIGH)
2513 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2514 cfg->fc_flags |= RTF_PREF(pref);
2515 }
2516
86872cb5
TG
2517 err = 0;
2518errout:
2519 return err;
1da177e4
LT
2520}
2521
51ebd318
ND
2522static int ip6_route_multipath(struct fib6_config *cfg, int add)
2523{
2524 struct fib6_config r_cfg;
2525 struct rtnexthop *rtnh;
2526 int remaining;
2527 int attrlen;
2528 int err = 0, last_err = 0;
2529
2530beginning:
2531 rtnh = (struct rtnexthop *)cfg->fc_mp;
2532 remaining = cfg->fc_mp_len;
2533
2534 /* Parse a Multipath Entry */
2535 while (rtnh_ok(rtnh, remaining)) {
2536 memcpy(&r_cfg, cfg, sizeof(*cfg));
2537 if (rtnh->rtnh_ifindex)
2538 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2539
2540 attrlen = rtnh_attrlen(rtnh);
2541 if (attrlen > 0) {
2542 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2543
2544 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2545 if (nla) {
67b61f6c 2546 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2547 r_cfg.fc_flags |= RTF_GATEWAY;
2548 }
2549 }
2550 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2551 if (err) {
2552 last_err = err;
2553 /* If we are trying to remove a route, do not stop the
2554 * loop when ip6_route_del() fails (because next hop is
2555 * already gone), we should try to remove all next hops.
2556 */
2557 if (add) {
2558 /* If add fails, we should try to delete all
2559 * next hops that have been already added.
2560 */
2561 add = 0;
2562 goto beginning;
2563 }
2564 }
1a72418b
ND
2565 /* Because each route is added like a single route we remove
2566 * this flag after the first nexthop (if there is a collision,
2567 * we have already fail to add the first nexthop:
2568 * fib6_add_rt2node() has reject it).
2569 */
2570 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
51ebd318
ND
2571 rtnh = rtnh_next(rtnh, &remaining);
2572 }
2573
2574 return last_err;
2575}
2576
67ba4152 2577static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2578{
86872cb5
TG
2579 struct fib6_config cfg;
2580 int err;
1da177e4 2581
86872cb5
TG
2582 err = rtm_to_fib6_config(skb, nlh, &cfg);
2583 if (err < 0)
2584 return err;
2585
51ebd318
ND
2586 if (cfg.fc_mp)
2587 return ip6_route_multipath(&cfg, 0);
2588 else
2589 return ip6_route_del(&cfg);
1da177e4
LT
2590}
2591
67ba4152 2592static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2593{
86872cb5
TG
2594 struct fib6_config cfg;
2595 int err;
1da177e4 2596
86872cb5
TG
2597 err = rtm_to_fib6_config(skb, nlh, &cfg);
2598 if (err < 0)
2599 return err;
2600
51ebd318
ND
2601 if (cfg.fc_mp)
2602 return ip6_route_multipath(&cfg, 1);
2603 else
2604 return ip6_route_add(&cfg);
1da177e4
LT
2605}
2606
339bf98f
TG
2607static inline size_t rt6_nlmsg_size(void)
2608{
2609 return NLMSG_ALIGN(sizeof(struct rtmsg))
2610 + nla_total_size(16) /* RTA_SRC */
2611 + nla_total_size(16) /* RTA_DST */
2612 + nla_total_size(16) /* RTA_GATEWAY */
2613 + nla_total_size(16) /* RTA_PREFSRC */
2614 + nla_total_size(4) /* RTA_TABLE */
2615 + nla_total_size(4) /* RTA_IIF */
2616 + nla_total_size(4) /* RTA_OIF */
2617 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 2618 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 2619 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6
LR
2620 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
2621 + nla_total_size(1); /* RTA_PREF */
339bf98f
TG
2622}
2623
191cd582
BH
2624static int rt6_fill_node(struct net *net,
2625 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 2626 struct in6_addr *dst, struct in6_addr *src,
15e47304 2627 int iif, int type, u32 portid, u32 seq,
7bc570c8 2628 int prefix, int nowait, unsigned int flags)
1da177e4
LT
2629{
2630 struct rtmsg *rtm;
2d7202bf 2631 struct nlmsghdr *nlh;
e3703b3d 2632 long expires;
9e762a4a 2633 u32 table;
1da177e4
LT
2634
2635 if (prefix) { /* user wants prefix routes only */
2636 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2637 /* success since this is not a prefix route */
2638 return 1;
2639 }
2640 }
2641
15e47304 2642 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 2643 if (!nlh)
26932566 2644 return -EMSGSIZE;
2d7202bf
TG
2645
2646 rtm = nlmsg_data(nlh);
1da177e4
LT
2647 rtm->rtm_family = AF_INET6;
2648 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2649 rtm->rtm_src_len = rt->rt6i_src.plen;
2650 rtm->rtm_tos = 0;
c71099ac 2651 if (rt->rt6i_table)
9e762a4a 2652 table = rt->rt6i_table->tb6_id;
c71099ac 2653 else
9e762a4a
PM
2654 table = RT6_TABLE_UNSPEC;
2655 rtm->rtm_table = table;
c78679e8
DM
2656 if (nla_put_u32(skb, RTA_TABLE, table))
2657 goto nla_put_failure;
ef2c7d7b
ND
2658 if (rt->rt6i_flags & RTF_REJECT) {
2659 switch (rt->dst.error) {
2660 case -EINVAL:
2661 rtm->rtm_type = RTN_BLACKHOLE;
2662 break;
2663 case -EACCES:
2664 rtm->rtm_type = RTN_PROHIBIT;
2665 break;
b4949ab2
ND
2666 case -EAGAIN:
2667 rtm->rtm_type = RTN_THROW;
2668 break;
ef2c7d7b
ND
2669 default:
2670 rtm->rtm_type = RTN_UNREACHABLE;
2671 break;
2672 }
2673 }
38308473 2674 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 2675 rtm->rtm_type = RTN_LOCAL;
d1918542 2676 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
2677 rtm->rtm_type = RTN_LOCAL;
2678 else
2679 rtm->rtm_type = RTN_UNICAST;
2680 rtm->rtm_flags = 0;
2681 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2682 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 2683 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 2684 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
2685 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2686 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2687 rtm->rtm_protocol = RTPROT_RA;
2688 else
2689 rtm->rtm_protocol = RTPROT_KERNEL;
2690 }
1da177e4 2691
38308473 2692 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
2693 rtm->rtm_flags |= RTM_F_CLONED;
2694
2695 if (dst) {
930345ea 2696 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 2697 goto nla_put_failure;
1ab1457c 2698 rtm->rtm_dst_len = 128;
1da177e4 2699 } else if (rtm->rtm_dst_len)
930345ea 2700 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 2701 goto nla_put_failure;
1da177e4
LT
2702#ifdef CONFIG_IPV6_SUBTREES
2703 if (src) {
930345ea 2704 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 2705 goto nla_put_failure;
1ab1457c 2706 rtm->rtm_src_len = 128;
c78679e8 2707 } else if (rtm->rtm_src_len &&
930345ea 2708 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 2709 goto nla_put_failure;
1da177e4 2710#endif
7bc570c8
YH
2711 if (iif) {
2712#ifdef CONFIG_IPV6_MROUTE
2713 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 2714 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
2715 if (err <= 0) {
2716 if (!nowait) {
2717 if (err == 0)
2718 return 0;
2719 goto nla_put_failure;
2720 } else {
2721 if (err == -EMSGSIZE)
2722 goto nla_put_failure;
2723 }
2724 }
2725 } else
2726#endif
c78679e8
DM
2727 if (nla_put_u32(skb, RTA_IIF, iif))
2728 goto nla_put_failure;
7bc570c8 2729 } else if (dst) {
1da177e4 2730 struct in6_addr saddr_buf;
c78679e8 2731 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 2732 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2733 goto nla_put_failure;
1da177e4 2734 }
2d7202bf 2735
c3968a85
DW
2736 if (rt->rt6i_prefsrc.plen) {
2737 struct in6_addr saddr_buf;
4e3fd7a0 2738 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 2739 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 2740 goto nla_put_failure;
c3968a85
DW
2741 }
2742
defb3519 2743 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2d7202bf
TG
2744 goto nla_put_failure;
2745
dd0cbf29 2746 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 2747 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 2748 goto nla_put_failure;
94f826b8 2749 }
2d7202bf 2750
c78679e8
DM
2751 if (rt->dst.dev &&
2752 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2753 goto nla_put_failure;
2754 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2755 goto nla_put_failure;
8253947e
LW
2756
2757 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 2758
87a50699 2759 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 2760 goto nla_put_failure;
2d7202bf 2761
c78ba6d6
LR
2762 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
2763 goto nla_put_failure;
2764
053c095a
JB
2765 nlmsg_end(skb, nlh);
2766 return 0;
2d7202bf
TG
2767
2768nla_put_failure:
26932566
PM
2769 nlmsg_cancel(skb, nlh);
2770 return -EMSGSIZE;
1da177e4
LT
2771}
2772
1b43af54 2773int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
2774{
2775 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2776 int prefix;
2777
2d7202bf
TG
2778 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2779 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
2780 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2781 } else
2782 prefix = 0;
2783
191cd582
BH
2784 return rt6_fill_node(arg->net,
2785 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 2786 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 2787 prefix, 0, NLM_F_MULTI);
1da177e4
LT
2788}
2789
67ba4152 2790static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 2791{
3b1e0a65 2792 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
2793 struct nlattr *tb[RTA_MAX+1];
2794 struct rt6_info *rt;
1da177e4 2795 struct sk_buff *skb;
ab364a6f 2796 struct rtmsg *rtm;
4c9483b2 2797 struct flowi6 fl6;
72331bc0 2798 int err, iif = 0, oif = 0;
1da177e4 2799
ab364a6f
TG
2800 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2801 if (err < 0)
2802 goto errout;
1da177e4 2803
ab364a6f 2804 err = -EINVAL;
4c9483b2 2805 memset(&fl6, 0, sizeof(fl6));
1da177e4 2806
ab364a6f
TG
2807 if (tb[RTA_SRC]) {
2808 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2809 goto errout;
2810
4e3fd7a0 2811 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
2812 }
2813
2814 if (tb[RTA_DST]) {
2815 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2816 goto errout;
2817
4e3fd7a0 2818 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
2819 }
2820
2821 if (tb[RTA_IIF])
2822 iif = nla_get_u32(tb[RTA_IIF]);
2823
2824 if (tb[RTA_OIF])
72331bc0 2825 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 2826
2e47b291
LC
2827 if (tb[RTA_MARK])
2828 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
2829
1da177e4
LT
2830 if (iif) {
2831 struct net_device *dev;
72331bc0
SL
2832 int flags = 0;
2833
5578689a 2834 dev = __dev_get_by_index(net, iif);
1da177e4
LT
2835 if (!dev) {
2836 err = -ENODEV;
ab364a6f 2837 goto errout;
1da177e4 2838 }
72331bc0
SL
2839
2840 fl6.flowi6_iif = iif;
2841
2842 if (!ipv6_addr_any(&fl6.saddr))
2843 flags |= RT6_LOOKUP_F_HAS_SADDR;
2844
2845 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2846 flags);
2847 } else {
2848 fl6.flowi6_oif = oif;
2849
2850 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
2851 }
2852
ab364a6f 2853 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 2854 if (!skb) {
94e187c0 2855 ip6_rt_put(rt);
ab364a6f
TG
2856 err = -ENOBUFS;
2857 goto errout;
2858 }
1da177e4 2859
ab364a6f
TG
2860 /* Reserve room for dummy headers, this skb can pass
2861 through good chunk of routing engine.
2862 */
459a98ed 2863 skb_reset_mac_header(skb);
ab364a6f 2864 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 2865
d8d1f30b 2866 skb_dst_set(skb, &rt->dst);
1da177e4 2867
4c9483b2 2868 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 2869 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 2870 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 2871 if (err < 0) {
ab364a6f
TG
2872 kfree_skb(skb);
2873 goto errout;
1da177e4
LT
2874 }
2875
15e47304 2876 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 2877errout:
1da177e4 2878 return err;
1da177e4
LT
2879}
2880
86872cb5 2881void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2882{
2883 struct sk_buff *skb;
5578689a 2884 struct net *net = info->nl_net;
528c4ceb
DL
2885 u32 seq;
2886 int err;
2887
2888 err = -ENOBUFS;
38308473 2889 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 2890
339bf98f 2891 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
38308473 2892 if (!skb)
21713ebc
TG
2893 goto errout;
2894
191cd582 2895 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
15e47304 2896 event, info->portid, seq, 0, 0, 0);
26932566
PM
2897 if (err < 0) {
2898 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2899 WARN_ON(err == -EMSGSIZE);
2900 kfree_skb(skb);
2901 goto errout;
2902 }
15e47304 2903 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
2904 info->nlh, gfp_any());
2905 return;
21713ebc
TG
2906errout:
2907 if (err < 0)
5578689a 2908 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
2909}
2910
8ed67789 2911static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 2912 unsigned long event, void *ptr)
8ed67789 2913{
351638e7 2914 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 2915 struct net *net = dev_net(dev);
8ed67789
DL
2916
2917 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 2918 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
2919 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2920#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 2921 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 2922 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 2923 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
2924 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2925#endif
2926 }
2927
2928 return NOTIFY_OK;
2929}
2930
1da177e4
LT
2931/*
2932 * /proc
2933 */
2934
2935#ifdef CONFIG_PROC_FS
2936
33120b30
AD
2937static const struct file_operations ipv6_route_proc_fops = {
2938 .owner = THIS_MODULE,
2939 .open = ipv6_route_open,
2940 .read = seq_read,
2941 .llseek = seq_lseek,
8d2ca1d7 2942 .release = seq_release_net,
33120b30
AD
2943};
2944
1da177e4
LT
2945static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2946{
69ddb805 2947 struct net *net = (struct net *)seq->private;
1da177e4 2948 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
2949 net->ipv6.rt6_stats->fib_nodes,
2950 net->ipv6.rt6_stats->fib_route_nodes,
2951 net->ipv6.rt6_stats->fib_rt_alloc,
2952 net->ipv6.rt6_stats->fib_rt_entries,
2953 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 2954 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 2955 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
2956
2957 return 0;
2958}
2959
2960static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2961{
de05c557 2962 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
2963}
2964
9a32144e 2965static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
2966 .owner = THIS_MODULE,
2967 .open = rt6_stats_seq_open,
2968 .read = seq_read,
2969 .llseek = seq_lseek,
b6fcbdb4 2970 .release = single_release_net,
1da177e4
LT
2971};
2972#endif /* CONFIG_PROC_FS */
2973
2974#ifdef CONFIG_SYSCTL
2975
1da177e4 2976static
fe2c6338 2977int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
2978 void __user *buffer, size_t *lenp, loff_t *ppos)
2979{
c486da34
LAG
2980 struct net *net;
2981 int delay;
2982 if (!write)
1da177e4 2983 return -EINVAL;
c486da34
LAG
2984
2985 net = (struct net *)ctl->extra1;
2986 delay = net->ipv6.sysctl.flush_delay;
2987 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 2988 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 2989 return 0;
1da177e4
LT
2990}
2991
fe2c6338 2992struct ctl_table ipv6_route_table_template[] = {
1ab1457c 2993 {
1da177e4 2994 .procname = "flush",
4990509f 2995 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 2996 .maxlen = sizeof(int),
89c8b3a1 2997 .mode = 0200,
6d9f239a 2998 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
2999 },
3000 {
1da177e4 3001 .procname = "gc_thresh",
9a7ec3a9 3002 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3003 .maxlen = sizeof(int),
3004 .mode = 0644,
6d9f239a 3005 .proc_handler = proc_dointvec,
1da177e4
LT
3006 },
3007 {
1da177e4 3008 .procname = "max_size",
4990509f 3009 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3010 .maxlen = sizeof(int),
3011 .mode = 0644,
6d9f239a 3012 .proc_handler = proc_dointvec,
1da177e4
LT
3013 },
3014 {
1da177e4 3015 .procname = "gc_min_interval",
4990509f 3016 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3017 .maxlen = sizeof(int),
3018 .mode = 0644,
6d9f239a 3019 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3020 },
3021 {
1da177e4 3022 .procname = "gc_timeout",
4990509f 3023 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3024 .maxlen = sizeof(int),
3025 .mode = 0644,
6d9f239a 3026 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3027 },
3028 {
1da177e4 3029 .procname = "gc_interval",
4990509f 3030 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3031 .maxlen = sizeof(int),
3032 .mode = 0644,
6d9f239a 3033 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3034 },
3035 {
1da177e4 3036 .procname = "gc_elasticity",
4990509f 3037 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3038 .maxlen = sizeof(int),
3039 .mode = 0644,
f3d3f616 3040 .proc_handler = proc_dointvec,
1da177e4
LT
3041 },
3042 {
1da177e4 3043 .procname = "mtu_expires",
4990509f 3044 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3045 .maxlen = sizeof(int),
3046 .mode = 0644,
6d9f239a 3047 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3048 },
3049 {
1da177e4 3050 .procname = "min_adv_mss",
4990509f 3051 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3052 .maxlen = sizeof(int),
3053 .mode = 0644,
f3d3f616 3054 .proc_handler = proc_dointvec,
1da177e4
LT
3055 },
3056 {
1da177e4 3057 .procname = "gc_min_interval_ms",
4990509f 3058 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3059 .maxlen = sizeof(int),
3060 .mode = 0644,
6d9f239a 3061 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3062 },
f8572d8f 3063 { }
1da177e4
LT
3064};
3065
2c8c1e72 3066struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3067{
3068 struct ctl_table *table;
3069
3070 table = kmemdup(ipv6_route_table_template,
3071 sizeof(ipv6_route_table_template),
3072 GFP_KERNEL);
5ee09105
YH
3073
3074 if (table) {
3075 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3076 table[0].extra1 = net;
86393e52 3077 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3078 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3079 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3080 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3081 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3082 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3083 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3084 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3085 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3086
3087 /* Don't export sysctls to unprivileged users */
3088 if (net->user_ns != &init_user_ns)
3089 table[0].procname = NULL;
5ee09105
YH
3090 }
3091
760f2d01
DL
3092 return table;
3093}
1da177e4
LT
3094#endif
3095
2c8c1e72 3096static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3097{
633d424b 3098 int ret = -ENOMEM;
8ed67789 3099
86393e52
AD
3100 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3101 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3102
fc66f95c
ED
3103 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3104 goto out_ip6_dst_ops;
3105
8ed67789
DL
3106 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3107 sizeof(*net->ipv6.ip6_null_entry),
3108 GFP_KERNEL);
3109 if (!net->ipv6.ip6_null_entry)
fc66f95c 3110 goto out_ip6_dst_entries;
d8d1f30b 3111 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3112 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3113 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3114 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3115 ip6_template_metrics, true);
8ed67789
DL
3116
3117#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3118 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3119 sizeof(*net->ipv6.ip6_prohibit_entry),
3120 GFP_KERNEL);
68fffc67
PZ
3121 if (!net->ipv6.ip6_prohibit_entry)
3122 goto out_ip6_null_entry;
d8d1f30b 3123 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3124 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3125 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3126 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3127 ip6_template_metrics, true);
8ed67789
DL
3128
3129 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3130 sizeof(*net->ipv6.ip6_blk_hole_entry),
3131 GFP_KERNEL);
68fffc67
PZ
3132 if (!net->ipv6.ip6_blk_hole_entry)
3133 goto out_ip6_prohibit_entry;
d8d1f30b 3134 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3135 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3136 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3137 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3138 ip6_template_metrics, true);
8ed67789
DL
3139#endif
3140
b339a47c
PZ
3141 net->ipv6.sysctl.flush_delay = 0;
3142 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3143 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3144 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3145 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3146 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3147 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3148 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3149
6891a346
BT
3150 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3151
8ed67789
DL
3152 ret = 0;
3153out:
3154 return ret;
f2fc6a54 3155
68fffc67
PZ
3156#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3157out_ip6_prohibit_entry:
3158 kfree(net->ipv6.ip6_prohibit_entry);
3159out_ip6_null_entry:
3160 kfree(net->ipv6.ip6_null_entry);
3161#endif
fc66f95c
ED
3162out_ip6_dst_entries:
3163 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3164out_ip6_dst_ops:
f2fc6a54 3165 goto out;
cdb18761
DL
3166}
3167
2c8c1e72 3168static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3169{
8ed67789
DL
3170 kfree(net->ipv6.ip6_null_entry);
3171#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3172 kfree(net->ipv6.ip6_prohibit_entry);
3173 kfree(net->ipv6.ip6_blk_hole_entry);
3174#endif
41bb78b4 3175 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3176}
3177
d189634e
TG
3178static int __net_init ip6_route_net_init_late(struct net *net)
3179{
3180#ifdef CONFIG_PROC_FS
d4beaa66
G
3181 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3182 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3183#endif
3184 return 0;
3185}
3186
3187static void __net_exit ip6_route_net_exit_late(struct net *net)
3188{
3189#ifdef CONFIG_PROC_FS
ece31ffd
G
3190 remove_proc_entry("ipv6_route", net->proc_net);
3191 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3192#endif
3193}
3194
cdb18761
DL
3195static struct pernet_operations ip6_route_net_ops = {
3196 .init = ip6_route_net_init,
3197 .exit = ip6_route_net_exit,
3198};
3199
c3426b47
DM
3200static int __net_init ipv6_inetpeer_init(struct net *net)
3201{
3202 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3203
3204 if (!bp)
3205 return -ENOMEM;
3206 inet_peer_base_init(bp);
3207 net->ipv6.peers = bp;
3208 return 0;
3209}
3210
3211static void __net_exit ipv6_inetpeer_exit(struct net *net)
3212{
3213 struct inet_peer_base *bp = net->ipv6.peers;
3214
3215 net->ipv6.peers = NULL;
56a6b248 3216 inetpeer_invalidate_tree(bp);
c3426b47
DM
3217 kfree(bp);
3218}
3219
2b823f72 3220static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3221 .init = ipv6_inetpeer_init,
3222 .exit = ipv6_inetpeer_exit,
3223};
3224
d189634e
TG
3225static struct pernet_operations ip6_route_net_late_ops = {
3226 .init = ip6_route_net_init_late,
3227 .exit = ip6_route_net_exit_late,
3228};
3229
8ed67789
DL
3230static struct notifier_block ip6_route_dev_notifier = {
3231 .notifier_call = ip6_route_dev_notify,
3232 .priority = 0,
3233};
3234
433d49c3 3235int __init ip6_route_init(void)
1da177e4 3236{
433d49c3
DL
3237 int ret;
3238
9a7ec3a9
DL
3239 ret = -ENOMEM;
3240 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3241 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3242 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3243 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3244 goto out;
14e50e57 3245
fc66f95c 3246 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3247 if (ret)
bdb3289f 3248 goto out_kmem_cache;
bdb3289f 3249
c3426b47
DM
3250 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3251 if (ret)
e8803b6c 3252 goto out_dst_entries;
2a0c451a 3253
7e52b33b
DM
3254 ret = register_pernet_subsys(&ip6_route_net_ops);
3255 if (ret)
3256 goto out_register_inetpeer;
c3426b47 3257
5dc121e9
AE
3258 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3259
8ed67789
DL
3260 /* Registering of the loopback is done before this portion of code,
3261 * the loopback reference in rt6_info will not be taken, do it
3262 * manually for init_net */
d8d1f30b 3263 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3264 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3265 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3266 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3267 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3268 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3269 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3270 #endif
e8803b6c 3271 ret = fib6_init();
433d49c3 3272 if (ret)
8ed67789 3273 goto out_register_subsys;
433d49c3 3274
433d49c3
DL
3275 ret = xfrm6_init();
3276 if (ret)
e8803b6c 3277 goto out_fib6_init;
c35b7e72 3278
433d49c3
DL
3279 ret = fib6_rules_init();
3280 if (ret)
3281 goto xfrm6_init;
7e5449c2 3282
d189634e
TG
3283 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3284 if (ret)
3285 goto fib6_rules_init;
3286
433d49c3 3287 ret = -ENOBUFS;
c7ac8679
GR
3288 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3289 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3290 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3291 goto out_register_late_subsys;
c127ea2c 3292
8ed67789 3293 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3294 if (ret)
d189634e 3295 goto out_register_late_subsys;
8ed67789 3296
433d49c3
DL
3297out:
3298 return ret;
3299
d189634e
TG
3300out_register_late_subsys:
3301 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3302fib6_rules_init:
433d49c3
DL
3303 fib6_rules_cleanup();
3304xfrm6_init:
433d49c3 3305 xfrm6_fini();
2a0c451a
TG
3306out_fib6_init:
3307 fib6_gc_cleanup();
8ed67789
DL
3308out_register_subsys:
3309 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3310out_register_inetpeer:
3311 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3312out_dst_entries:
3313 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3314out_kmem_cache:
f2fc6a54 3315 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3316 goto out;
1da177e4
LT
3317}
3318
3319void ip6_route_cleanup(void)
3320{
8ed67789 3321 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3322 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3323 fib6_rules_cleanup();
1da177e4 3324 xfrm6_fini();
1da177e4 3325 fib6_gc_cleanup();
c3426b47 3326 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3327 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3328 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3329 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3330}