]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/ipv6/route.c
Merge branch 'mlxsw-flooding-and-cosmetics'
[mirror_ubuntu-bionic-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
1da177e4
LT
65
66#include <asm/uaccess.h>
67
68#ifdef CONFIG_SYSCTL
69#include <linux/sysctl.h>
70#endif
71
/* Result codes produced when a route's next-hop neighbour is evaluated.
 * All failure codes are negative; success is the only positive value.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
78
83a09abd 79static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 80static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 81static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 82static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
83static struct dst_entry *ip6_negative_advice(struct dst_entry *);
84static void ip6_dst_destroy(struct dst_entry *);
85static void ip6_dst_ifdown(struct dst_entry *,
86 struct net_device *dev, int how);
569d3645 87static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
88
89static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 90static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 91static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 93static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
94static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
95 struct sk_buff *skb, u32 mtu);
96static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb);
4b32b5ad 98static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 99static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 100
70ceb4f5 101#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 102static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
103 const struct in6_addr *prefix, int prefixlen,
104 const struct in6_addr *gwaddr, int ifindex,
95c96174 105 unsigned int pref);
efa2cea0 106static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
107 const struct in6_addr *prefix, int prefixlen,
108 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
109#endif
110
8d0b94af
MKL
111struct uncached_list {
112 spinlock_t lock;
113 struct list_head head;
114};
115
116static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
117
118static void rt6_uncached_list_add(struct rt6_info *rt)
119{
120 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
121
122 rt->dst.flags |= DST_NOCACHE;
123 rt->rt6i_uncached_list = ul;
124
125 spin_lock_bh(&ul->lock);
126 list_add_tail(&rt->rt6i_uncached, &ul->head);
127 spin_unlock_bh(&ul->lock);
128}
129
130static void rt6_uncached_list_del(struct rt6_info *rt)
131{
132 if (!list_empty(&rt->rt6i_uncached)) {
133 struct uncached_list *ul = rt->rt6i_uncached_list;
134
135 spin_lock_bh(&ul->lock);
136 list_del(&rt->rt6i_uncached);
137 spin_unlock_bh(&ul->lock);
138 }
139}
140
141static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
142{
143 struct net_device *loopback_dev = net->loopback_dev;
144 int cpu;
145
e332bc67
EB
146 if (dev == loopback_dev)
147 return;
148
8d0b94af
MKL
149 for_each_possible_cpu(cpu) {
150 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
151 struct rt6_info *rt;
152
153 spin_lock_bh(&ul->lock);
154 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
155 struct inet6_dev *rt_idev = rt->rt6i_idev;
156 struct net_device *rt_dev = rt->dst.dev;
157
e332bc67 158 if (rt_idev->dev == dev) {
8d0b94af
MKL
159 rt->rt6i_idev = in6_dev_get(loopback_dev);
160 in6_dev_put(rt_idev);
161 }
162
e332bc67 163 if (rt_dev == dev) {
8d0b94af
MKL
164 rt->dst.dev = loopback_dev;
165 dev_hold(rt->dst.dev);
166 dev_put(rt_dev);
167 }
168 }
169 spin_unlock_bh(&ul->lock);
170 }
171}
172
d52d3997
MKL
173static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
174{
175 return dst_metrics_write_ptr(rt->dst.from);
176}
177
06582540
DM
178static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
179{
4b32b5ad 180 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 181
d52d3997
MKL
182 if (rt->rt6i_flags & RTF_PCPU)
183 return rt6_pcpu_cow_metrics(rt);
184 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
185 return NULL;
186 else
3b471175 187 return dst_cow_metrics_generic(dst, old);
06582540
DM
188}
189
f894cbf8
DM
190static inline const void *choose_neigh_daddr(struct rt6_info *rt,
191 struct sk_buff *skb,
192 const void *daddr)
39232973
DM
193{
194 struct in6_addr *p = &rt->rt6i_gateway;
195
a7563f34 196 if (!ipv6_addr_any(p))
39232973 197 return (const void *) p;
f894cbf8
DM
198 else if (skb)
199 return &ipv6_hdr(skb)->daddr;
39232973
DM
200 return daddr;
201}
202
f894cbf8
DM
203static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
204 struct sk_buff *skb,
205 const void *daddr)
d3aaeb38 206{
39232973
DM
207 struct rt6_info *rt = (struct rt6_info *) dst;
208 struct neighbour *n;
209
f894cbf8 210 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 211 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
212 if (n)
213 return n;
214 return neigh_create(&nd_tbl, daddr, dst->dev);
215}
216
9a7ec3a9 217static struct dst_ops ip6_dst_ops_template = {
1da177e4 218 .family = AF_INET6,
1da177e4
LT
219 .gc = ip6_dst_gc,
220 .gc_thresh = 1024,
221 .check = ip6_dst_check,
0dbaee3b 222 .default_advmss = ip6_default_advmss,
ebb762f2 223 .mtu = ip6_mtu,
06582540 224 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
225 .destroy = ip6_dst_destroy,
226 .ifdown = ip6_dst_ifdown,
227 .negative_advice = ip6_negative_advice,
228 .link_failure = ip6_link_failure,
229 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 230 .redirect = rt6_do_redirect,
9f8955cc 231 .local_out = __ip6_local_out,
d3aaeb38 232 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
233};
234
ebb762f2 235static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 236{
618f9bc7
SK
237 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
238
239 return mtu ? : dst->dev->mtu;
ec831ea7
RD
240}
241
6700c270
DM
242static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
243 struct sk_buff *skb, u32 mtu)
14e50e57
DM
244{
245}
246
6700c270
DM
/* Redirects are ignored for blackhole dsts: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
251
14e50e57
DM
252static struct dst_ops ip6_dst_blackhole_ops = {
253 .family = AF_INET6,
14e50e57
DM
254 .destroy = ip6_dst_destroy,
255 .check = ip6_dst_check,
ebb762f2 256 .mtu = ip6_blackhole_mtu,
214f45c9 257 .default_advmss = ip6_default_advmss,
14e50e57 258 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 259 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 260 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 261 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
262};
263
62fa8a84 264static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 265 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
266};
267
fb0af4c7 268static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
269 .dst = {
270 .__refcnt = ATOMIC_INIT(1),
271 .__use = 1,
2c20cbd7 272 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 273 .error = -ENETUNREACH,
d8d1f30b
CG
274 .input = ip6_pkt_discard,
275 .output = ip6_pkt_discard_out,
1da177e4
LT
276 },
277 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 278 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
279 .rt6i_metric = ~(u32) 0,
280 .rt6i_ref = ATOMIC_INIT(1),
281};
282
101367c2
TG
283#ifdef CONFIG_IPV6_MULTIPLE_TABLES
284
fb0af4c7 285static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
286 .dst = {
287 .__refcnt = ATOMIC_INIT(1),
288 .__use = 1,
2c20cbd7 289 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 290 .error = -EACCES,
d8d1f30b
CG
291 .input = ip6_pkt_prohibit,
292 .output = ip6_pkt_prohibit_out,
101367c2
TG
293 },
294 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 295 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
296 .rt6i_metric = ~(u32) 0,
297 .rt6i_ref = ATOMIC_INIT(1),
298};
299
fb0af4c7 300static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
301 .dst = {
302 .__refcnt = ATOMIC_INIT(1),
303 .__use = 1,
2c20cbd7 304 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 305 .error = -EINVAL,
d8d1f30b 306 .input = dst_discard,
ede2059d 307 .output = dst_discard_out,
101367c2
TG
308 },
309 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 310 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
311 .rt6i_metric = ~(u32) 0,
312 .rt6i_ref = ATOMIC_INIT(1),
313};
314
315#endif
316
ebfa45f0
MKL
317static void rt6_info_init(struct rt6_info *rt)
318{
319 struct dst_entry *dst = &rt->dst;
320
321 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
322 INIT_LIST_HEAD(&rt->rt6i_siblings);
323 INIT_LIST_HEAD(&rt->rt6i_uncached);
324}
325
1da177e4 326/* allocate dst with ip6_dst_ops */
d52d3997
MKL
327static struct rt6_info *__ip6_dst_alloc(struct net *net,
328 struct net_device *dev,
ad706862 329 int flags)
1da177e4 330{
97bab73f 331 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 332 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 333
ebfa45f0
MKL
334 if (rt)
335 rt6_info_init(rt);
8104891b 336
cf911662 337 return rt;
1da177e4
LT
338}
339
d52d3997
MKL
340static struct rt6_info *ip6_dst_alloc(struct net *net,
341 struct net_device *dev,
ad706862 342 int flags)
d52d3997 343{
ad706862 344 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
345
346 if (rt) {
347 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
348 if (rt->rt6i_pcpu) {
349 int cpu;
350
351 for_each_possible_cpu(cpu) {
352 struct rt6_info **p;
353
354 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
355 /* no one shares rt */
356 *p = NULL;
357 }
358 } else {
359 dst_destroy((struct dst_entry *)rt);
360 return NULL;
361 }
362 }
363
364 return rt;
365}
366
1da177e4
LT
367static void ip6_dst_destroy(struct dst_entry *dst)
368{
369 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 370 struct dst_entry *from = dst->from;
8d0b94af 371 struct inet6_dev *idev;
1da177e4 372
4b32b5ad 373 dst_destroy_metrics_generic(dst);
87775312 374 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
375 rt6_uncached_list_del(rt);
376
377 idev = rt->rt6i_idev;
38308473 378 if (idev) {
1da177e4
LT
379 rt->rt6i_idev = NULL;
380 in6_dev_put(idev);
1ab1457c 381 }
1716a961 382
ecd98837
YH
383 dst->from = NULL;
384 dst_release(from);
b3419363
DM
385}
386
1da177e4
LT
387static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388 int how)
389{
390 struct rt6_info *rt = (struct rt6_info *)dst;
391 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 392 struct net_device *loopback_dev =
c346dca1 393 dev_net(dev)->loopback_dev;
1da177e4 394
97cac082
DM
395 if (dev != loopback_dev) {
396 if (idev && idev->dev == dev) {
397 struct inet6_dev *loopback_idev =
398 in6_dev_get(loopback_dev);
399 if (loopback_idev) {
400 rt->rt6i_idev = loopback_idev;
401 in6_dev_put(idev);
402 }
403 }
1da177e4
LT
404 }
405}
406
a50feda5 407static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 408{
1716a961
G
409 if (rt->rt6i_flags & RTF_EXPIRES) {
410 if (time_after(jiffies, rt->dst.expires))
a50feda5 411 return true;
1716a961 412 } else if (rt->dst.from) {
3fd91fb3 413 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 414 }
a50feda5 415 return false;
1da177e4
LT
416}
417
51ebd318
ND
418/* Multipath route selection:
419 * Hash based function using packet header and flowlabel.
420 * Adapted from fib_info_hashfn()
421 */
422static int rt6_info_hash_nhsfn(unsigned int candidate_count,
423 const struct flowi6 *fl6)
424{
644d0e65 425 return get_hash_from_flowi6(fl6) % candidate_count;
51ebd318
ND
426}
427
428static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
429 struct flowi6 *fl6, int oif,
430 int strict)
51ebd318
ND
431{
432 struct rt6_info *sibling, *next_sibling;
433 int route_choosen;
434
435 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
436 /* Don't change the route, if route_choosen == 0
437 * (siblings does not include ourself)
438 */
439 if (route_choosen)
440 list_for_each_entry_safe(sibling, next_sibling,
441 &match->rt6i_siblings, rt6i_siblings) {
442 route_choosen--;
443 if (route_choosen == 0) {
52bd4c0c
ND
444 if (rt6_score_route(sibling, oif, strict) < 0)
445 break;
51ebd318
ND
446 match = sibling;
447 break;
448 }
449 }
450 return match;
451}
452
1da177e4 453/*
c71099ac 454 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
455 */
456
8ed67789
DL
457static inline struct rt6_info *rt6_device_match(struct net *net,
458 struct rt6_info *rt,
b71d1d42 459 const struct in6_addr *saddr,
1da177e4 460 int oif,
d420895e 461 int flags)
1da177e4
LT
462{
463 struct rt6_info *local = NULL;
464 struct rt6_info *sprt;
465
dd3abc4e
YH
466 if (!oif && ipv6_addr_any(saddr))
467 goto out;
468
d8d1f30b 469 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 470 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
471
472 if (oif) {
1da177e4
LT
473 if (dev->ifindex == oif)
474 return sprt;
475 if (dev->flags & IFF_LOOPBACK) {
38308473 476 if (!sprt->rt6i_idev ||
1da177e4 477 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 478 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 479 continue;
17fb0b2b
DA
480 if (local &&
481 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
482 continue;
483 }
484 local = sprt;
485 }
dd3abc4e
YH
486 } else {
487 if (ipv6_chk_addr(net, saddr, dev,
488 flags & RT6_LOOKUP_F_IFACE))
489 return sprt;
1da177e4 490 }
dd3abc4e 491 }
1da177e4 492
dd3abc4e 493 if (oif) {
1da177e4
LT
494 if (local)
495 return local;
496
d420895e 497 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 498 return net->ipv6.ip6_null_entry;
1da177e4 499 }
dd3abc4e 500out:
1da177e4
LT
501 return rt;
502}
503
27097255 504#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
505struct __rt6_probe_work {
506 struct work_struct work;
507 struct in6_addr target;
508 struct net_device *dev;
509};
510
511static void rt6_probe_deferred(struct work_struct *w)
512{
513 struct in6_addr mcaddr;
514 struct __rt6_probe_work *work =
515 container_of(w, struct __rt6_probe_work, work);
516
517 addrconf_addr_solict_mult(&work->target, &mcaddr);
38cf595b 518 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
c2f17e82 519 dev_put(work->dev);
662f5533 520 kfree(work);
c2f17e82
HFS
521}
522
27097255
YH
523static void rt6_probe(struct rt6_info *rt)
524{
990edb42 525 struct __rt6_probe_work *work;
f2c31e32 526 struct neighbour *neigh;
27097255
YH
527 /*
528 * Okay, this does not seem to be appropriate
529 * for now, however, we need to check if it
530 * is really so; aka Router Reachability Probing.
531 *
532 * Router Reachability Probe MUST be rate-limited
533 * to no more than one per minute.
534 */
2152caea 535 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 536 return;
2152caea
YH
537 rcu_read_lock_bh();
538 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
539 if (neigh) {
8d6c31bf
MKL
540 if (neigh->nud_state & NUD_VALID)
541 goto out;
542
990edb42 543 work = NULL;
2152caea 544 write_lock(&neigh->lock);
990edb42
MKL
545 if (!(neigh->nud_state & NUD_VALID) &&
546 time_after(jiffies,
547 neigh->updated +
548 rt->rt6i_idev->cnf.rtr_probe_interval)) {
549 work = kmalloc(sizeof(*work), GFP_ATOMIC);
550 if (work)
551 __neigh_set_probe_once(neigh);
c2f17e82 552 }
2152caea 553 write_unlock(&neigh->lock);
990edb42
MKL
554 } else {
555 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 556 }
990edb42
MKL
557
558 if (work) {
559 INIT_WORK(&work->work, rt6_probe_deferred);
560 work->target = rt->rt6i_gateway;
561 dev_hold(rt->dst.dev);
562 work->dev = rt->dst.dev;
563 schedule_work(&work->work);
564 }
565
8d6c31bf 566out:
2152caea 567 rcu_read_unlock_bh();
27097255
YH
568}
569#else
/* Without CONFIG_IPV6_ROUTER_PREF there is no router probing: no-op stub. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
573#endif
574
1da177e4 575/*
554cfb7e 576 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 577 */
b6f99a21 578static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 579{
d1918542 580 struct net_device *dev = rt->dst.dev;
161980f4 581 if (!oif || dev->ifindex == oif)
554cfb7e 582 return 2;
161980f4
DM
583 if ((dev->flags & IFF_LOOPBACK) &&
584 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
585 return 1;
586 return 0;
554cfb7e 587}
1da177e4 588
afc154e9 589static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 590{
f2c31e32 591 struct neighbour *neigh;
afc154e9 592 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 593
4d0c5911
YH
594 if (rt->rt6i_flags & RTF_NONEXTHOP ||
595 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 596 return RT6_NUD_SUCCEED;
145a3621
YH
597
598 rcu_read_lock_bh();
599 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
600 if (neigh) {
601 read_lock(&neigh->lock);
554cfb7e 602 if (neigh->nud_state & NUD_VALID)
afc154e9 603 ret = RT6_NUD_SUCCEED;
398bcbeb 604#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 605 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 606 ret = RT6_NUD_SUCCEED;
7e980569
JB
607 else
608 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 609#endif
145a3621 610 read_unlock(&neigh->lock);
afc154e9
HFS
611 } else {
612 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 613 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 614 }
145a3621
YH
615 rcu_read_unlock_bh();
616
a5a81f0b 617 return ret;
1da177e4
LT
618}
619
554cfb7e
YH
620static int rt6_score_route(struct rt6_info *rt, int oif,
621 int strict)
1da177e4 622{
a5a81f0b 623 int m;
1ab1457c 624
4d0c5911 625 m = rt6_check_dev(rt, oif);
77d16f45 626 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 627 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
628#ifdef CONFIG_IPV6_ROUTER_PREF
629 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
630#endif
afc154e9
HFS
631 if (strict & RT6_LOOKUP_F_REACHABLE) {
632 int n = rt6_check_neigh(rt);
633 if (n < 0)
634 return n;
635 }
554cfb7e
YH
636 return m;
637}
638
f11e6659 639static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
640 int *mpri, struct rt6_info *match,
641 bool *do_rr)
554cfb7e 642{
f11e6659 643 int m;
afc154e9 644 bool match_do_rr = false;
35103d11
AG
645 struct inet6_dev *idev = rt->rt6i_idev;
646 struct net_device *dev = rt->dst.dev;
647
648 if (dev && !netif_carrier_ok(dev) &&
649 idev->cnf.ignore_routes_with_linkdown)
650 goto out;
f11e6659
DM
651
652 if (rt6_check_expired(rt))
653 goto out;
654
655 m = rt6_score_route(rt, oif, strict);
7e980569 656 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
657 match_do_rr = true;
658 m = 0; /* lowest valid score */
7e980569 659 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 660 goto out;
afc154e9
HFS
661 }
662
663 if (strict & RT6_LOOKUP_F_REACHABLE)
664 rt6_probe(rt);
f11e6659 665
7e980569 666 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 667 if (m > *mpri) {
afc154e9 668 *do_rr = match_do_rr;
f11e6659
DM
669 *mpri = m;
670 match = rt;
f11e6659 671 }
f11e6659
DM
672out:
673 return match;
674}
675
676static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
677 struct rt6_info *rr_head,
afc154e9
HFS
678 u32 metric, int oif, int strict,
679 bool *do_rr)
f11e6659 680{
9fbdcfaf 681 struct rt6_info *rt, *match, *cont;
554cfb7e 682 int mpri = -1;
1da177e4 683
f11e6659 684 match = NULL;
9fbdcfaf
SK
685 cont = NULL;
686 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
687 if (rt->rt6i_metric != metric) {
688 cont = rt;
689 break;
690 }
691
692 match = find_match(rt, oif, strict, &mpri, match, do_rr);
693 }
694
695 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
696 if (rt->rt6i_metric != metric) {
697 cont = rt;
698 break;
699 }
700
afc154e9 701 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
702 }
703
704 if (match || !cont)
705 return match;
706
707 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 708 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 709
f11e6659
DM
710 return match;
711}
1da177e4 712
f11e6659
DM
713static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
714{
715 struct rt6_info *match, *rt0;
8ed67789 716 struct net *net;
afc154e9 717 bool do_rr = false;
1da177e4 718
f11e6659
DM
719 rt0 = fn->rr_ptr;
720 if (!rt0)
721 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 722
afc154e9
HFS
723 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
724 &do_rr);
1da177e4 725
afc154e9 726 if (do_rr) {
d8d1f30b 727 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 728
554cfb7e 729 /* no entries matched; do round-robin */
f11e6659
DM
730 if (!next || next->rt6i_metric != rt0->rt6i_metric)
731 next = fn->leaf;
732
733 if (next != rt0)
734 fn->rr_ptr = next;
1da177e4 735 }
1da177e4 736
d1918542 737 net = dev_net(rt0->dst.dev);
a02cec21 738 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
739}
740
8b9df265
MKL
741static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
742{
743 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
744}
745
70ceb4f5
YH
746#ifdef CONFIG_IPV6_ROUTE_INFO
747int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 748 const struct in6_addr *gwaddr)
70ceb4f5 749{
c346dca1 750 struct net *net = dev_net(dev);
70ceb4f5
YH
751 struct route_info *rinfo = (struct route_info *) opt;
752 struct in6_addr prefix_buf, *prefix;
753 unsigned int pref;
4bed72e4 754 unsigned long lifetime;
70ceb4f5
YH
755 struct rt6_info *rt;
756
757 if (len < sizeof(struct route_info)) {
758 return -EINVAL;
759 }
760
761 /* Sanity check for prefix_len and length */
762 if (rinfo->length > 3) {
763 return -EINVAL;
764 } else if (rinfo->prefix_len > 128) {
765 return -EINVAL;
766 } else if (rinfo->prefix_len > 64) {
767 if (rinfo->length < 2) {
768 return -EINVAL;
769 }
770 } else if (rinfo->prefix_len > 0) {
771 if (rinfo->length < 1) {
772 return -EINVAL;
773 }
774 }
775
776 pref = rinfo->route_pref;
777 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 778 return -EINVAL;
70ceb4f5 779
4bed72e4 780 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
781
782 if (rinfo->length == 3)
783 prefix = (struct in6_addr *)rinfo->prefix;
784 else {
785 /* this function is safe */
786 ipv6_addr_prefix(&prefix_buf,
787 (struct in6_addr *)rinfo->prefix,
788 rinfo->prefix_len);
789 prefix = &prefix_buf;
790 }
791
f104a567
DJ
792 if (rinfo->prefix_len == 0)
793 rt = rt6_get_dflt_router(gwaddr, dev);
794 else
795 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
796 gwaddr, dev->ifindex);
70ceb4f5
YH
797
798 if (rt && !lifetime) {
e0a1ad73 799 ip6_del_rt(rt);
70ceb4f5
YH
800 rt = NULL;
801 }
802
803 if (!rt && lifetime)
efa2cea0 804 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
805 pref);
806 else if (rt)
807 rt->rt6i_flags = RTF_ROUTEINFO |
808 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
809
810 if (rt) {
1716a961
G
811 if (!addrconf_finite_timeout(lifetime))
812 rt6_clean_expires(rt);
813 else
814 rt6_set_expires(rt, jiffies + HZ * lifetime);
815
94e187c0 816 ip6_rt_put(rt);
70ceb4f5
YH
817 }
818 return 0;
819}
820#endif
821
a3c00e46
MKL
822static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
823 struct in6_addr *saddr)
824{
825 struct fib6_node *pn;
826 while (1) {
827 if (fn->fn_flags & RTN_TL_ROOT)
828 return NULL;
829 pn = fn->parent;
830 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
831 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
832 else
833 fn = pn;
834 if (fn->fn_flags & RTN_RTINFO)
835 return fn;
836 }
837}
c71099ac 838
8ed67789
DL
839static struct rt6_info *ip6_pol_route_lookup(struct net *net,
840 struct fib6_table *table,
4c9483b2 841 struct flowi6 *fl6, int flags)
1da177e4
LT
842{
843 struct fib6_node *fn;
844 struct rt6_info *rt;
845
c71099ac 846 read_lock_bh(&table->tb6_lock);
4c9483b2 847 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
848restart:
849 rt = fn->leaf;
4c9483b2 850 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 851 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 852 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
853 if (rt == net->ipv6.ip6_null_entry) {
854 fn = fib6_backtrack(fn, &fl6->saddr);
855 if (fn)
856 goto restart;
857 }
d8d1f30b 858 dst_use(&rt->dst, jiffies);
c71099ac 859 read_unlock_bh(&table->tb6_lock);
c71099ac
TG
860 return rt;
861
862}
863
67ba4152 864struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
865 int flags)
866{
867 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
868}
869EXPORT_SYMBOL_GPL(ip6_route_lookup);
870
9acd9f3a
YH
871struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
872 const struct in6_addr *saddr, int oif, int strict)
c71099ac 873{
4c9483b2
DM
874 struct flowi6 fl6 = {
875 .flowi6_oif = oif,
876 .daddr = *daddr,
c71099ac
TG
877 };
878 struct dst_entry *dst;
77d16f45 879 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 880
adaa70bb 881 if (saddr) {
4c9483b2 882 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
883 flags |= RT6_LOOKUP_F_HAS_SADDR;
884 }
885
4c9483b2 886 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
887 if (dst->error == 0)
888 return (struct rt6_info *) dst;
889
890 dst_release(dst);
891
1da177e4
LT
892 return NULL;
893}
7159039a
YH
894EXPORT_SYMBOL(rt6_lookup);
895
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason, the
   route is freed. In any case, if the caller does not hold a reference
   to it, it may be destroyed.
 */
901
e5fd387a 902static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 903 struct mx6_config *mxc)
1da177e4
LT
904{
905 int err;
c71099ac 906 struct fib6_table *table;
1da177e4 907
c71099ac
TG
908 table = rt->rt6i_table;
909 write_lock_bh(&table->tb6_lock);
e715b6d3 910 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 911 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
912
913 return err;
914}
915
40e22e8f
TG
916int ip6_ins_rt(struct rt6_info *rt)
917{
e715b6d3
FW
918 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
919 struct mx6_config mxc = { .mx = NULL, };
920
921 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
922}
923
8b9df265
MKL
924static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
925 const struct in6_addr *daddr,
926 const struct in6_addr *saddr)
1da177e4 927{
1da177e4
LT
928 struct rt6_info *rt;
929
930 /*
931 * Clone the route.
932 */
933
d52d3997 934 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 935 ort = (struct rt6_info *)ort->dst.from;
1da177e4 936
ad706862 937 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
938
939 if (!rt)
940 return NULL;
941
942 ip6_rt_copy_init(rt, ort);
943 rt->rt6i_flags |= RTF_CACHE;
944 rt->rt6i_metric = 0;
945 rt->dst.flags |= DST_HOST;
946 rt->rt6i_dst.addr = *daddr;
947 rt->rt6i_dst.plen = 128;
1da177e4 948
83a09abd
MKL
949 if (!rt6_is_gw_or_nonexthop(ort)) {
950 if (ort->rt6i_dst.plen != 128 &&
951 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
952 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 953#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
954 if (rt->rt6i_src.plen && saddr) {
955 rt->rt6i_src.addr = *saddr;
956 rt->rt6i_src.plen = 128;
8b9df265 957 }
83a09abd 958#endif
95a9a5ba 959 }
1da177e4 960
95a9a5ba
YH
961 return rt;
962}
1da177e4 963
d52d3997
MKL
964static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
965{
966 struct rt6_info *pcpu_rt;
967
968 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 969 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
970
971 if (!pcpu_rt)
972 return NULL;
973 ip6_rt_copy_init(pcpu_rt, rt);
974 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
975 pcpu_rt->rt6i_flags |= RTF_PCPU;
976 return pcpu_rt;
977}
978
979/* It should be called with read_lock_bh(&tb6_lock) acquired */
980static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
981{
a73e4195 982 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
983
984 p = this_cpu_ptr(rt->rt6i_pcpu);
985 pcpu_rt = *p;
986
a73e4195
MKL
987 if (pcpu_rt) {
988 dst_hold(&pcpu_rt->dst);
989 rt6_dst_from_metrics_check(pcpu_rt);
990 }
991 return pcpu_rt;
992}
993
994static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
995{
9c7370a1 996 struct fib6_table *table = rt->rt6i_table;
a73e4195 997 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
998
999 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1000 if (!pcpu_rt) {
1001 struct net *net = dev_net(rt->dst.dev);
1002
9c7370a1
MKL
1003 dst_hold(&net->ipv6.ip6_null_entry->dst);
1004 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1005 }
1006
9c7370a1
MKL
1007 read_lock_bh(&table->tb6_lock);
1008 if (rt->rt6i_pcpu) {
1009 p = this_cpu_ptr(rt->rt6i_pcpu);
1010 prev = cmpxchg(p, NULL, pcpu_rt);
1011 if (prev) {
1012 /* If someone did it before us, return prev instead */
1013 dst_destroy(&pcpu_rt->dst);
1014 pcpu_rt = prev;
1015 }
1016 } else {
1017 /* rt has been removed from the fib6 tree
1018 * before we have a chance to acquire the read_lock.
1019 * In this case, don't brother to create a pcpu rt
1020 * since rt is going away anyway. The next
1021 * dst_check() will trigger a re-lookup.
1022 */
d52d3997 1023 dst_destroy(&pcpu_rt->dst);
9c7370a1 1024 pcpu_rt = rt;
d52d3997 1025 }
d52d3997
MKL
1026 dst_hold(&pcpu_rt->dst);
1027 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1028 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1029 return pcpu_rt;
1030}
1031
8ed67789 1032static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 1033 struct flowi6 *fl6, int flags)
1da177e4 1034{
367efcb9 1035 struct fib6_node *fn, *saved_fn;
45e4fd26 1036 struct rt6_info *rt;
c71099ac 1037 int strict = 0;
1da177e4 1038
77d16f45 1039 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
1040 if (net->ipv6.devconf_all->forwarding == 0)
1041 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1042
c71099ac 1043 read_lock_bh(&table->tb6_lock);
1da177e4 1044
4c9483b2 1045 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1046 saved_fn = fn;
1da177e4 1047
ca254490
DA
1048 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1049 oif = 0;
1050
a3c00e46 1051redo_rt6_select:
367efcb9 1052 rt = rt6_select(fn, oif, strict);
52bd4c0c 1053 if (rt->rt6i_nsiblings)
367efcb9 1054 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1055 if (rt == net->ipv6.ip6_null_entry) {
1056 fn = fib6_backtrack(fn, &fl6->saddr);
1057 if (fn)
1058 goto redo_rt6_select;
367efcb9
MKL
1059 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1060 /* also consider unreachable route */
1061 strict &= ~RT6_LOOKUP_F_REACHABLE;
1062 fn = saved_fn;
1063 goto redo_rt6_select;
367efcb9 1064 }
a3c00e46
MKL
1065 }
1066
fb9de91e 1067
3da59bd9 1068 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1069 dst_use(&rt->dst, jiffies);
1070 read_unlock_bh(&table->tb6_lock);
1071
1072 rt6_dst_from_metrics_check(rt);
1073 return rt;
3da59bd9
MKL
1074 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1075 !(rt->rt6i_flags & RTF_GATEWAY))) {
1076 /* Create a RTF_CACHE clone which will not be
1077 * owned by the fib6 tree. It is for the special case where
1078 * the daddr in the skb during the neighbor look-up is different
1079 * from the fl6->daddr used to look-up route here.
1080 */
1081
1082 struct rt6_info *uncached_rt;
1083
d52d3997
MKL
1084 dst_use(&rt->dst, jiffies);
1085 read_unlock_bh(&table->tb6_lock);
1086
3da59bd9
MKL
1087 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1088 dst_release(&rt->dst);
c71099ac 1089
3da59bd9 1090 if (uncached_rt)
8d0b94af 1091 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1092 else
1093 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1094
3da59bd9
MKL
1095 dst_hold(&uncached_rt->dst);
1096 return uncached_rt;
3da59bd9 1097
d52d3997
MKL
1098 } else {
1099 /* Get a percpu copy */
1100
1101 struct rt6_info *pcpu_rt;
1102
1103 rt->dst.lastuse = jiffies;
1104 rt->dst.__use++;
1105 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1106
9c7370a1
MKL
1107 if (pcpu_rt) {
1108 read_unlock_bh(&table->tb6_lock);
1109 } else {
1110 /* We have to do the read_unlock first
1111 * because rt6_make_pcpu_route() may trigger
1112 * ip6_dst_gc() which will take the write_lock.
1113 */
1114 dst_hold(&rt->dst);
1115 read_unlock_bh(&table->tb6_lock);
a73e4195 1116 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1117 dst_release(&rt->dst);
1118 }
d52d3997
MKL
1119
1120 return pcpu_rt;
9c7370a1 1121
d52d3997 1122 }
1da177e4
LT
1123}
1124
8ed67789 1125static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1126 struct flowi6 *fl6, int flags)
4acad72d 1127{
4c9483b2 1128 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1129}
1130
72331bc0
SL
1131static struct dst_entry *ip6_route_input_lookup(struct net *net,
1132 struct net_device *dev,
1133 struct flowi6 *fl6, int flags)
1134{
1135 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1136 flags |= RT6_LOOKUP_F_IFACE;
1137
1138 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1139}
1140
c71099ac
TG
1141void ip6_route_input(struct sk_buff *skb)
1142{
b71d1d42 1143 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1144 struct net *net = dev_net(skb->dev);
adaa70bb 1145 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1146 struct ip_tunnel_info *tun_info;
4c9483b2 1147 struct flowi6 fl6 = {
ca254490 1148 .flowi6_iif = l3mdev_fib_oif(skb->dev),
4c9483b2
DM
1149 .daddr = iph->daddr,
1150 .saddr = iph->saddr,
6502ca52 1151 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1152 .flowi6_mark = skb->mark,
1153 .flowi6_proto = iph->nexthdr,
c71099ac 1154 };
adaa70bb 1155
904af04d 1156 tun_info = skb_tunnel_info(skb);
46fa062a 1157 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1158 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1159 skb_dst_drop(skb);
72331bc0 1160 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1161}
1162
8ed67789 1163static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1164 struct flowi6 *fl6, int flags)
1da177e4 1165{
4c9483b2 1166 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1167}
1168
67ba4152 1169struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
4c9483b2 1170 struct flowi6 *fl6)
c71099ac 1171{
ca254490 1172 struct dst_entry *dst;
c71099ac 1173 int flags = 0;
d46a9d67 1174 bool any_src;
c71099ac 1175
ca254490
DA
1176 dst = l3mdev_rt6_dst_by_oif(net, fl6);
1177 if (dst)
1178 return dst;
1179
1fb9489b 1180 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1181
d46a9d67 1182 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1183 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1184 (fl6->flowi6_oif && any_src))
77d16f45 1185 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1186
d46a9d67 1187 if (!any_src)
adaa70bb 1188 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1189 else if (sk)
1190 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1191
4c9483b2 1192 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1193}
7159039a 1194EXPORT_SYMBOL(ip6_route_output);
1da177e4 1195
2774c131 1196struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1197{
5c1e6aa3 1198 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1199 struct dst_entry *new = NULL;
1200
f5b0a874 1201 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1202 if (rt) {
0a1f5962 1203 rt6_info_init(rt);
8104891b 1204
0a1f5962 1205 new = &rt->dst;
14e50e57 1206 new->__use = 1;
352e512c 1207 new->input = dst_discard;
ede2059d 1208 new->output = dst_discard_out;
14e50e57 1209
0a1f5962 1210 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1211 rt->rt6i_idev = ort->rt6i_idev;
1212 if (rt->rt6i_idev)
1213 in6_dev_hold(rt->rt6i_idev);
14e50e57 1214
4e3fd7a0 1215 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1216 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1217 rt->rt6i_metric = 0;
1218
1219 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1220#ifdef CONFIG_IPV6_SUBTREES
1221 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1222#endif
1223
1224 dst_free(new);
1225 }
1226
69ead7af
DM
1227 dst_release(dst_orig);
1228 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1229}
14e50e57 1230
1da177e4
LT
1231/*
1232 * Destination cache support functions
1233 */
1234
4b32b5ad
MKL
1235static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1236{
1237 if (rt->dst.from &&
1238 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1239 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1240}
1241
3da59bd9
MKL
1242static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1243{
1244 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1245 return NULL;
1246
1247 if (rt6_check_expired(rt))
1248 return NULL;
1249
1250 return &rt->dst;
1251}
1252
1253static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1254{
1255 if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1256 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1257 return &rt->dst;
1258 else
1259 return NULL;
1260}
1261
1da177e4
LT
1262static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1263{
1264 struct rt6_info *rt;
1265
1266 rt = (struct rt6_info *) dst;
1267
6f3118b5
ND
1268 /* All IPV6 dsts are created with ->obsolete set to the value
1269 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1270 * into this function always.
1271 */
e3bc10bd 1272
4b32b5ad
MKL
1273 rt6_dst_from_metrics_check(rt);
1274
d52d3997 1275 if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
3da59bd9
MKL
1276 return rt6_dst_from_check(rt, cookie);
1277 else
1278 return rt6_check(rt, cookie);
1da177e4
LT
1279}
1280
1281static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1282{
1283 struct rt6_info *rt = (struct rt6_info *) dst;
1284
1285 if (rt) {
54c1a859
YH
1286 if (rt->rt6i_flags & RTF_CACHE) {
1287 if (rt6_check_expired(rt)) {
1288 ip6_del_rt(rt);
1289 dst = NULL;
1290 }
1291 } else {
1da177e4 1292 dst_release(dst);
54c1a859
YH
1293 dst = NULL;
1294 }
1da177e4 1295 }
54c1a859 1296 return dst;
1da177e4
LT
1297}
1298
1299static void ip6_link_failure(struct sk_buff *skb)
1300{
1301 struct rt6_info *rt;
1302
3ffe533c 1303 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1304
adf30907 1305 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1306 if (rt) {
1eb4f758
HFS
1307 if (rt->rt6i_flags & RTF_CACHE) {
1308 dst_hold(&rt->dst);
8e3d5be7 1309 ip6_del_rt(rt);
1eb4f758 1310 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1311 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1312 }
1da177e4
LT
1313 }
1314}
1315
45e4fd26
MKL
1316static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1317{
1318 struct net *net = dev_net(rt->dst.dev);
1319
1320 rt->rt6i_flags |= RTF_MODIFIED;
1321 rt->rt6i_pmtu = mtu;
1322 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1323}
1324
1325static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1326 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1327{
67ba4152 1328 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1329
45e4fd26
MKL
1330 if (rt6->rt6i_flags & RTF_LOCAL)
1331 return;
81aded24 1332
45e4fd26
MKL
1333 dst_confirm(dst);
1334 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1335 if (mtu >= dst_mtu(dst))
1336 return;
9d289715 1337
45e4fd26
MKL
1338 if (rt6->rt6i_flags & RTF_CACHE) {
1339 rt6_do_update_pmtu(rt6, mtu);
1340 } else {
1341 const struct in6_addr *daddr, *saddr;
1342 struct rt6_info *nrt6;
1343
1344 if (iph) {
1345 daddr = &iph->daddr;
1346 saddr = &iph->saddr;
1347 } else if (sk) {
1348 daddr = &sk->sk_v6_daddr;
1349 saddr = &inet6_sk(sk)->saddr;
1350 } else {
1351 return;
1352 }
1353 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1354 if (nrt6) {
1355 rt6_do_update_pmtu(nrt6, mtu);
1356
1357 /* ip6_ins_rt(nrt6) will bump the
1358 * rt6->rt6i_node->fn_sernum
1359 * which will fail the next rt6_check() and
1360 * invalidate the sk->sk_dst_cache.
1361 */
1362 ip6_ins_rt(nrt6);
1363 }
1da177e4
LT
1364 }
1365}
1366
45e4fd26
MKL
1367static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1368 struct sk_buff *skb, u32 mtu)
1369{
1370 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1371}
1372
42ae66c8
DM
1373void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1374 int oif, u32 mark)
81aded24
DM
1375{
1376 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1377 struct dst_entry *dst;
1378 struct flowi6 fl6;
1379
1380 memset(&fl6, 0, sizeof(fl6));
1381 fl6.flowi6_oif = oif;
1b3c61dc 1382 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1383 fl6.daddr = iph->daddr;
1384 fl6.saddr = iph->saddr;
6502ca52 1385 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1386
1387 dst = ip6_route_output(net, NULL, &fl6);
1388 if (!dst->error)
45e4fd26 1389 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1390 dst_release(dst);
1391}
1392EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1393
1394void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1395{
1396 ip6_update_pmtu(skb, sock_net(sk), mtu,
1397 sk->sk_bound_dev_if, sk->sk_mark);
1398}
1399EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1400
b55b76b2
DJ
1401/* Handle redirects */
1402struct ip6rd_flowi {
1403 struct flowi6 fl6;
1404 struct in6_addr gateway;
1405};
1406
1407static struct rt6_info *__ip6_route_redirect(struct net *net,
1408 struct fib6_table *table,
1409 struct flowi6 *fl6,
1410 int flags)
1411{
1412 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1413 struct rt6_info *rt;
1414 struct fib6_node *fn;
1415
1416 /* Get the "current" route for this destination and
1417 * check if the redirect has come from approriate router.
1418 *
1419 * RFC 4861 specifies that redirects should only be
1420 * accepted if they come from the nexthop to the target.
1421 * Due to the way the routes are chosen, this notion
1422 * is a bit fuzzy and one might need to check all possible
1423 * routes.
1424 */
1425
1426 read_lock_bh(&table->tb6_lock);
1427 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1428restart:
1429 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1430 if (rt6_check_expired(rt))
1431 continue;
1432 if (rt->dst.error)
1433 break;
1434 if (!(rt->rt6i_flags & RTF_GATEWAY))
1435 continue;
1436 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1437 continue;
1438 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1439 continue;
1440 break;
1441 }
1442
1443 if (!rt)
1444 rt = net->ipv6.ip6_null_entry;
1445 else if (rt->dst.error) {
1446 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1447 goto out;
1448 }
1449
1450 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1451 fn = fib6_backtrack(fn, &fl6->saddr);
1452 if (fn)
1453 goto restart;
b55b76b2 1454 }
a3c00e46 1455
b0a1ba59 1456out:
b55b76b2
DJ
1457 dst_hold(&rt->dst);
1458
1459 read_unlock_bh(&table->tb6_lock);
1460
1461 return rt;
1462};
1463
1464static struct dst_entry *ip6_route_redirect(struct net *net,
1465 const struct flowi6 *fl6,
1466 const struct in6_addr *gateway)
1467{
1468 int flags = RT6_LOOKUP_F_HAS_SADDR;
1469 struct ip6rd_flowi rdfl;
1470
1471 rdfl.fl6 = *fl6;
1472 rdfl.gateway = *gateway;
1473
1474 return fib6_rule_lookup(net, &rdfl.fl6,
1475 flags, __ip6_route_redirect);
1476}
1477
3a5ad2ee
DM
1478void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1479{
1480 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1481 struct dst_entry *dst;
1482 struct flowi6 fl6;
1483
1484 memset(&fl6, 0, sizeof(fl6));
e374c618 1485 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1486 fl6.flowi6_oif = oif;
1487 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1488 fl6.daddr = iph->daddr;
1489 fl6.saddr = iph->saddr;
6502ca52 1490 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1491
b55b76b2
DJ
1492 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1493 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1494 dst_release(dst);
1495}
1496EXPORT_SYMBOL_GPL(ip6_redirect);
1497
c92a59ec
DJ
1498void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1499 u32 mark)
1500{
1501 const struct ipv6hdr *iph = ipv6_hdr(skb);
1502 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1503 struct dst_entry *dst;
1504 struct flowi6 fl6;
1505
1506 memset(&fl6, 0, sizeof(fl6));
e374c618 1507 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1508 fl6.flowi6_oif = oif;
1509 fl6.flowi6_mark = mark;
c92a59ec
DJ
1510 fl6.daddr = msg->dest;
1511 fl6.saddr = iph->daddr;
1512
b55b76b2
DJ
1513 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1514 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1515 dst_release(dst);
1516}
1517
3a5ad2ee
DM
1518void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1519{
1520 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1521}
1522EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1523
0dbaee3b 1524static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1525{
0dbaee3b
DM
1526 struct net_device *dev = dst->dev;
1527 unsigned int mtu = dst_mtu(dst);
1528 struct net *net = dev_net(dev);
1529
1da177e4
LT
1530 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1531
5578689a
DL
1532 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1533 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1534
1535 /*
1ab1457c
YH
1536 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1537 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1538 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1539 * rely only on pmtu discovery"
1540 */
1541 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1542 mtu = IPV6_MAXPLEN;
1543 return mtu;
1544}
1545
ebb762f2 1546static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1547{
4b32b5ad
MKL
1548 const struct rt6_info *rt = (const struct rt6_info *)dst;
1549 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1550 struct inet6_dev *idev;
618f9bc7 1551
4b32b5ad
MKL
1552 if (mtu)
1553 goto out;
1554
1555 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1556 if (mtu)
30f78d8e 1557 goto out;
618f9bc7
SK
1558
1559 mtu = IPV6_MIN_MTU;
d33e4553
DM
1560
1561 rcu_read_lock();
1562 idev = __in6_dev_get(dst->dev);
1563 if (idev)
1564 mtu = idev->cnf.mtu6;
1565 rcu_read_unlock();
1566
30f78d8e
ED
1567out:
1568 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1569}
1570
3b00944c
YH
1571static struct dst_entry *icmp6_dst_gc_list;
1572static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1573
3b00944c 1574struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1575 struct flowi6 *fl6)
1da177e4 1576{
87a11578 1577 struct dst_entry *dst;
1da177e4
LT
1578 struct rt6_info *rt;
1579 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1580 struct net *net = dev_net(dev);
1da177e4 1581
38308473 1582 if (unlikely(!idev))
122bdf67 1583 return ERR_PTR(-ENODEV);
1da177e4 1584
ad706862 1585 rt = ip6_dst_alloc(net, dev, 0);
38308473 1586 if (unlikely(!rt)) {
1da177e4 1587 in6_dev_put(idev);
87a11578 1588 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1589 goto out;
1590 }
1591
8e2ec639
YZ
1592 rt->dst.flags |= DST_HOST;
1593 rt->dst.output = ip6_output;
d8d1f30b 1594 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1595 rt->rt6i_gateway = fl6->daddr;
87a11578 1596 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1597 rt->rt6i_dst.plen = 128;
1598 rt->rt6i_idev = idev;
14edd87d 1599 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1600
3b00944c 1601 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1602 rt->dst.next = icmp6_dst_gc_list;
1603 icmp6_dst_gc_list = &rt->dst;
3b00944c 1604 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1605
5578689a 1606 fib6_force_start_gc(net);
1da177e4 1607
87a11578
DM
1608 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1609
1da177e4 1610out:
87a11578 1611 return dst;
1da177e4
LT
1612}
1613
3d0f24a7 1614int icmp6_dst_gc(void)
1da177e4 1615{
e9476e95 1616 struct dst_entry *dst, **pprev;
3d0f24a7 1617 int more = 0;
1da177e4 1618
3b00944c
YH
1619 spin_lock_bh(&icmp6_dst_lock);
1620 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1621
1da177e4
LT
1622 while ((dst = *pprev) != NULL) {
1623 if (!atomic_read(&dst->__refcnt)) {
1624 *pprev = dst->next;
1625 dst_free(dst);
1da177e4
LT
1626 } else {
1627 pprev = &dst->next;
3d0f24a7 1628 ++more;
1da177e4
LT
1629 }
1630 }
1631
3b00944c 1632 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1633
3d0f24a7 1634 return more;
1da177e4
LT
1635}
1636
1e493d19
DM
1637static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1638 void *arg)
1639{
1640 struct dst_entry *dst, **pprev;
1641
1642 spin_lock_bh(&icmp6_dst_lock);
1643 pprev = &icmp6_dst_gc_list;
1644 while ((dst = *pprev) != NULL) {
1645 struct rt6_info *rt = (struct rt6_info *) dst;
1646 if (func(rt, arg)) {
1647 *pprev = dst->next;
1648 dst_free(dst);
1649 } else {
1650 pprev = &dst->next;
1651 }
1652 }
1653 spin_unlock_bh(&icmp6_dst_lock);
1654}
1655
569d3645 1656static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1657{
86393e52 1658 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1659 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1660 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1661 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1662 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1663 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1664 int entries;
7019b78e 1665
fc66f95c 1666 entries = dst_entries_get_fast(ops);
49a18d86 1667 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1668 entries <= rt_max_size)
1da177e4
LT
1669 goto out;
1670
6891a346 1671 net->ipv6.ip6_rt_gc_expire++;
14956643 1672 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1673 entries = dst_entries_get_slow(ops);
1674 if (entries < ops->gc_thresh)
7019b78e 1675 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1676out:
7019b78e 1677 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1678 return entries > rt_max_size;
1da177e4
LT
1679}
1680
e715b6d3
FW
1681static int ip6_convert_metrics(struct mx6_config *mxc,
1682 const struct fib6_config *cfg)
1683{
c3a8d947 1684 bool ecn_ca = false;
e715b6d3
FW
1685 struct nlattr *nla;
1686 int remaining;
1687 u32 *mp;
1688
63159f29 1689 if (!cfg->fc_mx)
e715b6d3
FW
1690 return 0;
1691
1692 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1693 if (unlikely(!mp))
1694 return -ENOMEM;
1695
1696 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1697 int type = nla_type(nla);
1bb14807 1698 u32 val;
e715b6d3 1699
1bb14807
DB
1700 if (!type)
1701 continue;
1702 if (unlikely(type > RTAX_MAX))
1703 goto err;
ea697639 1704
1bb14807
DB
1705 if (type == RTAX_CC_ALGO) {
1706 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1707
1bb14807 1708 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1709 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1710 if (val == TCP_CA_UNSPEC)
1711 goto err;
1712 } else {
1713 val = nla_get_u32(nla);
e715b6d3 1714 }
b8d3e416
DB
1715 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1716 goto err;
1bb14807
DB
1717
1718 mp[type - 1] = val;
1719 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1720 }
1721
c3a8d947
DB
1722 if (ecn_ca) {
1723 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1724 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1725 }
e715b6d3 1726
c3a8d947 1727 mxc->mx = mp;
e715b6d3
FW
1728 return 0;
1729 err:
1730 kfree(mp);
1731 return -EINVAL;
1732}
1da177e4 1733
8c5b83f0 1734static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1da177e4 1735{
5578689a 1736 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1737 struct rt6_info *rt = NULL;
1738 struct net_device *dev = NULL;
1739 struct inet6_dev *idev = NULL;
c71099ac 1740 struct fib6_table *table;
1da177e4 1741 int addr_type;
8c5b83f0 1742 int err = -EINVAL;
1da177e4 1743
86872cb5 1744 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
8c5b83f0 1745 goto out;
1da177e4 1746#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1747 if (cfg->fc_src_len)
8c5b83f0 1748 goto out;
1da177e4 1749#endif
86872cb5 1750 if (cfg->fc_ifindex) {
1da177e4 1751 err = -ENODEV;
5578689a 1752 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1753 if (!dev)
1754 goto out;
1755 idev = in6_dev_get(dev);
1756 if (!idev)
1757 goto out;
1758 }
1759
86872cb5
TG
1760 if (cfg->fc_metric == 0)
1761 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1762
d71314b4 1763 err = -ENOBUFS;
38308473
DM
1764 if (cfg->fc_nlinfo.nlh &&
1765 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1766 table = fib6_get_table(net, cfg->fc_table);
38308473 1767 if (!table) {
f3213831 1768 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1769 table = fib6_new_table(net, cfg->fc_table);
1770 }
1771 } else {
1772 table = fib6_new_table(net, cfg->fc_table);
1773 }
38308473
DM
1774
1775 if (!table)
c71099ac 1776 goto out;
c71099ac 1777
ad706862
MKL
1778 rt = ip6_dst_alloc(net, NULL,
1779 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1780
38308473 1781 if (!rt) {
1da177e4
LT
1782 err = -ENOMEM;
1783 goto out;
1784 }
1785
1716a961
G
1786 if (cfg->fc_flags & RTF_EXPIRES)
1787 rt6_set_expires(rt, jiffies +
1788 clock_t_to_jiffies(cfg->fc_expires));
1789 else
1790 rt6_clean_expires(rt);
1da177e4 1791
86872cb5
TG
1792 if (cfg->fc_protocol == RTPROT_UNSPEC)
1793 cfg->fc_protocol = RTPROT_BOOT;
1794 rt->rt6i_protocol = cfg->fc_protocol;
1795
1796 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1797
1798 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1799 rt->dst.input = ip6_mc_input;
ab79ad14
1800 else if (cfg->fc_flags & RTF_LOCAL)
1801 rt->dst.input = ip6_input;
1da177e4 1802 else
d8d1f30b 1803 rt->dst.input = ip6_forward;
1da177e4 1804
d8d1f30b 1805 rt->dst.output = ip6_output;
1da177e4 1806
19e42e45
RP
1807 if (cfg->fc_encap) {
1808 struct lwtunnel_state *lwtstate;
1809
1810 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
127eb7cd
TH
1811 cfg->fc_encap, AF_INET6, cfg,
1812 &lwtstate);
19e42e45
RP
1813 if (err)
1814 goto out;
61adedf3
JB
1815 rt->dst.lwtstate = lwtstate_get(lwtstate);
1816 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1817 rt->dst.lwtstate->orig_output = rt->dst.output;
1818 rt->dst.output = lwtunnel_output;
25368623 1819 }
61adedf3
JB
1820 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1821 rt->dst.lwtstate->orig_input = rt->dst.input;
1822 rt->dst.input = lwtunnel_input;
25368623 1823 }
19e42e45
RP
1824 }
1825
86872cb5
TG
1826 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1827 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1828 if (rt->rt6i_dst.plen == 128)
e5fd387a 1829 rt->dst.flags |= DST_HOST;
e5fd387a 1830
1da177e4 1831#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1832 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1833 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1834#endif
1835
86872cb5 1836 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1837
1838 /* We cannot add true routes via loopback here,
1839 they would result in kernel looping; promote them to reject routes
1840 */
86872cb5 1841 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1842 (dev && (dev->flags & IFF_LOOPBACK) &&
1843 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1844 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1845 /* hold loopback dev/idev if we haven't done so. */
5578689a 1846 if (dev != net->loopback_dev) {
1da177e4
LT
1847 if (dev) {
1848 dev_put(dev);
1849 in6_dev_put(idev);
1850 }
5578689a 1851 dev = net->loopback_dev;
1da177e4
LT
1852 dev_hold(dev);
1853 idev = in6_dev_get(dev);
1854 if (!idev) {
1855 err = -ENODEV;
1856 goto out;
1857 }
1858 }
1da177e4 1859 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1860 switch (cfg->fc_type) {
1861 case RTN_BLACKHOLE:
1862 rt->dst.error = -EINVAL;
ede2059d 1863 rt->dst.output = dst_discard_out;
7150aede 1864 rt->dst.input = dst_discard;
ef2c7d7b
ND
1865 break;
1866 case RTN_PROHIBIT:
1867 rt->dst.error = -EACCES;
7150aede
K
1868 rt->dst.output = ip6_pkt_prohibit_out;
1869 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1870 break;
b4949ab2 1871 case RTN_THROW:
0315e382 1872 case RTN_UNREACHABLE:
ef2c7d7b 1873 default:
7150aede 1874 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
1875 : (cfg->fc_type == RTN_UNREACHABLE)
1876 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
1877 rt->dst.output = ip6_pkt_discard_out;
1878 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1879 break;
1880 }
1da177e4
LT
1881 goto install_route;
1882 }
1883
86872cb5 1884 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1885 const struct in6_addr *gw_addr;
1da177e4
LT
1886 int gwa_type;
1887
86872cb5 1888 gw_addr = &cfg->fc_gateway;
330567b7 1889 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
1890
1891 /* if gw_addr is local we will fail to detect this in case
1892 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1893 * will return already-added prefix route via interface that
1894 * prefix route was assigned to, which might be non-loopback.
1895 */
1896 err = -EINVAL;
330567b7
FW
1897 if (ipv6_chk_addr_and_flags(net, gw_addr,
1898 gwa_type & IPV6_ADDR_LINKLOCAL ?
1899 dev : NULL, 0, 0))
48ed7b26
FW
1900 goto out;
1901
4e3fd7a0 1902 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1903
1904 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1905 struct rt6_info *grt;
1906
1907 /* IPv6 strictly inhibits using not link-local
1908 addresses as nexthop address.
1909 Otherwise, router will not able to send redirects.
1910 It is very good, but in some (rare!) circumstances
1911 (SIT, PtP, NBMA NOARP links) it is handy to allow
1912 some exceptions. --ANK
1913 */
38308473 1914 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1915 goto out;
1916
5578689a 1917 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1918
1919 err = -EHOSTUNREACH;
38308473 1920 if (!grt)
1da177e4
LT
1921 goto out;
1922 if (dev) {
d1918542 1923 if (dev != grt->dst.dev) {
94e187c0 1924 ip6_rt_put(grt);
1da177e4
LT
1925 goto out;
1926 }
1927 } else {
d1918542 1928 dev = grt->dst.dev;
1da177e4
LT
1929 idev = grt->rt6i_idev;
1930 dev_hold(dev);
1931 in6_dev_hold(grt->rt6i_idev);
1932 }
38308473 1933 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1934 err = 0;
94e187c0 1935 ip6_rt_put(grt);
1da177e4
LT
1936
1937 if (err)
1938 goto out;
1939 }
1940 err = -EINVAL;
38308473 1941 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1942 goto out;
1943 }
1944
1945 err = -ENODEV;
38308473 1946 if (!dev)
1da177e4
LT
1947 goto out;
1948
c3968a85
DW
1949 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1950 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1951 err = -EINVAL;
1952 goto out;
1953 }
4e3fd7a0 1954 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1955 rt->rt6i_prefsrc.plen = 128;
1956 } else
1957 rt->rt6i_prefsrc.plen = 0;
1958
86872cb5 1959 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1960
1961install_route:
d8d1f30b 1962 rt->dst.dev = dev;
1da177e4 1963 rt->rt6i_idev = idev;
c71099ac 1964 rt->rt6i_table = table;
63152fc0 1965
c346dca1 1966 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1967
8c5b83f0 1968 return rt;
6b9ea5a6
RP
1969out:
1970 if (dev)
1971 dev_put(dev);
1972 if (idev)
1973 in6_dev_put(idev);
1974 if (rt)
1975 dst_free(&rt->dst);
1976
8c5b83f0 1977 return ERR_PTR(err);
6b9ea5a6
RP
1978}
1979
1980int ip6_route_add(struct fib6_config *cfg)
1981{
1982 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 1983 struct rt6_info *rt;
6b9ea5a6
RP
1984 int err;
1985
8c5b83f0
RP
1986 rt = ip6_route_info_create(cfg);
1987 if (IS_ERR(rt)) {
1988 err = PTR_ERR(rt);
1989 rt = NULL;
6b9ea5a6 1990 goto out;
8c5b83f0 1991 }
6b9ea5a6 1992
e715b6d3
FW
1993 err = ip6_convert_metrics(&mxc, cfg);
1994 if (err)
1995 goto out;
1da177e4 1996
e715b6d3
FW
1997 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
1998
1999 kfree(mxc.mx);
6b9ea5a6 2000
e715b6d3 2001 return err;
1da177e4 2002out:
1da177e4 2003 if (rt)
d8d1f30b 2004 dst_free(&rt->dst);
6b9ea5a6 2005
1da177e4
LT
2006 return err;
2007}
2008
86872cb5 2009static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2010{
2011 int err;
c71099ac 2012 struct fib6_table *table;
d1918542 2013 struct net *net = dev_net(rt->dst.dev);
1da177e4 2014
8e3d5be7
MKL
2015 if (rt == net->ipv6.ip6_null_entry ||
2016 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2017 err = -ENOENT;
2018 goto out;
2019 }
6c813a72 2020
c71099ac
TG
2021 table = rt->rt6i_table;
2022 write_lock_bh(&table->tb6_lock);
86872cb5 2023 err = fib6_del(rt, info);
c71099ac 2024 write_unlock_bh(&table->tb6_lock);
1da177e4 2025
6825a26c 2026out:
94e187c0 2027 ip6_rt_put(rt);
1da177e4
LT
2028 return err;
2029}
2030
e0a1ad73
TG
2031int ip6_del_rt(struct rt6_info *rt)
2032{
4d1169c1 2033 struct nl_info info = {
d1918542 2034 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2035 };
528c4ceb 2036 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2037}
2038
86872cb5 2039static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2040{
c71099ac 2041 struct fib6_table *table;
1da177e4
LT
2042 struct fib6_node *fn;
2043 struct rt6_info *rt;
2044 int err = -ESRCH;
2045
5578689a 2046 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2047 if (!table)
c71099ac
TG
2048 return err;
2049
2050 read_lock_bh(&table->tb6_lock);
1da177e4 2051
c71099ac 2052 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2053 &cfg->fc_dst, cfg->fc_dst_len,
2054 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2055
1da177e4 2056 if (fn) {
d8d1f30b 2057 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2058 if ((rt->rt6i_flags & RTF_CACHE) &&
2059 !(cfg->fc_flags & RTF_CACHE))
2060 continue;
86872cb5 2061 if (cfg->fc_ifindex &&
d1918542
DM
2062 (!rt->dst.dev ||
2063 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2064 continue;
86872cb5
TG
2065 if (cfg->fc_flags & RTF_GATEWAY &&
2066 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2067 continue;
86872cb5 2068 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2069 continue;
d8d1f30b 2070 dst_hold(&rt->dst);
c71099ac 2071 read_unlock_bh(&table->tb6_lock);
1da177e4 2072
86872cb5 2073 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2074 }
2075 }
c71099ac 2076 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2077
2078 return err;
2079}
2080
6700c270 2081static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2082{
e8599ff4 2083 struct net *net = dev_net(skb->dev);
a6279458 2084 struct netevent_redirect netevent;
e8599ff4 2085 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2086 struct ndisc_options ndopts;
2087 struct inet6_dev *in6_dev;
2088 struct neighbour *neigh;
71bcdba0 2089 struct rd_msg *msg;
6e157b6a
DM
2090 int optlen, on_link;
2091 u8 *lladdr;
e8599ff4 2092
29a3cad5 2093 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2094 optlen -= sizeof(*msg);
e8599ff4
DM
2095
2096 if (optlen < 0) {
6e157b6a 2097 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2098 return;
2099 }
2100
71bcdba0 2101 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2102
71bcdba0 2103 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2104 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2105 return;
2106 }
2107
6e157b6a 2108 on_link = 0;
71bcdba0 2109 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2110 on_link = 1;
71bcdba0 2111 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2112 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2113 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2114 return;
2115 }
2116
2117 in6_dev = __in6_dev_get(skb->dev);
2118 if (!in6_dev)
2119 return;
2120 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2121 return;
2122
2123 /* RFC2461 8.1:
2124 * The IP source address of the Redirect MUST be the same as the current
2125 * first-hop router for the specified ICMP Destination Address.
2126 */
2127
71bcdba0 2128 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2129 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2130 return;
2131 }
6e157b6a
DM
2132
2133 lladdr = NULL;
e8599ff4
DM
2134 if (ndopts.nd_opts_tgt_lladdr) {
2135 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2136 skb->dev);
2137 if (!lladdr) {
2138 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2139 return;
2140 }
2141 }
2142
6e157b6a
DM
2143 rt = (struct rt6_info *) dst;
2144 if (rt == net->ipv6.ip6_null_entry) {
2145 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2146 return;
6e157b6a 2147 }
e8599ff4 2148
6e157b6a
DM
2149 /* Redirect received -> path was valid.
2150 * Look, redirects are sent only in response to data packets,
2151 * so that this nexthop apparently is reachable. --ANK
2152 */
2153 dst_confirm(&rt->dst);
a6279458 2154
71bcdba0 2155 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2156 if (!neigh)
2157 return;
a6279458 2158
1da177e4
LT
2159 /*
2160 * We have finally decided to accept it.
2161 */
2162
1ab1457c 2163 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
2164 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2165 NEIGH_UPDATE_F_OVERRIDE|
2166 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2167 NEIGH_UPDATE_F_ISROUTER))
2168 );
2169
83a09abd 2170 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2171 if (!nrt)
1da177e4
LT
2172 goto out;
2173
2174 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2175 if (on_link)
2176 nrt->rt6i_flags &= ~RTF_GATEWAY;
2177
4e3fd7a0 2178 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2179
40e22e8f 2180 if (ip6_ins_rt(nrt))
1da177e4
LT
2181 goto out;
2182
d8d1f30b
CG
2183 netevent.old = &rt->dst;
2184 netevent.new = &nrt->dst;
71bcdba0 2185 netevent.daddr = &msg->dest;
60592833 2186 netevent.neigh = neigh;
8d71740c
TT
2187 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2188
38308473 2189 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2190 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2191 ip6_del_rt(rt);
1da177e4
LT
2192 }
2193
2194out:
e8599ff4 2195 neigh_release(neigh);
6e157b6a
DM
2196}
2197
1da177e4
LT
2198/*
2199 * Misc support functions
2200 */
2201
4b32b5ad
MKL
2202static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2203{
2204 BUG_ON(from->dst.from);
2205
2206 rt->rt6i_flags &= ~RTF_EXPIRES;
2207 dst_hold(&from->dst);
2208 rt->dst.from = &from->dst;
2209 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2210}
2211
83a09abd
MKL
2212static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2213{
2214 rt->dst.input = ort->dst.input;
2215 rt->dst.output = ort->dst.output;
2216 rt->rt6i_dst = ort->rt6i_dst;
2217 rt->dst.error = ort->dst.error;
2218 rt->rt6i_idev = ort->rt6i_idev;
2219 if (rt->rt6i_idev)
2220 in6_dev_hold(rt->rt6i_idev);
2221 rt->dst.lastuse = jiffies;
2222 rt->rt6i_gateway = ort->rt6i_gateway;
2223 rt->rt6i_flags = ort->rt6i_flags;
2224 rt6_set_from(rt, ort);
2225 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2226#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2227 rt->rt6i_src = ort->rt6i_src;
1da177e4 2228#endif
83a09abd
MKL
2229 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2230 rt->rt6i_table = ort->rt6i_table;
61adedf3 2231 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2232}
2233
70ceb4f5 2234#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2235static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
2236 const struct in6_addr *prefix, int prefixlen,
2237 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
2238{
2239 struct fib6_node *fn;
2240 struct rt6_info *rt = NULL;
c71099ac
TG
2241 struct fib6_table *table;
2242
efa2cea0 2243 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2244 if (!table)
c71099ac 2245 return NULL;
70ceb4f5 2246
5744dd9b 2247 read_lock_bh(&table->tb6_lock);
67ba4152 2248 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2249 if (!fn)
2250 goto out;
2251
d8d1f30b 2252 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2253 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2254 continue;
2255 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2256 continue;
2257 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2258 continue;
d8d1f30b 2259 dst_hold(&rt->dst);
70ceb4f5
YH
2260 break;
2261 }
2262out:
5744dd9b 2263 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2264 return rt;
2265}
2266
efa2cea0 2267static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2268 const struct in6_addr *prefix, int prefixlen,
2269 const struct in6_addr *gwaddr, int ifindex,
95c96174 2270 unsigned int pref)
70ceb4f5 2271{
86872cb5 2272 struct fib6_config cfg = {
238fc7ea 2273 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2274 .fc_ifindex = ifindex,
2275 .fc_dst_len = prefixlen,
2276 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2277 RTF_UP | RTF_PREF(pref),
15e47304 2278 .fc_nlinfo.portid = 0,
efa2cea0
DL
2279 .fc_nlinfo.nlh = NULL,
2280 .fc_nlinfo.nl_net = net,
86872cb5
TG
2281 };
2282
ca254490 2283 cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
4e3fd7a0
AD
2284 cfg.fc_dst = *prefix;
2285 cfg.fc_gateway = *gwaddr;
70ceb4f5 2286
e317da96
YH
2287 /* We should treat it as a default route if prefix length is 0. */
2288 if (!prefixlen)
86872cb5 2289 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2290
86872cb5 2291 ip6_route_add(&cfg);
70ceb4f5 2292
efa2cea0 2293 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2294}
2295#endif
2296
b71d1d42 2297struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2298{
1da177e4 2299 struct rt6_info *rt;
c71099ac 2300 struct fib6_table *table;
1da177e4 2301
c346dca1 2302 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2303 if (!table)
c71099ac 2304 return NULL;
1da177e4 2305
5744dd9b 2306 read_lock_bh(&table->tb6_lock);
67ba4152 2307 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2308 if (dev == rt->dst.dev &&
045927ff 2309 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2310 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2311 break;
2312 }
2313 if (rt)
d8d1f30b 2314 dst_hold(&rt->dst);
5744dd9b 2315 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2316 return rt;
2317}
2318
b71d1d42 2319struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2320 struct net_device *dev,
2321 unsigned int pref)
1da177e4 2322{
86872cb5 2323 struct fib6_config cfg = {
ca254490 2324 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2325 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2326 .fc_ifindex = dev->ifindex,
2327 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2328 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2329 .fc_nlinfo.portid = 0,
5578689a 2330 .fc_nlinfo.nlh = NULL,
c346dca1 2331 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2332 };
1da177e4 2333
4e3fd7a0 2334 cfg.fc_gateway = *gwaddr;
1da177e4 2335
86872cb5 2336 ip6_route_add(&cfg);
1da177e4 2337
1da177e4
LT
2338 return rt6_get_dflt_router(gwaddr, dev);
2339}
2340
7b4da532 2341void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2342{
2343 struct rt6_info *rt;
c71099ac
TG
2344 struct fib6_table *table;
2345
2346 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2347 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2348 if (!table)
c71099ac 2349 return;
1da177e4
LT
2350
2351restart:
c71099ac 2352 read_lock_bh(&table->tb6_lock);
d8d1f30b 2353 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2354 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2355 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2356 dst_hold(&rt->dst);
c71099ac 2357 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2358 ip6_del_rt(rt);
1da177e4
LT
2359 goto restart;
2360 }
2361 }
c71099ac 2362 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2363}
2364
5578689a
DL
2365static void rtmsg_to_fib6_config(struct net *net,
2366 struct in6_rtmsg *rtmsg,
86872cb5
TG
2367 struct fib6_config *cfg)
2368{
2369 memset(cfg, 0, sizeof(*cfg));
2370
ca254490
DA
2371 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2372 : RT6_TABLE_MAIN;
86872cb5
TG
2373 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2374 cfg->fc_metric = rtmsg->rtmsg_metric;
2375 cfg->fc_expires = rtmsg->rtmsg_info;
2376 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2377 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2378 cfg->fc_flags = rtmsg->rtmsg_flags;
2379
5578689a 2380 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2381
4e3fd7a0
AD
2382 cfg->fc_dst = rtmsg->rtmsg_dst;
2383 cfg->fc_src = rtmsg->rtmsg_src;
2384 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2385}
2386
5578689a 2387int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2388{
86872cb5 2389 struct fib6_config cfg;
1da177e4
LT
2390 struct in6_rtmsg rtmsg;
2391 int err;
2392
67ba4152 2393 switch (cmd) {
1da177e4
LT
2394 case SIOCADDRT: /* Add a route */
2395 case SIOCDELRT: /* Delete a route */
af31f412 2396 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2397 return -EPERM;
2398 err = copy_from_user(&rtmsg, arg,
2399 sizeof(struct in6_rtmsg));
2400 if (err)
2401 return -EFAULT;
86872cb5 2402
5578689a 2403 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2404
1da177e4
LT
2405 rtnl_lock();
2406 switch (cmd) {
2407 case SIOCADDRT:
86872cb5 2408 err = ip6_route_add(&cfg);
1da177e4
LT
2409 break;
2410 case SIOCDELRT:
86872cb5 2411 err = ip6_route_del(&cfg);
1da177e4
LT
2412 break;
2413 default:
2414 err = -EINVAL;
2415 }
2416 rtnl_unlock();
2417
2418 return err;
3ff50b79 2419 }
1da177e4
LT
2420
2421 return -EINVAL;
2422}
2423
2424/*
2425 * Drop the packet on the floor
2426 */
2427
d5fdd6ba 2428static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2429{
612f09e8 2430 int type;
adf30907 2431 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2432 switch (ipstats_mib_noroutes) {
2433 case IPSTATS_MIB_INNOROUTES:
0660e03f 2434 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2435 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2436 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2437 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2438 break;
2439 }
2440 /* FALLTHROUGH */
2441 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2442 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2443 ipstats_mib_noroutes);
612f09e8
YH
2444 break;
2445 }
3ffe533c 2446 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2447 kfree_skb(skb);
2448 return 0;
2449}
2450
9ce8ade0
TG
2451static int ip6_pkt_discard(struct sk_buff *skb)
2452{
612f09e8 2453 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2454}
2455
ede2059d 2456static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2457{
adf30907 2458 skb->dev = skb_dst(skb)->dev;
612f09e8 2459 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2460}
2461
9ce8ade0
TG
2462static int ip6_pkt_prohibit(struct sk_buff *skb)
2463{
612f09e8 2464 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2465}
2466
ede2059d 2467static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2468{
adf30907 2469 skb->dev = skb_dst(skb)->dev;
612f09e8 2470 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2471}
2472
1da177e4
LT
2473/*
2474 * Allocate a dst for local (unicast / anycast) address.
2475 */
2476
2477struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2478 const struct in6_addr *addr,
8f031519 2479 bool anycast)
1da177e4 2480{
ca254490 2481 u32 tb_id;
c346dca1 2482 struct net *net = dev_net(idev->dev);
a3300ef4 2483 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
ad706862 2484 DST_NOCOUNT);
a3300ef4 2485 if (!rt)
1da177e4
LT
2486 return ERR_PTR(-ENOMEM);
2487
1da177e4
LT
2488 in6_dev_hold(idev);
2489
11d53b49 2490 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2491 rt->dst.input = ip6_input;
2492 rt->dst.output = ip6_output;
1da177e4 2493 rt->rt6i_idev = idev;
1da177e4
LT
2494
2495 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2496 if (anycast)
2497 rt->rt6i_flags |= RTF_ANYCAST;
2498 else
1da177e4 2499 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2500
550bab42 2501 rt->rt6i_gateway = *addr;
4e3fd7a0 2502 rt->rt6i_dst.addr = *addr;
1da177e4 2503 rt->rt6i_dst.plen = 128;
ca254490
DA
2504 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2505 rt->rt6i_table = fib6_get_table(net, tb_id);
8e3d5be7 2506 rt->dst.flags |= DST_NOCACHE;
1da177e4 2507
d8d1f30b 2508 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2509
2510 return rt;
2511}
2512
c3968a85
DW
2513int ip6_route_get_saddr(struct net *net,
2514 struct rt6_info *rt,
b71d1d42 2515 const struct in6_addr *daddr,
c3968a85
DW
2516 unsigned int prefs,
2517 struct in6_addr *saddr)
2518{
e16e888b
MS
2519 struct inet6_dev *idev =
2520 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2521 int err = 0;
e16e888b 2522 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2523 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2524 else
2525 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2526 daddr, prefs, saddr);
2527 return err;
2528}
2529
2530/* remove deleted ip from prefsrc entries */
2531struct arg_dev_net_ip {
2532 struct net_device *dev;
2533 struct net *net;
2534 struct in6_addr *addr;
2535};
2536
2537static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2538{
2539 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2540 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2541 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2542
d1918542 2543 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2544 rt != net->ipv6.ip6_null_entry &&
2545 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2546 /* remove prefsrc entry */
2547 rt->rt6i_prefsrc.plen = 0;
2548 }
2549 return 0;
2550}
2551
2552void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2553{
2554 struct net *net = dev_net(ifp->idev->dev);
2555 struct arg_dev_net_ip adni = {
2556 .dev = ifp->idev->dev,
2557 .net = net,
2558 .addr = &ifp->addr,
2559 };
0c3584d5 2560 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2561}
2562
be7a010d
DJ
2563#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2564#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2565
2566/* Remove routers and update dst entries when gateway turn into host. */
2567static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2568{
2569 struct in6_addr *gateway = (struct in6_addr *)arg;
2570
2571 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2572 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2573 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2574 return -1;
2575 }
2576 return 0;
2577}
2578
2579void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2580{
2581 fib6_clean_all(net, fib6_clean_tohost, gateway);
2582}
2583
8ed67789
DL
2584struct arg_dev_net {
2585 struct net_device *dev;
2586 struct net *net;
2587};
2588
1da177e4
LT
2589static int fib6_ifdown(struct rt6_info *rt, void *arg)
2590{
bc3ef660 2591 const struct arg_dev_net *adn = arg;
2592 const struct net_device *dev = adn->dev;
8ed67789 2593
d1918542 2594 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2595 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2596 return -1;
c159d30c 2597
1da177e4
LT
2598 return 0;
2599}
2600
f3db4851 2601void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2602{
8ed67789
DL
2603 struct arg_dev_net adn = {
2604 .dev = dev,
2605 .net = net,
2606 };
2607
0c3584d5 2608 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2609 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2610 if (dev)
2611 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2612}
2613
95c96174 2614struct rt6_mtu_change_arg {
1da177e4 2615 struct net_device *dev;
95c96174 2616 unsigned int mtu;
1da177e4
LT
2617};
2618
2619static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2620{
2621 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2622 struct inet6_dev *idev;
2623
2624 /* In IPv6 pmtu discovery is not optional,
2625 so that RTAX_MTU lock cannot disable it.
2626 We still use this lock to block changes
2627 caused by addrconf/ndisc.
2628 */
2629
2630 idev = __in6_dev_get(arg->dev);
38308473 2631 if (!idev)
1da177e4
LT
2632 return 0;
2633
2634 /* For administrative MTU increase, there is no way to discover
2635 IPv6 PMTU increase, so PMTU increase should be updated here.
2636 Since RFC 1981 doesn't include administrative MTU increase
2637 update PMTU increase is a MUST. (i.e. jumbo frame)
2638 */
2639 /*
2640 If new MTU is less than route PMTU, this new MTU will be the
2641 lowest MTU in the path, update the route PMTU to reflect PMTU
2642 decreases; if new MTU is greater than route PMTU, and the
2643 old MTU is the lowest MTU in the path, update the route PMTU
2644 to reflect the increase. In this case if the other nodes' MTU
2645 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2646 PMTU discouvery.
2647 */
d1918542 2648 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2649 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2650 if (rt->rt6i_flags & RTF_CACHE) {
2651 /* For RTF_CACHE with rt6i_pmtu == 0
2652 * (i.e. a redirected route),
2653 * the metrics of its rt->dst.from has already
2654 * been updated.
2655 */
2656 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2657 rt->rt6i_pmtu = arg->mtu;
2658 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2659 (dst_mtu(&rt->dst) < arg->mtu &&
2660 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2661 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2662 }
566cfd8f 2663 }
1da177e4
LT
2664 return 0;
2665}
2666
95c96174 2667void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2668{
c71099ac
TG
2669 struct rt6_mtu_change_arg arg = {
2670 .dev = dev,
2671 .mtu = mtu,
2672 };
1da177e4 2673
0c3584d5 2674 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2675}
2676
ef7c79ed 2677static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2678 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2679 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2680 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2681 [RTA_PRIORITY] = { .type = NLA_U32 },
2682 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2683 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2684 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2685 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2686 [RTA_ENCAP] = { .type = NLA_NESTED },
86872cb5
TG
2687};
2688
2689static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2690 struct fib6_config *cfg)
1da177e4 2691{
86872cb5
TG
2692 struct rtmsg *rtm;
2693 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2694 unsigned int pref;
86872cb5 2695 int err;
1da177e4 2696
86872cb5
TG
2697 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2698 if (err < 0)
2699 goto errout;
1da177e4 2700
86872cb5
TG
2701 err = -EINVAL;
2702 rtm = nlmsg_data(nlh);
2703 memset(cfg, 0, sizeof(*cfg));
2704
2705 cfg->fc_table = rtm->rtm_table;
2706 cfg->fc_dst_len = rtm->rtm_dst_len;
2707 cfg->fc_src_len = rtm->rtm_src_len;
2708 cfg->fc_flags = RTF_UP;
2709 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2710 cfg->fc_type = rtm->rtm_type;
86872cb5 2711
ef2c7d7b
ND
2712 if (rtm->rtm_type == RTN_UNREACHABLE ||
2713 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2714 rtm->rtm_type == RTN_PROHIBIT ||
2715 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2716 cfg->fc_flags |= RTF_REJECT;
2717
ab79ad14
2718 if (rtm->rtm_type == RTN_LOCAL)
2719 cfg->fc_flags |= RTF_LOCAL;
2720
1f56a01f
MKL
2721 if (rtm->rtm_flags & RTM_F_CLONED)
2722 cfg->fc_flags |= RTF_CACHE;
2723
15e47304 2724 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2725 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2726 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2727
2728 if (tb[RTA_GATEWAY]) {
67b61f6c 2729 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2730 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2731 }
86872cb5
TG
2732
2733 if (tb[RTA_DST]) {
2734 int plen = (rtm->rtm_dst_len + 7) >> 3;
2735
2736 if (nla_len(tb[RTA_DST]) < plen)
2737 goto errout;
2738
2739 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2740 }
86872cb5
TG
2741
2742 if (tb[RTA_SRC]) {
2743 int plen = (rtm->rtm_src_len + 7) >> 3;
2744
2745 if (nla_len(tb[RTA_SRC]) < plen)
2746 goto errout;
2747
2748 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2749 }
86872cb5 2750
c3968a85 2751 if (tb[RTA_PREFSRC])
67b61f6c 2752 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2753
86872cb5
TG
2754 if (tb[RTA_OIF])
2755 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2756
2757 if (tb[RTA_PRIORITY])
2758 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2759
2760 if (tb[RTA_METRICS]) {
2761 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2762 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2763 }
86872cb5
TG
2764
2765 if (tb[RTA_TABLE])
2766 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2767
51ebd318
ND
2768 if (tb[RTA_MULTIPATH]) {
2769 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2770 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2771 }
2772
c78ba6d6
LR
2773 if (tb[RTA_PREF]) {
2774 pref = nla_get_u8(tb[RTA_PREF]);
2775 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2776 pref != ICMPV6_ROUTER_PREF_HIGH)
2777 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2778 cfg->fc_flags |= RTF_PREF(pref);
2779 }
2780
19e42e45
RP
2781 if (tb[RTA_ENCAP])
2782 cfg->fc_encap = tb[RTA_ENCAP];
2783
2784 if (tb[RTA_ENCAP_TYPE])
2785 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2786
86872cb5
TG
2787 err = 0;
2788errout:
2789 return err;
1da177e4
LT
2790}
2791
6b9ea5a6
RP
2792struct rt6_nh {
2793 struct rt6_info *rt6_info;
2794 struct fib6_config r_cfg;
2795 struct mx6_config mxc;
2796 struct list_head next;
2797};
2798
2799static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2800{
2801 struct rt6_nh *nh;
2802
2803 list_for_each_entry(nh, rt6_nh_list, next) {
2804 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2805 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2806 nh->r_cfg.fc_ifindex);
2807 }
2808}
2809
2810static int ip6_route_info_append(struct list_head *rt6_nh_list,
2811 struct rt6_info *rt, struct fib6_config *r_cfg)
2812{
2813 struct rt6_nh *nh;
2814 struct rt6_info *rtnh;
2815 int err = -EEXIST;
2816
2817 list_for_each_entry(nh, rt6_nh_list, next) {
2818 /* check if rt6_info already exists */
2819 rtnh = nh->rt6_info;
2820
2821 if (rtnh->dst.dev == rt->dst.dev &&
2822 rtnh->rt6i_idev == rt->rt6i_idev &&
2823 ipv6_addr_equal(&rtnh->rt6i_gateway,
2824 &rt->rt6i_gateway))
2825 return err;
2826 }
2827
2828 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2829 if (!nh)
2830 return -ENOMEM;
2831 nh->rt6_info = rt;
2832 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2833 if (err) {
2834 kfree(nh);
2835 return err;
2836 }
2837 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2838 list_add_tail(&nh->next, rt6_nh_list);
2839
2840 return 0;
2841}
2842
2843static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318
ND
2844{
2845 struct fib6_config r_cfg;
2846 struct rtnexthop *rtnh;
6b9ea5a6
RP
2847 struct rt6_info *rt;
2848 struct rt6_nh *err_nh;
2849 struct rt6_nh *nh, *nh_safe;
51ebd318
ND
2850 int remaining;
2851 int attrlen;
6b9ea5a6
RP
2852 int err = 1;
2853 int nhn = 0;
2854 int replace = (cfg->fc_nlinfo.nlh &&
2855 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2856 LIST_HEAD(rt6_nh_list);
51ebd318 2857
35f1b4e9 2858 remaining = cfg->fc_mp_len;
51ebd318 2859 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 2860
6b9ea5a6
RP
2861 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2862 * rt6_info structs per nexthop
2863 */
51ebd318
ND
2864 while (rtnh_ok(rtnh, remaining)) {
2865 memcpy(&r_cfg, cfg, sizeof(*cfg));
2866 if (rtnh->rtnh_ifindex)
2867 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2868
2869 attrlen = rtnh_attrlen(rtnh);
2870 if (attrlen > 0) {
2871 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2872
2873 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2874 if (nla) {
67b61f6c 2875 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2876 r_cfg.fc_flags |= RTF_GATEWAY;
2877 }
19e42e45
RP
2878 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2879 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2880 if (nla)
2881 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 2882 }
6b9ea5a6 2883
8c5b83f0
RP
2884 rt = ip6_route_info_create(&r_cfg);
2885 if (IS_ERR(rt)) {
2886 err = PTR_ERR(rt);
2887 rt = NULL;
6b9ea5a6 2888 goto cleanup;
8c5b83f0 2889 }
6b9ea5a6
RP
2890
2891 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 2892 if (err) {
6b9ea5a6
RP
2893 dst_free(&rt->dst);
2894 goto cleanup;
2895 }
2896
2897 rtnh = rtnh_next(rtnh, &remaining);
2898 }
2899
2900 err_nh = NULL;
2901 list_for_each_entry(nh, &rt6_nh_list, next) {
2902 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2903 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2904 nh->rt6_info = NULL;
2905 if (err) {
2906 if (replace && nhn)
2907 ip6_print_replace_route_err(&rt6_nh_list);
2908 err_nh = nh;
2909 goto add_errout;
51ebd318 2910 }
6b9ea5a6 2911
1a72418b 2912 /* Because each route is added like a single route we remove
27596472
MK
2913 * these flags after the first nexthop: if there is a collision,
2914 * we have already failed to add the first nexthop:
2915 * fib6_add_rt2node() has rejected it; when replacing, old
2916 * nexthops have been replaced by first new, the rest should
2917 * be added to it.
1a72418b 2918 */
27596472
MK
2919 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2920 NLM_F_REPLACE);
6b9ea5a6
RP
2921 nhn++;
2922 }
2923
2924 goto cleanup;
2925
2926add_errout:
2927 /* Delete routes that were already added */
2928 list_for_each_entry(nh, &rt6_nh_list, next) {
2929 if (err_nh == nh)
2930 break;
2931 ip6_route_del(&nh->r_cfg);
2932 }
2933
2934cleanup:
2935 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2936 if (nh->rt6_info)
2937 dst_free(&nh->rt6_info->dst);
52fe51f8 2938 kfree(nh->mxc.mx);
6b9ea5a6
RP
2939 list_del(&nh->next);
2940 kfree(nh);
2941 }
2942
2943 return err;
2944}
2945
2946static int ip6_route_multipath_del(struct fib6_config *cfg)
2947{
2948 struct fib6_config r_cfg;
2949 struct rtnexthop *rtnh;
2950 int remaining;
2951 int attrlen;
2952 int err = 1, last_err = 0;
2953
2954 remaining = cfg->fc_mp_len;
2955 rtnh = (struct rtnexthop *)cfg->fc_mp;
2956
2957 /* Parse a Multipath Entry */
2958 while (rtnh_ok(rtnh, remaining)) {
2959 memcpy(&r_cfg, cfg, sizeof(*cfg));
2960 if (rtnh->rtnh_ifindex)
2961 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2962
2963 attrlen = rtnh_attrlen(rtnh);
2964 if (attrlen > 0) {
2965 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2966
2967 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2968 if (nla) {
2969 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2970 r_cfg.fc_flags |= RTF_GATEWAY;
2971 }
2972 }
2973 err = ip6_route_del(&r_cfg);
2974 if (err)
2975 last_err = err;
2976
51ebd318
ND
2977 rtnh = rtnh_next(rtnh, &remaining);
2978 }
2979
2980 return last_err;
2981}
2982
67ba4152 2983static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2984{
86872cb5
TG
2985 struct fib6_config cfg;
2986 int err;
1da177e4 2987
86872cb5
TG
2988 err = rtm_to_fib6_config(skb, nlh, &cfg);
2989 if (err < 0)
2990 return err;
2991
51ebd318 2992 if (cfg.fc_mp)
6b9ea5a6 2993 return ip6_route_multipath_del(&cfg);
51ebd318
ND
2994 else
2995 return ip6_route_del(&cfg);
1da177e4
LT
2996}
2997
67ba4152 2998static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 2999{
86872cb5
TG
3000 struct fib6_config cfg;
3001 int err;
1da177e4 3002
86872cb5
TG
3003 err = rtm_to_fib6_config(skb, nlh, &cfg);
3004 if (err < 0)
3005 return err;
3006
51ebd318 3007 if (cfg.fc_mp)
6b9ea5a6 3008 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3009 else
3010 return ip6_route_add(&cfg);
1da177e4
LT
3011}
3012
19e42e45 3013static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
3014{
3015 return NLMSG_ALIGN(sizeof(struct rtmsg))
3016 + nla_total_size(16) /* RTA_SRC */
3017 + nla_total_size(16) /* RTA_DST */
3018 + nla_total_size(16) /* RTA_GATEWAY */
3019 + nla_total_size(16) /* RTA_PREFSRC */
3020 + nla_total_size(4) /* RTA_TABLE */
3021 + nla_total_size(4) /* RTA_IIF */
3022 + nla_total_size(4) /* RTA_OIF */
3023 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3024 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3025 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3026 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3027 + nla_total_size(1) /* RTA_PREF */
61adedf3 3028 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
3029}
3030
191cd582
BH
3031static int rt6_fill_node(struct net *net,
3032 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3033 struct in6_addr *dst, struct in6_addr *src,
15e47304 3034 int iif, int type, u32 portid, u32 seq,
7bc570c8 3035 int prefix, int nowait, unsigned int flags)
1da177e4 3036{
4b32b5ad 3037 u32 metrics[RTAX_MAX];
1da177e4 3038 struct rtmsg *rtm;
2d7202bf 3039 struct nlmsghdr *nlh;
e3703b3d 3040 long expires;
9e762a4a 3041 u32 table;
1da177e4
LT
3042
3043 if (prefix) { /* user wants prefix routes only */
3044 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3045 /* success since this is not a prefix route */
3046 return 1;
3047 }
3048 }
3049
15e47304 3050 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3051 if (!nlh)
26932566 3052 return -EMSGSIZE;
2d7202bf
TG
3053
3054 rtm = nlmsg_data(nlh);
1da177e4
LT
3055 rtm->rtm_family = AF_INET6;
3056 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3057 rtm->rtm_src_len = rt->rt6i_src.plen;
3058 rtm->rtm_tos = 0;
c71099ac 3059 if (rt->rt6i_table)
9e762a4a 3060 table = rt->rt6i_table->tb6_id;
c71099ac 3061 else
9e762a4a
PM
3062 table = RT6_TABLE_UNSPEC;
3063 rtm->rtm_table = table;
c78679e8
DM
3064 if (nla_put_u32(skb, RTA_TABLE, table))
3065 goto nla_put_failure;
ef2c7d7b
ND
3066 if (rt->rt6i_flags & RTF_REJECT) {
3067 switch (rt->dst.error) {
3068 case -EINVAL:
3069 rtm->rtm_type = RTN_BLACKHOLE;
3070 break;
3071 case -EACCES:
3072 rtm->rtm_type = RTN_PROHIBIT;
3073 break;
b4949ab2
ND
3074 case -EAGAIN:
3075 rtm->rtm_type = RTN_THROW;
3076 break;
ef2c7d7b
ND
3077 default:
3078 rtm->rtm_type = RTN_UNREACHABLE;
3079 break;
3080 }
3081 }
38308473 3082 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3083 rtm->rtm_type = RTN_LOCAL;
d1918542 3084 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3085 rtm->rtm_type = RTN_LOCAL;
3086 else
3087 rtm->rtm_type = RTN_UNICAST;
3088 rtm->rtm_flags = 0;
35103d11 3089 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 3090 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
3091 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3092 rtm->rtm_flags |= RTNH_F_DEAD;
3093 }
1da177e4
LT
3094 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3095 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3096 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3097 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3098 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3099 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3100 rtm->rtm_protocol = RTPROT_RA;
3101 else
3102 rtm->rtm_protocol = RTPROT_KERNEL;
3103 }
1da177e4 3104
38308473 3105 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3106 rtm->rtm_flags |= RTM_F_CLONED;
3107
3108 if (dst) {
930345ea 3109 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3110 goto nla_put_failure;
1ab1457c 3111 rtm->rtm_dst_len = 128;
1da177e4 3112 } else if (rtm->rtm_dst_len)
930345ea 3113 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3114 goto nla_put_failure;
1da177e4
LT
3115#ifdef CONFIG_IPV6_SUBTREES
3116 if (src) {
930345ea 3117 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3118 goto nla_put_failure;
1ab1457c 3119 rtm->rtm_src_len = 128;
c78679e8 3120 } else if (rtm->rtm_src_len &&
930345ea 3121 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3122 goto nla_put_failure;
1da177e4 3123#endif
7bc570c8
YH
3124 if (iif) {
3125#ifdef CONFIG_IPV6_MROUTE
3126 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 3127 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
3128 if (err <= 0) {
3129 if (!nowait) {
3130 if (err == 0)
3131 return 0;
3132 goto nla_put_failure;
3133 } else {
3134 if (err == -EMSGSIZE)
3135 goto nla_put_failure;
3136 }
3137 }
3138 } else
3139#endif
c78679e8
DM
3140 if (nla_put_u32(skb, RTA_IIF, iif))
3141 goto nla_put_failure;
7bc570c8 3142 } else if (dst) {
1da177e4 3143 struct in6_addr saddr_buf;
c78679e8 3144 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3145 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3146 goto nla_put_failure;
1da177e4 3147 }
2d7202bf 3148
c3968a85
DW
3149 if (rt->rt6i_prefsrc.plen) {
3150 struct in6_addr saddr_buf;
4e3fd7a0 3151 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3152 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3153 goto nla_put_failure;
c3968a85
DW
3154 }
3155
4b32b5ad
MKL
3156 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3157 if (rt->rt6i_pmtu)
3158 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3159 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3160 goto nla_put_failure;
3161
dd0cbf29 3162 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3163 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3164 goto nla_put_failure;
94f826b8 3165 }
2d7202bf 3166
c78679e8
DM
3167 if (rt->dst.dev &&
3168 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3169 goto nla_put_failure;
3170 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3171 goto nla_put_failure;
8253947e
LW
3172
3173 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3174
87a50699 3175 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3176 goto nla_put_failure;
2d7202bf 3177
c78ba6d6
LR
3178 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3179 goto nla_put_failure;
3180
61adedf3 3181 lwtunnel_fill_encap(skb, rt->dst.lwtstate);
19e42e45 3182
053c095a
JB
3183 nlmsg_end(skb, nlh);
3184 return 0;
2d7202bf
TG
3185
3186nla_put_failure:
26932566
PM
3187 nlmsg_cancel(skb, nlh);
3188 return -EMSGSIZE;
1da177e4
LT
3189}
3190
1b43af54 3191int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3192{
3193 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3194 int prefix;
3195
2d7202bf
TG
3196 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3197 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3198 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3199 } else
3200 prefix = 0;
3201
191cd582
BH
3202 return rt6_fill_node(arg->net,
3203 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3204 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3205 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3206}
3207
67ba4152 3208static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3209{
3b1e0a65 3210 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3211 struct nlattr *tb[RTA_MAX+1];
3212 struct rt6_info *rt;
1da177e4 3213 struct sk_buff *skb;
ab364a6f 3214 struct rtmsg *rtm;
4c9483b2 3215 struct flowi6 fl6;
72331bc0 3216 int err, iif = 0, oif = 0;
1da177e4 3217
ab364a6f
TG
3218 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3219 if (err < 0)
3220 goto errout;
1da177e4 3221
ab364a6f 3222 err = -EINVAL;
4c9483b2 3223 memset(&fl6, 0, sizeof(fl6));
1da177e4 3224
ab364a6f
TG
3225 if (tb[RTA_SRC]) {
3226 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3227 goto errout;
3228
4e3fd7a0 3229 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3230 }
3231
3232 if (tb[RTA_DST]) {
3233 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3234 goto errout;
3235
4e3fd7a0 3236 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3237 }
3238
3239 if (tb[RTA_IIF])
3240 iif = nla_get_u32(tb[RTA_IIF]);
3241
3242 if (tb[RTA_OIF])
72331bc0 3243 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3244
2e47b291
LC
3245 if (tb[RTA_MARK])
3246 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3247
1da177e4
LT
3248 if (iif) {
3249 struct net_device *dev;
72331bc0
SL
3250 int flags = 0;
3251
5578689a 3252 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3253 if (!dev) {
3254 err = -ENODEV;
ab364a6f 3255 goto errout;
1da177e4 3256 }
72331bc0
SL
3257
3258 fl6.flowi6_iif = iif;
3259
3260 if (!ipv6_addr_any(&fl6.saddr))
3261 flags |= RT6_LOOKUP_F_HAS_SADDR;
3262
3263 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3264 flags);
3265 } else {
3266 fl6.flowi6_oif = oif;
3267
ca254490
DA
3268 if (netif_index_is_l3_master(net, oif)) {
3269 fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3270 FLOWI_FLAG_SKIP_NH_OIF;
3271 }
3272
72331bc0 3273 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3274 }
3275
ab364a6f 3276 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3277 if (!skb) {
94e187c0 3278 ip6_rt_put(rt);
ab364a6f
TG
3279 err = -ENOBUFS;
3280 goto errout;
3281 }
1da177e4 3282
ab364a6f
TG
3283 /* Reserve room for dummy headers, this skb can pass
3284 through good chunk of routing engine.
3285 */
459a98ed 3286 skb_reset_mac_header(skb);
ab364a6f 3287 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3288
d8d1f30b 3289 skb_dst_set(skb, &rt->dst);
1da177e4 3290
4c9483b2 3291 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3292 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3293 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3294 if (err < 0) {
ab364a6f
TG
3295 kfree_skb(skb);
3296 goto errout;
1da177e4
LT
3297 }
3298
15e47304 3299 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3300errout:
1da177e4 3301 return err;
1da177e4
LT
3302}
3303
37a1d361
RP
3304void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3305 unsigned int nlm_flags)
1da177e4
LT
3306{
3307 struct sk_buff *skb;
5578689a 3308 struct net *net = info->nl_net;
528c4ceb
DL
3309 u32 seq;
3310 int err;
3311
3312 err = -ENOBUFS;
38308473 3313 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3314
19e42e45 3315 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3316 if (!skb)
21713ebc
TG
3317 goto errout;
3318
191cd582 3319 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
37a1d361 3320 event, info->portid, seq, 0, 0, nlm_flags);
26932566
PM
3321 if (err < 0) {
3322 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3323 WARN_ON(err == -EMSGSIZE);
3324 kfree_skb(skb);
3325 goto errout;
3326 }
15e47304 3327 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3328 info->nlh, gfp_any());
3329 return;
21713ebc
TG
3330errout:
3331 if (err < 0)
5578689a 3332 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3333}
3334
8ed67789 3335static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3336 unsigned long event, void *ptr)
8ed67789 3337{
351638e7 3338 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3339 struct net *net = dev_net(dev);
8ed67789
DL
3340
3341 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3342 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3343 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3344#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3345 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3346 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3347 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3348 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3349#endif
3350 }
3351
3352 return NOTIFY_OK;
3353}
3354
1da177e4
LT
3355/*
3356 * /proc
3357 */
3358
3359#ifdef CONFIG_PROC_FS
3360
33120b30
AD
3361static const struct file_operations ipv6_route_proc_fops = {
3362 .owner = THIS_MODULE,
3363 .open = ipv6_route_open,
3364 .read = seq_read,
3365 .llseek = seq_lseek,
8d2ca1d7 3366 .release = seq_release_net,
33120b30
AD
3367};
3368
1da177e4
LT
3369static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3370{
69ddb805 3371 struct net *net = (struct net *)seq->private;
1da177e4 3372 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3373 net->ipv6.rt6_stats->fib_nodes,
3374 net->ipv6.rt6_stats->fib_route_nodes,
3375 net->ipv6.rt6_stats->fib_rt_alloc,
3376 net->ipv6.rt6_stats->fib_rt_entries,
3377 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3378 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3379 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3380
3381 return 0;
3382}
3383
3384static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3385{
de05c557 3386 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3387}
3388
9a32144e 3389static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3390 .owner = THIS_MODULE,
3391 .open = rt6_stats_seq_open,
3392 .read = seq_read,
3393 .llseek = seq_lseek,
b6fcbdb4 3394 .release = single_release_net,
1da177e4
LT
3395};
3396#endif /* CONFIG_PROC_FS */
3397
3398#ifdef CONFIG_SYSCTL
3399
1da177e4 3400static
fe2c6338 3401int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3402 void __user *buffer, size_t *lenp, loff_t *ppos)
3403{
c486da34
LAG
3404 struct net *net;
3405 int delay;
3406 if (!write)
1da177e4 3407 return -EINVAL;
c486da34
LAG
3408
3409 net = (struct net *)ctl->extra1;
3410 delay = net->ipv6.sysctl.flush_delay;
3411 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3412 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3413 return 0;
1da177e4
LT
3414}
3415
fe2c6338 3416struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3417 {
1da177e4 3418 .procname = "flush",
4990509f 3419 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3420 .maxlen = sizeof(int),
89c8b3a1 3421 .mode = 0200,
6d9f239a 3422 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3423 },
3424 {
1da177e4 3425 .procname = "gc_thresh",
9a7ec3a9 3426 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3427 .maxlen = sizeof(int),
3428 .mode = 0644,
6d9f239a 3429 .proc_handler = proc_dointvec,
1da177e4
LT
3430 },
3431 {
1da177e4 3432 .procname = "max_size",
4990509f 3433 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3434 .maxlen = sizeof(int),
3435 .mode = 0644,
6d9f239a 3436 .proc_handler = proc_dointvec,
1da177e4
LT
3437 },
3438 {
1da177e4 3439 .procname = "gc_min_interval",
4990509f 3440 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3441 .maxlen = sizeof(int),
3442 .mode = 0644,
6d9f239a 3443 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3444 },
3445 {
1da177e4 3446 .procname = "gc_timeout",
4990509f 3447 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3448 .maxlen = sizeof(int),
3449 .mode = 0644,
6d9f239a 3450 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3451 },
3452 {
1da177e4 3453 .procname = "gc_interval",
4990509f 3454 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3455 .maxlen = sizeof(int),
3456 .mode = 0644,
6d9f239a 3457 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3458 },
3459 {
1da177e4 3460 .procname = "gc_elasticity",
4990509f 3461 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3462 .maxlen = sizeof(int),
3463 .mode = 0644,
f3d3f616 3464 .proc_handler = proc_dointvec,
1da177e4
LT
3465 },
3466 {
1da177e4 3467 .procname = "mtu_expires",
4990509f 3468 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3469 .maxlen = sizeof(int),
3470 .mode = 0644,
6d9f239a 3471 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3472 },
3473 {
1da177e4 3474 .procname = "min_adv_mss",
4990509f 3475 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3476 .maxlen = sizeof(int),
3477 .mode = 0644,
f3d3f616 3478 .proc_handler = proc_dointvec,
1da177e4
LT
3479 },
3480 {
1da177e4 3481 .procname = "gc_min_interval_ms",
4990509f 3482 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3483 .maxlen = sizeof(int),
3484 .mode = 0644,
6d9f239a 3485 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3486 },
f8572d8f 3487 { }
1da177e4
LT
3488};
3489
2c8c1e72 3490struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3491{
3492 struct ctl_table *table;
3493
3494 table = kmemdup(ipv6_route_table_template,
3495 sizeof(ipv6_route_table_template),
3496 GFP_KERNEL);
5ee09105
YH
3497
3498 if (table) {
3499 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3500 table[0].extra1 = net;
86393e52 3501 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3502 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3503 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3504 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3505 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3506 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3507 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3508 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3509 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3510
3511 /* Don't export sysctls to unprivileged users */
3512 if (net->user_ns != &init_user_ns)
3513 table[0].procname = NULL;
5ee09105
YH
3514 }
3515
760f2d01
DL
3516 return table;
3517}
1da177e4
LT
3518#endif
3519
2c8c1e72 3520static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3521{
633d424b 3522 int ret = -ENOMEM;
8ed67789 3523
86393e52
AD
3524 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3525 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3526
fc66f95c
ED
3527 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3528 goto out_ip6_dst_ops;
3529
8ed67789
DL
3530 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3531 sizeof(*net->ipv6.ip6_null_entry),
3532 GFP_KERNEL);
3533 if (!net->ipv6.ip6_null_entry)
fc66f95c 3534 goto out_ip6_dst_entries;
d8d1f30b 3535 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3536 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3537 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3538 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3539 ip6_template_metrics, true);
8ed67789
DL
3540
3541#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3542 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3543 sizeof(*net->ipv6.ip6_prohibit_entry),
3544 GFP_KERNEL);
68fffc67
PZ
3545 if (!net->ipv6.ip6_prohibit_entry)
3546 goto out_ip6_null_entry;
d8d1f30b 3547 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3548 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3549 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3550 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3551 ip6_template_metrics, true);
8ed67789
DL
3552
3553 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3554 sizeof(*net->ipv6.ip6_blk_hole_entry),
3555 GFP_KERNEL);
68fffc67
PZ
3556 if (!net->ipv6.ip6_blk_hole_entry)
3557 goto out_ip6_prohibit_entry;
d8d1f30b 3558 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3559 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3560 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3561 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3562 ip6_template_metrics, true);
8ed67789
DL
3563#endif
3564
b339a47c
PZ
3565 net->ipv6.sysctl.flush_delay = 0;
3566 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3567 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3568 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3569 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3570 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3571 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3572 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3573
6891a346
BT
3574 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3575
8ed67789
DL
3576 ret = 0;
3577out:
3578 return ret;
f2fc6a54 3579
68fffc67
PZ
3580#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3581out_ip6_prohibit_entry:
3582 kfree(net->ipv6.ip6_prohibit_entry);
3583out_ip6_null_entry:
3584 kfree(net->ipv6.ip6_null_entry);
3585#endif
fc66f95c
ED
3586out_ip6_dst_entries:
3587 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3588out_ip6_dst_ops:
f2fc6a54 3589 goto out;
cdb18761
DL
3590}
3591
2c8c1e72 3592static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3593{
8ed67789
DL
3594 kfree(net->ipv6.ip6_null_entry);
3595#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3596 kfree(net->ipv6.ip6_prohibit_entry);
3597 kfree(net->ipv6.ip6_blk_hole_entry);
3598#endif
41bb78b4 3599 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3600}
3601
d189634e
TG
3602static int __net_init ip6_route_net_init_late(struct net *net)
3603{
3604#ifdef CONFIG_PROC_FS
d4beaa66
G
3605 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3606 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3607#endif
3608 return 0;
3609}
3610
3611static void __net_exit ip6_route_net_exit_late(struct net *net)
3612{
3613#ifdef CONFIG_PROC_FS
ece31ffd
G
3614 remove_proc_entry("ipv6_route", net->proc_net);
3615 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3616#endif
3617}
3618
cdb18761
DL
3619static struct pernet_operations ip6_route_net_ops = {
3620 .init = ip6_route_net_init,
3621 .exit = ip6_route_net_exit,
3622};
3623
c3426b47
DM
3624static int __net_init ipv6_inetpeer_init(struct net *net)
3625{
3626 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3627
3628 if (!bp)
3629 return -ENOMEM;
3630 inet_peer_base_init(bp);
3631 net->ipv6.peers = bp;
3632 return 0;
3633}
3634
3635static void __net_exit ipv6_inetpeer_exit(struct net *net)
3636{
3637 struct inet_peer_base *bp = net->ipv6.peers;
3638
3639 net->ipv6.peers = NULL;
56a6b248 3640 inetpeer_invalidate_tree(bp);
c3426b47
DM
3641 kfree(bp);
3642}
3643
2b823f72 3644static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3645 .init = ipv6_inetpeer_init,
3646 .exit = ipv6_inetpeer_exit,
3647};
3648
d189634e
TG
3649static struct pernet_operations ip6_route_net_late_ops = {
3650 .init = ip6_route_net_init_late,
3651 .exit = ip6_route_net_exit_late,
3652};
3653
8ed67789
DL
3654static struct notifier_block ip6_route_dev_notifier = {
3655 .notifier_call = ip6_route_dev_notify,
3656 .priority = 0,
3657};
3658
433d49c3 3659int __init ip6_route_init(void)
1da177e4 3660{
433d49c3 3661 int ret;
8d0b94af 3662 int cpu;
433d49c3 3663
9a7ec3a9
DL
3664 ret = -ENOMEM;
3665 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3666 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3667 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3668 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3669 goto out;
14e50e57 3670
fc66f95c 3671 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3672 if (ret)
bdb3289f 3673 goto out_kmem_cache;
bdb3289f 3674
c3426b47
DM
3675 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3676 if (ret)
e8803b6c 3677 goto out_dst_entries;
2a0c451a 3678
7e52b33b
DM
3679 ret = register_pernet_subsys(&ip6_route_net_ops);
3680 if (ret)
3681 goto out_register_inetpeer;
c3426b47 3682
5dc121e9
AE
3683 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3684
8ed67789
DL
3685 /* Registering of the loopback is done before this portion of code,
3686 * the loopback reference in rt6_info will not be taken, do it
3687 * manually for init_net */
d8d1f30b 3688 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3689 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3690 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3691 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3692 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3693 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3694 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3695 #endif
e8803b6c 3696 ret = fib6_init();
433d49c3 3697 if (ret)
8ed67789 3698 goto out_register_subsys;
433d49c3 3699
433d49c3
DL
3700 ret = xfrm6_init();
3701 if (ret)
e8803b6c 3702 goto out_fib6_init;
c35b7e72 3703
433d49c3
DL
3704 ret = fib6_rules_init();
3705 if (ret)
3706 goto xfrm6_init;
7e5449c2 3707
d189634e
TG
3708 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3709 if (ret)
3710 goto fib6_rules_init;
3711
433d49c3 3712 ret = -ENOBUFS;
c7ac8679
GR
3713 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3714 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3715 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3716 goto out_register_late_subsys;
c127ea2c 3717
8ed67789 3718 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3719 if (ret)
d189634e 3720 goto out_register_late_subsys;
8ed67789 3721
8d0b94af
MKL
3722 for_each_possible_cpu(cpu) {
3723 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3724
3725 INIT_LIST_HEAD(&ul->head);
3726 spin_lock_init(&ul->lock);
3727 }
3728
433d49c3
DL
3729out:
3730 return ret;
3731
d189634e
TG
3732out_register_late_subsys:
3733 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3734fib6_rules_init:
433d49c3
DL
3735 fib6_rules_cleanup();
3736xfrm6_init:
433d49c3 3737 xfrm6_fini();
2a0c451a
TG
3738out_fib6_init:
3739 fib6_gc_cleanup();
8ed67789
DL
3740out_register_subsys:
3741 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3742out_register_inetpeer:
3743 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3744out_dst_entries:
3745 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3746out_kmem_cache:
f2fc6a54 3747 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3748 goto out;
1da177e4
LT
3749}
3750
3751void ip6_route_cleanup(void)
3752{
8ed67789 3753 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3754 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3755 fib6_rules_cleanup();
1da177e4 3756 xfrm6_fini();
1da177e4 3757 fib6_gc_cleanup();
c3426b47 3758 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3759 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3760 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3761 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3762}