]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv6/route.c
Merge branch 'tipc-fixes'
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4
LT
66
67#include <asm/uaccess.h>
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/*
 * Result of the neighbour check performed while scoring a route.
 * Negative values are failures that rt6_score_route() passes back
 * unchanged to find_match().
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is NUD_FAILED (router-pref builds) */
	RT6_NUD_FAIL_DO_RR = -1,	/* no neighbour entry; find_match() requests round-robin */
	RT6_NUD_SUCCEED = 1
};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 101
70ceb4f5 102#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 103static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr, int ifindex,
95c96174 106 unsigned int pref);
efa2cea0 107static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
108 const struct in6_addr *prefix, int prefixlen,
109 const struct in6_addr *gwaddr, int ifindex);
70ceb4f5
YH
110#endif
111
8d0b94af
MKL
112struct uncached_list {
113 spinlock_t lock;
114 struct list_head head;
115};
116
117static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
118
119static void rt6_uncached_list_add(struct rt6_info *rt)
120{
121 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
122
123 rt->dst.flags |= DST_NOCACHE;
124 rt->rt6i_uncached_list = ul;
125
126 spin_lock_bh(&ul->lock);
127 list_add_tail(&rt->rt6i_uncached, &ul->head);
128 spin_unlock_bh(&ul->lock);
129}
130
131static void rt6_uncached_list_del(struct rt6_info *rt)
132{
133 if (!list_empty(&rt->rt6i_uncached)) {
134 struct uncached_list *ul = rt->rt6i_uncached_list;
135
136 spin_lock_bh(&ul->lock);
137 list_del(&rt->rt6i_uncached);
138 spin_unlock_bh(&ul->lock);
139 }
140}
141
142static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
143{
144 struct net_device *loopback_dev = net->loopback_dev;
145 int cpu;
146
e332bc67
EB
147 if (dev == loopback_dev)
148 return;
149
8d0b94af
MKL
150 for_each_possible_cpu(cpu) {
151 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
152 struct rt6_info *rt;
153
154 spin_lock_bh(&ul->lock);
155 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
156 struct inet6_dev *rt_idev = rt->rt6i_idev;
157 struct net_device *rt_dev = rt->dst.dev;
158
e332bc67 159 if (rt_idev->dev == dev) {
8d0b94af
MKL
160 rt->rt6i_idev = in6_dev_get(loopback_dev);
161 in6_dev_put(rt_idev);
162 }
163
e332bc67 164 if (rt_dev == dev) {
8d0b94af
MKL
165 rt->dst.dev = loopback_dev;
166 dev_hold(rt->dst.dev);
167 dev_put(rt_dev);
168 }
169 }
170 spin_unlock_bh(&ul->lock);
171 }
172}
173
d52d3997
MKL
174static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
175{
176 return dst_metrics_write_ptr(rt->dst.from);
177}
178
06582540
DM
179static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
180{
4b32b5ad 181 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 182
d52d3997
MKL
183 if (rt->rt6i_flags & RTF_PCPU)
184 return rt6_pcpu_cow_metrics(rt);
185 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
186 return NULL;
187 else
3b471175 188 return dst_cow_metrics_generic(dst, old);
06582540
DM
189}
190
f894cbf8
DM
191static inline const void *choose_neigh_daddr(struct rt6_info *rt,
192 struct sk_buff *skb,
193 const void *daddr)
39232973
DM
194{
195 struct in6_addr *p = &rt->rt6i_gateway;
196
a7563f34 197 if (!ipv6_addr_any(p))
39232973 198 return (const void *) p;
f894cbf8
DM
199 else if (skb)
200 return &ipv6_hdr(skb)->daddr;
39232973
DM
201 return daddr;
202}
203
f894cbf8
DM
204static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
205 struct sk_buff *skb,
206 const void *daddr)
d3aaeb38 207{
39232973
DM
208 struct rt6_info *rt = (struct rt6_info *) dst;
209 struct neighbour *n;
210
f894cbf8 211 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 212 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
213 if (n)
214 return n;
215 return neigh_create(&nd_tbl, daddr, dst->dev);
216}
217
9a7ec3a9 218static struct dst_ops ip6_dst_ops_template = {
1da177e4 219 .family = AF_INET6,
1da177e4
LT
220 .gc = ip6_dst_gc,
221 .gc_thresh = 1024,
222 .check = ip6_dst_check,
0dbaee3b 223 .default_advmss = ip6_default_advmss,
ebb762f2 224 .mtu = ip6_mtu,
06582540 225 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
226 .destroy = ip6_dst_destroy,
227 .ifdown = ip6_dst_ifdown,
228 .negative_advice = ip6_negative_advice,
229 .link_failure = ip6_link_failure,
230 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 231 .redirect = rt6_do_redirect,
9f8955cc 232 .local_out = __ip6_local_out,
d3aaeb38 233 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
234};
235
ebb762f2 236static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 237{
618f9bc7
SK
238 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
239
240 return mtu ? : dst->dev->mtu;
ec831ea7
RD
241}
242
6700c270
DM
243static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
244 struct sk_buff *skb, u32 mtu)
14e50e57
DM
245{
246}
247
6700c270
DM
/* Redirects are ignored on blackhole dsts: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
252
14e50e57
DM
253static struct dst_ops ip6_dst_blackhole_ops = {
254 .family = AF_INET6,
14e50e57
DM
255 .destroy = ip6_dst_destroy,
256 .check = ip6_dst_check,
ebb762f2 257 .mtu = ip6_blackhole_mtu,
214f45c9 258 .default_advmss = ip6_default_advmss,
14e50e57 259 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 260 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 261 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 262 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
263};
264
62fa8a84 265static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 266 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
267};
268
fb0af4c7 269static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
270 .dst = {
271 .__refcnt = ATOMIC_INIT(1),
272 .__use = 1,
2c20cbd7 273 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 274 .error = -ENETUNREACH,
d8d1f30b
CG
275 .input = ip6_pkt_discard,
276 .output = ip6_pkt_discard_out,
1da177e4
LT
277 },
278 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 279 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
280 .rt6i_metric = ~(u32) 0,
281 .rt6i_ref = ATOMIC_INIT(1),
282};
283
101367c2
TG
284#ifdef CONFIG_IPV6_MULTIPLE_TABLES
285
fb0af4c7 286static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
287 .dst = {
288 .__refcnt = ATOMIC_INIT(1),
289 .__use = 1,
2c20cbd7 290 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 291 .error = -EACCES,
d8d1f30b
CG
292 .input = ip6_pkt_prohibit,
293 .output = ip6_pkt_prohibit_out,
101367c2
TG
294 },
295 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 296 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
297 .rt6i_metric = ~(u32) 0,
298 .rt6i_ref = ATOMIC_INIT(1),
299};
300
fb0af4c7 301static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
302 .dst = {
303 .__refcnt = ATOMIC_INIT(1),
304 .__use = 1,
2c20cbd7 305 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 306 .error = -EINVAL,
d8d1f30b 307 .input = dst_discard,
ede2059d 308 .output = dst_discard_out,
101367c2
TG
309 },
310 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 311 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
312 .rt6i_metric = ~(u32) 0,
313 .rt6i_ref = ATOMIC_INIT(1),
314};
315
316#endif
317
ebfa45f0
MKL
318static void rt6_info_init(struct rt6_info *rt)
319{
320 struct dst_entry *dst = &rt->dst;
321
322 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
323 INIT_LIST_HEAD(&rt->rt6i_siblings);
324 INIT_LIST_HEAD(&rt->rt6i_uncached);
325}
326
1da177e4 327/* allocate dst with ip6_dst_ops */
d52d3997
MKL
328static struct rt6_info *__ip6_dst_alloc(struct net *net,
329 struct net_device *dev,
ad706862 330 int flags)
1da177e4 331{
97bab73f 332 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 333 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 334
ebfa45f0
MKL
335 if (rt)
336 rt6_info_init(rt);
8104891b 337
cf911662 338 return rt;
1da177e4
LT
339}
340
d52d3997
MKL
341static struct rt6_info *ip6_dst_alloc(struct net *net,
342 struct net_device *dev,
ad706862 343 int flags)
d52d3997 344{
ad706862 345 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
346
347 if (rt) {
348 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
349 if (rt->rt6i_pcpu) {
350 int cpu;
351
352 for_each_possible_cpu(cpu) {
353 struct rt6_info **p;
354
355 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
356 /* no one shares rt */
357 *p = NULL;
358 }
359 } else {
360 dst_destroy((struct dst_entry *)rt);
361 return NULL;
362 }
363 }
364
365 return rt;
366}
367
1da177e4
LT
368static void ip6_dst_destroy(struct dst_entry *dst)
369{
370 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 371 struct dst_entry *from = dst->from;
8d0b94af 372 struct inet6_dev *idev;
1da177e4 373
4b32b5ad 374 dst_destroy_metrics_generic(dst);
87775312 375 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
376 rt6_uncached_list_del(rt);
377
378 idev = rt->rt6i_idev;
38308473 379 if (idev) {
1da177e4
LT
380 rt->rt6i_idev = NULL;
381 in6_dev_put(idev);
1ab1457c 382 }
1716a961 383
ecd98837
YH
384 dst->from = NULL;
385 dst_release(from);
b3419363
DM
386}
387
1da177e4
LT
388static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
389 int how)
390{
391 struct rt6_info *rt = (struct rt6_info *)dst;
392 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 393 struct net_device *loopback_dev =
c346dca1 394 dev_net(dev)->loopback_dev;
1da177e4 395
97cac082
DM
396 if (dev != loopback_dev) {
397 if (idev && idev->dev == dev) {
398 struct inet6_dev *loopback_idev =
399 in6_dev_get(loopback_dev);
400 if (loopback_idev) {
401 rt->rt6i_idev = loopback_idev;
402 in6_dev_put(idev);
403 }
404 }
1da177e4
LT
405 }
406}
407
5973fb1e
MKL
408static bool __rt6_check_expired(const struct rt6_info *rt)
409{
410 if (rt->rt6i_flags & RTF_EXPIRES)
411 return time_after(jiffies, rt->dst.expires);
412 else
413 return false;
414}
415
a50feda5 416static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 417{
1716a961
G
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
a50feda5 420 return true;
1716a961 421 } else if (rt->dst.from) {
3fd91fb3 422 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 423 }
a50feda5 424 return false;
1da177e4
LT
425}
426
51ebd318
ND
/* Multipath route selection:
 *   Map the flow hash of @fl6 onto the range [0, candidate_count).
 *   Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int flow_hash = get_hash_from_flowi6(fl6);

	return flow_hash % candidate_count;
}
436
437static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
438 struct flowi6 *fl6, int oif,
439 int strict)
51ebd318
ND
440{
441 struct rt6_info *sibling, *next_sibling;
442 int route_choosen;
443
444 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
445 /* Don't change the route, if route_choosen == 0
446 * (siblings does not include ourself)
447 */
448 if (route_choosen)
449 list_for_each_entry_safe(sibling, next_sibling,
450 &match->rt6i_siblings, rt6i_siblings) {
451 route_choosen--;
452 if (route_choosen == 0) {
52bd4c0c
ND
453 if (rt6_score_route(sibling, oif, strict) < 0)
454 break;
51ebd318
ND
455 match = sibling;
456 break;
457 }
458 }
459 return match;
460}
461
1da177e4 462/*
c71099ac 463 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
464 */
465
8ed67789
DL
466static inline struct rt6_info *rt6_device_match(struct net *net,
467 struct rt6_info *rt,
b71d1d42 468 const struct in6_addr *saddr,
1da177e4 469 int oif,
d420895e 470 int flags)
1da177e4
LT
471{
472 struct rt6_info *local = NULL;
473 struct rt6_info *sprt;
474
dd3abc4e
YH
475 if (!oif && ipv6_addr_any(saddr))
476 goto out;
477
d8d1f30b 478 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 479 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
480
481 if (oif) {
1da177e4
LT
482 if (dev->ifindex == oif)
483 return sprt;
484 if (dev->flags & IFF_LOOPBACK) {
38308473 485 if (!sprt->rt6i_idev ||
1da177e4 486 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 487 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 488 continue;
17fb0b2b
DA
489 if (local &&
490 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
491 continue;
492 }
493 local = sprt;
494 }
dd3abc4e
YH
495 } else {
496 if (ipv6_chk_addr(net, saddr, dev,
497 flags & RT6_LOOKUP_F_IFACE))
498 return sprt;
1da177e4 499 }
dd3abc4e 500 }
1da177e4 501
dd3abc4e 502 if (oif) {
1da177e4
LT
503 if (local)
504 return local;
505
d420895e 506 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 507 return net->ipv6.ip6_null_entry;
1da177e4 508 }
dd3abc4e 509out:
1da177e4
LT
510 return rt;
511}
512
27097255 513#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
514struct __rt6_probe_work {
515 struct work_struct work;
516 struct in6_addr target;
517 struct net_device *dev;
518};
519
520static void rt6_probe_deferred(struct work_struct *w)
521{
522 struct in6_addr mcaddr;
523 struct __rt6_probe_work *work =
524 container_of(w, struct __rt6_probe_work, work);
525
526 addrconf_addr_solict_mult(&work->target, &mcaddr);
304d888b 527 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL);
c2f17e82 528 dev_put(work->dev);
662f5533 529 kfree(work);
c2f17e82
HFS
530}
531
27097255
YH
532static void rt6_probe(struct rt6_info *rt)
533{
990edb42 534 struct __rt6_probe_work *work;
f2c31e32 535 struct neighbour *neigh;
27097255
YH
536 /*
537 * Okay, this does not seem to be appropriate
538 * for now, however, we need to check if it
539 * is really so; aka Router Reachability Probing.
540 *
541 * Router Reachability Probe MUST be rate-limited
542 * to no more than one per minute.
543 */
2152caea 544 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 545 return;
2152caea
YH
546 rcu_read_lock_bh();
547 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
548 if (neigh) {
8d6c31bf
MKL
549 if (neigh->nud_state & NUD_VALID)
550 goto out;
551
990edb42 552 work = NULL;
2152caea 553 write_lock(&neigh->lock);
990edb42
MKL
554 if (!(neigh->nud_state & NUD_VALID) &&
555 time_after(jiffies,
556 neigh->updated +
557 rt->rt6i_idev->cnf.rtr_probe_interval)) {
558 work = kmalloc(sizeof(*work), GFP_ATOMIC);
559 if (work)
560 __neigh_set_probe_once(neigh);
c2f17e82 561 }
2152caea 562 write_unlock(&neigh->lock);
990edb42
MKL
563 } else {
564 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 565 }
990edb42
MKL
566
567 if (work) {
568 INIT_WORK(&work->work, rt6_probe_deferred);
569 work->target = rt->rt6i_gateway;
570 dev_hold(rt->dst.dev);
571 work->dev = rt->dst.dev;
572 schedule_work(&work->work);
573 }
574
8d6c31bf 575out:
2152caea 576 rcu_read_unlock_bh();
27097255
YH
577}
578#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
582#endif
583
1da177e4 584/*
554cfb7e 585 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 586 */
b6f99a21 587static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 588{
d1918542 589 struct net_device *dev = rt->dst.dev;
161980f4 590 if (!oif || dev->ifindex == oif)
554cfb7e 591 return 2;
161980f4
DM
592 if ((dev->flags & IFF_LOOPBACK) &&
593 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
594 return 1;
595 return 0;
554cfb7e 596}
1da177e4 597
afc154e9 598static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 599{
f2c31e32 600 struct neighbour *neigh;
afc154e9 601 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 602
4d0c5911
YH
603 if (rt->rt6i_flags & RTF_NONEXTHOP ||
604 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 605 return RT6_NUD_SUCCEED;
145a3621
YH
606
607 rcu_read_lock_bh();
608 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
609 if (neigh) {
610 read_lock(&neigh->lock);
554cfb7e 611 if (neigh->nud_state & NUD_VALID)
afc154e9 612 ret = RT6_NUD_SUCCEED;
398bcbeb 613#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 614 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 615 ret = RT6_NUD_SUCCEED;
7e980569
JB
616 else
617 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 618#endif
145a3621 619 read_unlock(&neigh->lock);
afc154e9
HFS
620 } else {
621 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 622 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 623 }
145a3621
YH
624 rcu_read_unlock_bh();
625
a5a81f0b 626 return ret;
1da177e4
LT
627}
628
554cfb7e
YH
629static int rt6_score_route(struct rt6_info *rt, int oif,
630 int strict)
1da177e4 631{
a5a81f0b 632 int m;
1ab1457c 633
4d0c5911 634 m = rt6_check_dev(rt, oif);
77d16f45 635 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 636 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
637#ifdef CONFIG_IPV6_ROUTER_PREF
638 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
639#endif
afc154e9
HFS
640 if (strict & RT6_LOOKUP_F_REACHABLE) {
641 int n = rt6_check_neigh(rt);
642 if (n < 0)
643 return n;
644 }
554cfb7e
YH
645 return m;
646}
647
f11e6659 648static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
649 int *mpri, struct rt6_info *match,
650 bool *do_rr)
554cfb7e 651{
f11e6659 652 int m;
afc154e9 653 bool match_do_rr = false;
35103d11
AG
654 struct inet6_dev *idev = rt->rt6i_idev;
655 struct net_device *dev = rt->dst.dev;
656
657 if (dev && !netif_carrier_ok(dev) &&
658 idev->cnf.ignore_routes_with_linkdown)
659 goto out;
f11e6659
DM
660
661 if (rt6_check_expired(rt))
662 goto out;
663
664 m = rt6_score_route(rt, oif, strict);
7e980569 665 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
666 match_do_rr = true;
667 m = 0; /* lowest valid score */
7e980569 668 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 669 goto out;
afc154e9
HFS
670 }
671
672 if (strict & RT6_LOOKUP_F_REACHABLE)
673 rt6_probe(rt);
f11e6659 674
7e980569 675 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 676 if (m > *mpri) {
afc154e9 677 *do_rr = match_do_rr;
f11e6659
DM
678 *mpri = m;
679 match = rt;
f11e6659 680 }
f11e6659
DM
681out:
682 return match;
683}
684
685static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
686 struct rt6_info *rr_head,
afc154e9
HFS
687 u32 metric, int oif, int strict,
688 bool *do_rr)
f11e6659 689{
9fbdcfaf 690 struct rt6_info *rt, *match, *cont;
554cfb7e 691 int mpri = -1;
1da177e4 692
f11e6659 693 match = NULL;
9fbdcfaf
SK
694 cont = NULL;
695 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
696 if (rt->rt6i_metric != metric) {
697 cont = rt;
698 break;
699 }
700
701 match = find_match(rt, oif, strict, &mpri, match, do_rr);
702 }
703
704 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
705 if (rt->rt6i_metric != metric) {
706 cont = rt;
707 break;
708 }
709
afc154e9 710 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
711 }
712
713 if (match || !cont)
714 return match;
715
716 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 717 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 718
f11e6659
DM
719 return match;
720}
1da177e4 721
f11e6659
DM
722static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
723{
724 struct rt6_info *match, *rt0;
8ed67789 725 struct net *net;
afc154e9 726 bool do_rr = false;
1da177e4 727
f11e6659
DM
728 rt0 = fn->rr_ptr;
729 if (!rt0)
730 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 731
afc154e9
HFS
732 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
733 &do_rr);
1da177e4 734
afc154e9 735 if (do_rr) {
d8d1f30b 736 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 737
554cfb7e 738 /* no entries matched; do round-robin */
f11e6659
DM
739 if (!next || next->rt6i_metric != rt0->rt6i_metric)
740 next = fn->leaf;
741
742 if (next != rt0)
743 fn->rr_ptr = next;
1da177e4 744 }
1da177e4 745
d1918542 746 net = dev_net(rt0->dst.dev);
a02cec21 747 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
748}
749
8b9df265
MKL
750static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
751{
752 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
753}
754
70ceb4f5
YH
755#ifdef CONFIG_IPV6_ROUTE_INFO
756int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 757 const struct in6_addr *gwaddr)
70ceb4f5 758{
c346dca1 759 struct net *net = dev_net(dev);
70ceb4f5
YH
760 struct route_info *rinfo = (struct route_info *) opt;
761 struct in6_addr prefix_buf, *prefix;
762 unsigned int pref;
4bed72e4 763 unsigned long lifetime;
70ceb4f5
YH
764 struct rt6_info *rt;
765
766 if (len < sizeof(struct route_info)) {
767 return -EINVAL;
768 }
769
770 /* Sanity check for prefix_len and length */
771 if (rinfo->length > 3) {
772 return -EINVAL;
773 } else if (rinfo->prefix_len > 128) {
774 return -EINVAL;
775 } else if (rinfo->prefix_len > 64) {
776 if (rinfo->length < 2) {
777 return -EINVAL;
778 }
779 } else if (rinfo->prefix_len > 0) {
780 if (rinfo->length < 1) {
781 return -EINVAL;
782 }
783 }
784
785 pref = rinfo->route_pref;
786 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 787 return -EINVAL;
70ceb4f5 788
4bed72e4 789 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
790
791 if (rinfo->length == 3)
792 prefix = (struct in6_addr *)rinfo->prefix;
793 else {
794 /* this function is safe */
795 ipv6_addr_prefix(&prefix_buf,
796 (struct in6_addr *)rinfo->prefix,
797 rinfo->prefix_len);
798 prefix = &prefix_buf;
799 }
800
f104a567
DJ
801 if (rinfo->prefix_len == 0)
802 rt = rt6_get_dflt_router(gwaddr, dev);
803 else
804 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
805 gwaddr, dev->ifindex);
70ceb4f5
YH
806
807 if (rt && !lifetime) {
e0a1ad73 808 ip6_del_rt(rt);
70ceb4f5
YH
809 rt = NULL;
810 }
811
812 if (!rt && lifetime)
efa2cea0 813 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
70ceb4f5
YH
814 pref);
815 else if (rt)
816 rt->rt6i_flags = RTF_ROUTEINFO |
817 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
818
819 if (rt) {
1716a961
G
820 if (!addrconf_finite_timeout(lifetime))
821 rt6_clean_expires(rt);
822 else
823 rt6_set_expires(rt, jiffies + HZ * lifetime);
824
94e187c0 825 ip6_rt_put(rt);
70ceb4f5
YH
826 }
827 return 0;
828}
829#endif
830
a3c00e46
MKL
831static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
832 struct in6_addr *saddr)
833{
834 struct fib6_node *pn;
835 while (1) {
836 if (fn->fn_flags & RTN_TL_ROOT)
837 return NULL;
838 pn = fn->parent;
839 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
840 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
841 else
842 fn = pn;
843 if (fn->fn_flags & RTN_RTINFO)
844 return fn;
845 }
846}
c71099ac 847
8ed67789
DL
848static struct rt6_info *ip6_pol_route_lookup(struct net *net,
849 struct fib6_table *table,
4c9483b2 850 struct flowi6 *fl6, int flags)
1da177e4
LT
851{
852 struct fib6_node *fn;
853 struct rt6_info *rt;
854
c71099ac 855 read_lock_bh(&table->tb6_lock);
4c9483b2 856 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
857restart:
858 rt = fn->leaf;
4c9483b2 859 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 860 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 861 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
862 if (rt == net->ipv6.ip6_null_entry) {
863 fn = fib6_backtrack(fn, &fl6->saddr);
864 if (fn)
865 goto restart;
866 }
d8d1f30b 867 dst_use(&rt->dst, jiffies);
c71099ac 868 read_unlock_bh(&table->tb6_lock);
b811580d
DA
869
870 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
871
c71099ac
TG
872 return rt;
873
874}
875
67ba4152 876struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
877 int flags)
878{
879 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
880}
881EXPORT_SYMBOL_GPL(ip6_route_lookup);
882
9acd9f3a
YH
883struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
884 const struct in6_addr *saddr, int oif, int strict)
c71099ac 885{
4c9483b2
DM
886 struct flowi6 fl6 = {
887 .flowi6_oif = oif,
888 .daddr = *daddr,
c71099ac
TG
889 };
890 struct dst_entry *dst;
77d16f45 891 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 892
adaa70bb 893 if (saddr) {
4c9483b2 894 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
895 flags |= RT6_LOOKUP_F_HAS_SADDR;
896 }
897
4c9483b2 898 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
899 if (dst->error == 0)
900 return (struct rt6_info *) dst;
901
902 dst_release(dst);
903
1da177e4
LT
904 return NULL;
905}
7159039a
YH
906EXPORT_SYMBOL(rt6_lookup);
907
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold the
   route, it may be destroyed.
 */
913
e5fd387a 914static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 915 struct mx6_config *mxc)
1da177e4
LT
916{
917 int err;
c71099ac 918 struct fib6_table *table;
1da177e4 919
c71099ac
TG
920 table = rt->rt6i_table;
921 write_lock_bh(&table->tb6_lock);
e715b6d3 922 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 923 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
924
925 return err;
926}
927
40e22e8f
TG
928int ip6_ins_rt(struct rt6_info *rt)
929{
e715b6d3
FW
930 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
931 struct mx6_config mxc = { .mx = NULL, };
932
933 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
934}
935
8b9df265
MKL
936static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
937 const struct in6_addr *daddr,
938 const struct in6_addr *saddr)
1da177e4 939{
1da177e4
LT
940 struct rt6_info *rt;
941
942 /*
943 * Clone the route.
944 */
945
d52d3997 946 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 947 ort = (struct rt6_info *)ort->dst.from;
1da177e4 948
ad706862 949 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
950
951 if (!rt)
952 return NULL;
953
954 ip6_rt_copy_init(rt, ort);
955 rt->rt6i_flags |= RTF_CACHE;
956 rt->rt6i_metric = 0;
957 rt->dst.flags |= DST_HOST;
958 rt->rt6i_dst.addr = *daddr;
959 rt->rt6i_dst.plen = 128;
1da177e4 960
83a09abd
MKL
961 if (!rt6_is_gw_or_nonexthop(ort)) {
962 if (ort->rt6i_dst.plen != 128 &&
963 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
964 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 965#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
966 if (rt->rt6i_src.plen && saddr) {
967 rt->rt6i_src.addr = *saddr;
968 rt->rt6i_src.plen = 128;
8b9df265 969 }
83a09abd 970#endif
95a9a5ba 971 }
1da177e4 972
95a9a5ba
YH
973 return rt;
974}
1da177e4 975
d52d3997
MKL
976static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
977{
978 struct rt6_info *pcpu_rt;
979
980 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 981 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
982
983 if (!pcpu_rt)
984 return NULL;
985 ip6_rt_copy_init(pcpu_rt, rt);
986 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
987 pcpu_rt->rt6i_flags |= RTF_PCPU;
988 return pcpu_rt;
989}
990
991/* It should be called with read_lock_bh(&tb6_lock) acquired */
992static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
993{
a73e4195 994 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
995
996 p = this_cpu_ptr(rt->rt6i_pcpu);
997 pcpu_rt = *p;
998
a73e4195
MKL
999 if (pcpu_rt) {
1000 dst_hold(&pcpu_rt->dst);
1001 rt6_dst_from_metrics_check(pcpu_rt);
1002 }
1003 return pcpu_rt;
1004}
1005
1006static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1007{
9c7370a1 1008 struct fib6_table *table = rt->rt6i_table;
a73e4195 1009 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1010
1011 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1012 if (!pcpu_rt) {
1013 struct net *net = dev_net(rt->dst.dev);
1014
9c7370a1
MKL
1015 dst_hold(&net->ipv6.ip6_null_entry->dst);
1016 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1017 }
1018
9c7370a1
MKL
1019 read_lock_bh(&table->tb6_lock);
1020 if (rt->rt6i_pcpu) {
1021 p = this_cpu_ptr(rt->rt6i_pcpu);
1022 prev = cmpxchg(p, NULL, pcpu_rt);
1023 if (prev) {
1024 /* If someone did it before us, return prev instead */
1025 dst_destroy(&pcpu_rt->dst);
1026 pcpu_rt = prev;
1027 }
1028 } else {
1029 /* rt has been removed from the fib6 tree
1030 * before we have a chance to acquire the read_lock.
1031 * In this case, don't brother to create a pcpu rt
1032 * since rt is going away anyway. The next
1033 * dst_check() will trigger a re-lookup.
1034 */
d52d3997 1035 dst_destroy(&pcpu_rt->dst);
9c7370a1 1036 pcpu_rt = rt;
d52d3997 1037 }
d52d3997
MKL
1038 dst_hold(&pcpu_rt->dst);
1039 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1040 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1041 return pcpu_rt;
1042}
1043
8ed67789 1044static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
4c9483b2 1045 struct flowi6 *fl6, int flags)
1da177e4 1046{
367efcb9 1047 struct fib6_node *fn, *saved_fn;
45e4fd26 1048 struct rt6_info *rt;
c71099ac 1049 int strict = 0;
1da177e4 1050
77d16f45 1051 strict |= flags & RT6_LOOKUP_F_IFACE;
367efcb9
MKL
1052 if (net->ipv6.devconf_all->forwarding == 0)
1053 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1054
c71099ac 1055 read_lock_bh(&table->tb6_lock);
1da177e4 1056
4c9483b2 1057 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1058 saved_fn = fn;
1da177e4 1059
ca254490
DA
1060 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1061 oif = 0;
1062
a3c00e46 1063redo_rt6_select:
367efcb9 1064 rt = rt6_select(fn, oif, strict);
52bd4c0c 1065 if (rt->rt6i_nsiblings)
367efcb9 1066 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1067 if (rt == net->ipv6.ip6_null_entry) {
1068 fn = fib6_backtrack(fn, &fl6->saddr);
1069 if (fn)
1070 goto redo_rt6_select;
367efcb9
MKL
1071 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1072 /* also consider unreachable route */
1073 strict &= ~RT6_LOOKUP_F_REACHABLE;
1074 fn = saved_fn;
1075 goto redo_rt6_select;
367efcb9 1076 }
a3c00e46
MKL
1077 }
1078
fb9de91e 1079
3da59bd9 1080 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1081 dst_use(&rt->dst, jiffies);
1082 read_unlock_bh(&table->tb6_lock);
1083
1084 rt6_dst_from_metrics_check(rt);
b811580d
DA
1085
1086 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1087 return rt;
3da59bd9
MKL
1088 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1089 !(rt->rt6i_flags & RTF_GATEWAY))) {
1090 /* Create a RTF_CACHE clone which will not be
1091 * owned by the fib6 tree. It is for the special case where
1092 * the daddr in the skb during the neighbor look-up is different
1093 * from the fl6->daddr used to look-up route here.
1094 */
1095
1096 struct rt6_info *uncached_rt;
1097
d52d3997
MKL
1098 dst_use(&rt->dst, jiffies);
1099 read_unlock_bh(&table->tb6_lock);
1100
3da59bd9
MKL
1101 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1102 dst_release(&rt->dst);
c71099ac 1103
3da59bd9 1104 if (uncached_rt)
8d0b94af 1105 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1106 else
1107 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1108
3da59bd9 1109 dst_hold(&uncached_rt->dst);
b811580d
DA
1110
1111 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1112 return uncached_rt;
3da59bd9 1113
d52d3997
MKL
1114 } else {
1115 /* Get a percpu copy */
1116
1117 struct rt6_info *pcpu_rt;
1118
1119 rt->dst.lastuse = jiffies;
1120 rt->dst.__use++;
1121 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1122
9c7370a1
MKL
1123 if (pcpu_rt) {
1124 read_unlock_bh(&table->tb6_lock);
1125 } else {
1126 /* We have to do the read_unlock first
1127 * because rt6_make_pcpu_route() may trigger
1128 * ip6_dst_gc() which will take the write_lock.
1129 */
1130 dst_hold(&rt->dst);
1131 read_unlock_bh(&table->tb6_lock);
a73e4195 1132 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1133 dst_release(&rt->dst);
1134 }
d52d3997 1135
b811580d 1136 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1137 return pcpu_rt;
9c7370a1 1138
d52d3997 1139 }
1da177e4
LT
1140}
1141
8ed67789 1142static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1143 struct flowi6 *fl6, int flags)
4acad72d 1144{
4c9483b2 1145 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1146}
1147
72331bc0
SL
1148static struct dst_entry *ip6_route_input_lookup(struct net *net,
1149 struct net_device *dev,
1150 struct flowi6 *fl6, int flags)
1151{
1152 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1153 flags |= RT6_LOOKUP_F_IFACE;
1154
1155 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1156}
1157
c71099ac
TG
1158void ip6_route_input(struct sk_buff *skb)
1159{
b71d1d42 1160 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1161 struct net *net = dev_net(skb->dev);
adaa70bb 1162 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1163 struct ip_tunnel_info *tun_info;
4c9483b2 1164 struct flowi6 fl6 = {
ca254490 1165 .flowi6_iif = l3mdev_fib_oif(skb->dev),
4c9483b2
DM
1166 .daddr = iph->daddr,
1167 .saddr = iph->saddr,
6502ca52 1168 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1169 .flowi6_mark = skb->mark,
1170 .flowi6_proto = iph->nexthdr,
c71099ac 1171 };
adaa70bb 1172
904af04d 1173 tun_info = skb_tunnel_info(skb);
46fa062a 1174 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1175 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1176 skb_dst_drop(skb);
72331bc0 1177 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1178}
1179
8ed67789 1180static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1181 struct flowi6 *fl6, int flags)
1da177e4 1182{
4c9483b2 1183 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1184}
1185
6f21c96a
PA
1186struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1187 struct flowi6 *fl6, int flags)
c71099ac 1188{
ca254490 1189 struct dst_entry *dst;
d46a9d67 1190 bool any_src;
c71099ac 1191
ca254490
DA
1192 dst = l3mdev_rt6_dst_by_oif(net, fl6);
1193 if (dst)
1194 return dst;
1195
1fb9489b 1196 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1197
d46a9d67 1198 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1199 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1200 (fl6->flowi6_oif && any_src))
77d16f45 1201 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1202
d46a9d67 1203 if (!any_src)
adaa70bb 1204 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1205 else if (sk)
1206 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1207
4c9483b2 1208 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1209}
6f21c96a 1210EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1211
2774c131 1212struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1213{
5c1e6aa3 1214 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1215 struct dst_entry *new = NULL;
1216
f5b0a874 1217 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1218 if (rt) {
0a1f5962 1219 rt6_info_init(rt);
8104891b 1220
0a1f5962 1221 new = &rt->dst;
14e50e57 1222 new->__use = 1;
352e512c 1223 new->input = dst_discard;
ede2059d 1224 new->output = dst_discard_out;
14e50e57 1225
0a1f5962 1226 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1227 rt->rt6i_idev = ort->rt6i_idev;
1228 if (rt->rt6i_idev)
1229 in6_dev_hold(rt->rt6i_idev);
14e50e57 1230
4e3fd7a0 1231 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1232 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1233 rt->rt6i_metric = 0;
1234
1235 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1236#ifdef CONFIG_IPV6_SUBTREES
1237 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1238#endif
1239
1240 dst_free(new);
1241 }
1242
69ead7af
DM
1243 dst_release(dst_orig);
1244 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1245}
14e50e57 1246
1da177e4
LT
1247/*
1248 * Destination cache support functions
1249 */
1250
4b32b5ad
MKL
1251static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1252{
1253 if (rt->dst.from &&
1254 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1255 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1256}
1257
3da59bd9
MKL
1258static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1259{
1260 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1261 return NULL;
1262
1263 if (rt6_check_expired(rt))
1264 return NULL;
1265
1266 return &rt->dst;
1267}
1268
1269static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1270{
5973fb1e
MKL
1271 if (!__rt6_check_expired(rt) &&
1272 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1273 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1274 return &rt->dst;
1275 else
1276 return NULL;
1277}
1278
1da177e4
LT
1279static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1280{
1281 struct rt6_info *rt;
1282
1283 rt = (struct rt6_info *) dst;
1284
6f3118b5
ND
1285 /* All IPV6 dsts are created with ->obsolete set to the value
1286 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1287 * into this function always.
1288 */
e3bc10bd 1289
4b32b5ad
MKL
1290 rt6_dst_from_metrics_check(rt);
1291
02bcf4e0
MKL
1292 if (rt->rt6i_flags & RTF_PCPU ||
1293 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
3da59bd9
MKL
1294 return rt6_dst_from_check(rt, cookie);
1295 else
1296 return rt6_check(rt, cookie);
1da177e4
LT
1297}
1298
1299static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1300{
1301 struct rt6_info *rt = (struct rt6_info *) dst;
1302
1303 if (rt) {
54c1a859
YH
1304 if (rt->rt6i_flags & RTF_CACHE) {
1305 if (rt6_check_expired(rt)) {
1306 ip6_del_rt(rt);
1307 dst = NULL;
1308 }
1309 } else {
1da177e4 1310 dst_release(dst);
54c1a859
YH
1311 dst = NULL;
1312 }
1da177e4 1313 }
54c1a859 1314 return dst;
1da177e4
LT
1315}
1316
1317static void ip6_link_failure(struct sk_buff *skb)
1318{
1319 struct rt6_info *rt;
1320
3ffe533c 1321 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1322
adf30907 1323 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1324 if (rt) {
1eb4f758
HFS
1325 if (rt->rt6i_flags & RTF_CACHE) {
1326 dst_hold(&rt->dst);
8e3d5be7 1327 ip6_del_rt(rt);
1eb4f758 1328 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1329 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1330 }
1da177e4
LT
1331 }
1332}
1333
45e4fd26
MKL
1334static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1335{
1336 struct net *net = dev_net(rt->dst.dev);
1337
1338 rt->rt6i_flags |= RTF_MODIFIED;
1339 rt->rt6i_pmtu = mtu;
1340 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1341}
1342
0d3f6d29
MKL
1343static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1344{
1345 return !(rt->rt6i_flags & RTF_CACHE) &&
1346 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1347}
1348
45e4fd26
MKL
1349static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1350 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1351{
67ba4152 1352 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1353
45e4fd26
MKL
1354 if (rt6->rt6i_flags & RTF_LOCAL)
1355 return;
81aded24 1356
45e4fd26
MKL
1357 dst_confirm(dst);
1358 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1359 if (mtu >= dst_mtu(dst))
1360 return;
9d289715 1361
0d3f6d29 1362 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26
MKL
1363 rt6_do_update_pmtu(rt6, mtu);
1364 } else {
1365 const struct in6_addr *daddr, *saddr;
1366 struct rt6_info *nrt6;
1367
1368 if (iph) {
1369 daddr = &iph->daddr;
1370 saddr = &iph->saddr;
1371 } else if (sk) {
1372 daddr = &sk->sk_v6_daddr;
1373 saddr = &inet6_sk(sk)->saddr;
1374 } else {
1375 return;
1376 }
1377 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1378 if (nrt6) {
1379 rt6_do_update_pmtu(nrt6, mtu);
1380
1381 /* ip6_ins_rt(nrt6) will bump the
1382 * rt6->rt6i_node->fn_sernum
1383 * which will fail the next rt6_check() and
1384 * invalidate the sk->sk_dst_cache.
1385 */
1386 ip6_ins_rt(nrt6);
1387 }
1da177e4
LT
1388 }
1389}
1390
45e4fd26
MKL
1391static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1392 struct sk_buff *skb, u32 mtu)
1393{
1394 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1395}
1396
42ae66c8
DM
1397void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1398 int oif, u32 mark)
81aded24
DM
1399{
1400 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1401 struct dst_entry *dst;
1402 struct flowi6 fl6;
1403
1404 memset(&fl6, 0, sizeof(fl6));
1405 fl6.flowi6_oif = oif;
1b3c61dc 1406 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1407 fl6.daddr = iph->daddr;
1408 fl6.saddr = iph->saddr;
6502ca52 1409 fl6.flowlabel = ip6_flowinfo(iph);
81aded24
DM
1410
1411 dst = ip6_route_output(net, NULL, &fl6);
1412 if (!dst->error)
45e4fd26 1413 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1414 dst_release(dst);
1415}
1416EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1417
1418void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1419{
1420 ip6_update_pmtu(skb, sock_net(sk), mtu,
1421 sk->sk_bound_dev_if, sk->sk_mark);
1422}
1423EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1424
b55b76b2
DJ
1425/* Handle redirects */
/* Flow key used for redirect lookups: an ordinary flowi6 followed by the
 * address of the router the redirect came from.  __ip6_route_redirect() is
 * handed only the struct flowi6 pointer and casts it back to this type, so
 * fl6 has to stay the first member.
 */
1426struct ip6rd_flowi {
1427 struct flowi6 fl6;
1428 struct in6_addr gateway;
1429};
1430
1431static struct rt6_info *__ip6_route_redirect(struct net *net,
1432 struct fib6_table *table,
1433 struct flowi6 *fl6,
1434 int flags)
1435{
1436 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1437 struct rt6_info *rt;
1438 struct fib6_node *fn;
1439
1440 /* Get the "current" route for this destination and
1441 * check if the redirect has come from approriate router.
1442 *
1443 * RFC 4861 specifies that redirects should only be
1444 * accepted if they come from the nexthop to the target.
1445 * Due to the way the routes are chosen, this notion
1446 * is a bit fuzzy and one might need to check all possible
1447 * routes.
1448 */
1449
1450 read_lock_bh(&table->tb6_lock);
1451 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1452restart:
1453 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1454 if (rt6_check_expired(rt))
1455 continue;
1456 if (rt->dst.error)
1457 break;
1458 if (!(rt->rt6i_flags & RTF_GATEWAY))
1459 continue;
1460 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1461 continue;
1462 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1463 continue;
1464 break;
1465 }
1466
1467 if (!rt)
1468 rt = net->ipv6.ip6_null_entry;
1469 else if (rt->dst.error) {
1470 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1471 goto out;
1472 }
1473
1474 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1475 fn = fib6_backtrack(fn, &fl6->saddr);
1476 if (fn)
1477 goto restart;
b55b76b2 1478 }
a3c00e46 1479
b0a1ba59 1480out:
b55b76b2
DJ
1481 dst_hold(&rt->dst);
1482
1483 read_unlock_bh(&table->tb6_lock);
1484
b811580d 1485 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1486 return rt;
1487};
1488
1489static struct dst_entry *ip6_route_redirect(struct net *net,
1490 const struct flowi6 *fl6,
1491 const struct in6_addr *gateway)
1492{
1493 int flags = RT6_LOOKUP_F_HAS_SADDR;
1494 struct ip6rd_flowi rdfl;
1495
1496 rdfl.fl6 = *fl6;
1497 rdfl.gateway = *gateway;
1498
1499 return fib6_rule_lookup(net, &rdfl.fl6,
1500 flags, __ip6_route_redirect);
1501}
1502
3a5ad2ee
DM
1503void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1504{
1505 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1506 struct dst_entry *dst;
1507 struct flowi6 fl6;
1508
1509 memset(&fl6, 0, sizeof(fl6));
e374c618 1510 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1511 fl6.flowi6_oif = oif;
1512 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1513 fl6.daddr = iph->daddr;
1514 fl6.saddr = iph->saddr;
6502ca52 1515 fl6.flowlabel = ip6_flowinfo(iph);
3a5ad2ee 1516
b55b76b2
DJ
1517 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1518 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1519 dst_release(dst);
1520}
1521EXPORT_SYMBOL_GPL(ip6_redirect);
1522
c92a59ec
DJ
1523void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1524 u32 mark)
1525{
1526 const struct ipv6hdr *iph = ipv6_hdr(skb);
1527 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1528 struct dst_entry *dst;
1529 struct flowi6 fl6;
1530
1531 memset(&fl6, 0, sizeof(fl6));
e374c618 1532 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1533 fl6.flowi6_oif = oif;
1534 fl6.flowi6_mark = mark;
c92a59ec
DJ
1535 fl6.daddr = msg->dest;
1536 fl6.saddr = iph->daddr;
1537
b55b76b2
DJ
1538 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1539 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1540 dst_release(dst);
1541}
1542
3a5ad2ee
DM
1543void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1544{
1545 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1546}
1547EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1548
0dbaee3b 1549static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1550{
0dbaee3b
DM
1551 struct net_device *dev = dst->dev;
1552 unsigned int mtu = dst_mtu(dst);
1553 struct net *net = dev_net(dev);
1554
1da177e4
LT
1555 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1556
5578689a
DL
1557 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1558 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1559
1560 /*
1ab1457c
YH
1561 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1562 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1563 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1564 * rely only on pmtu discovery"
1565 */
1566 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1567 mtu = IPV6_MAXPLEN;
1568 return mtu;
1569}
1570
ebb762f2 1571static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1572{
4b32b5ad
MKL
1573 const struct rt6_info *rt = (const struct rt6_info *)dst;
1574 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1575 struct inet6_dev *idev;
618f9bc7 1576
4b32b5ad
MKL
1577 if (mtu)
1578 goto out;
1579
1580 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1581 if (mtu)
30f78d8e 1582 goto out;
618f9bc7
SK
1583
1584 mtu = IPV6_MIN_MTU;
d33e4553
DM
1585
1586 rcu_read_lock();
1587 idev = __in6_dev_get(dst->dev);
1588 if (idev)
1589 mtu = idev->cnf.mtu6;
1590 rcu_read_unlock();
1591
30f78d8e
ED
1592out:
1593 return min_t(unsigned int, mtu, IP6_MAX_MTU);
d33e4553
DM
1594}
1595
3b00944c
YH
/* Head of the singly linked list (chained through dst->next) of the dsts
 * created by icmp6_dst_alloc(), and the spinlock that icmp6_dst_alloc(),
 * icmp6_dst_gc() and icmp6_clean_all() take around every access to it.
 */
1596static struct dst_entry *icmp6_dst_gc_list;
1597static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1598
3b00944c 1599struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1600 struct flowi6 *fl6)
1da177e4 1601{
87a11578 1602 struct dst_entry *dst;
1da177e4
LT
1603 struct rt6_info *rt;
1604 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1605 struct net *net = dev_net(dev);
1da177e4 1606
38308473 1607 if (unlikely(!idev))
122bdf67 1608 return ERR_PTR(-ENODEV);
1da177e4 1609
ad706862 1610 rt = ip6_dst_alloc(net, dev, 0);
38308473 1611 if (unlikely(!rt)) {
1da177e4 1612 in6_dev_put(idev);
87a11578 1613 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1614 goto out;
1615 }
1616
8e2ec639
YZ
1617 rt->dst.flags |= DST_HOST;
1618 rt->dst.output = ip6_output;
d8d1f30b 1619 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1620 rt->rt6i_gateway = fl6->daddr;
87a11578 1621 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1622 rt->rt6i_dst.plen = 128;
1623 rt->rt6i_idev = idev;
14edd87d 1624 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1625
3b00944c 1626 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1627 rt->dst.next = icmp6_dst_gc_list;
1628 icmp6_dst_gc_list = &rt->dst;
3b00944c 1629 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1630
5578689a 1631 fib6_force_start_gc(net);
1da177e4 1632
87a11578
DM
1633 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1634
1da177e4 1635out:
87a11578 1636 return dst;
1da177e4
LT
1637}
1638
3d0f24a7 1639int icmp6_dst_gc(void)
1da177e4 1640{
e9476e95 1641 struct dst_entry *dst, **pprev;
3d0f24a7 1642 int more = 0;
1da177e4 1643
3b00944c
YH
1644 spin_lock_bh(&icmp6_dst_lock);
1645 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1646
1da177e4
LT
1647 while ((dst = *pprev) != NULL) {
1648 if (!atomic_read(&dst->__refcnt)) {
1649 *pprev = dst->next;
1650 dst_free(dst);
1da177e4
LT
1651 } else {
1652 pprev = &dst->next;
3d0f24a7 1653 ++more;
1da177e4
LT
1654 }
1655 }
1656
3b00944c 1657 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1658
3d0f24a7 1659 return more;
1da177e4
LT
1660}
1661
1e493d19
DM
1662static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1663 void *arg)
1664{
1665 struct dst_entry *dst, **pprev;
1666
1667 spin_lock_bh(&icmp6_dst_lock);
1668 pprev = &icmp6_dst_gc_list;
1669 while ((dst = *pprev) != NULL) {
1670 struct rt6_info *rt = (struct rt6_info *) dst;
1671 if (func(rt, arg)) {
1672 *pprev = dst->next;
1673 dst_free(dst);
1674 } else {
1675 pprev = &dst->next;
1676 }
1677 }
1678 spin_unlock_bh(&icmp6_dst_lock);
1679}
1680
569d3645 1681static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1682{
86393e52 1683 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1684 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1685 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1686 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1687 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1688 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1689 int entries;
7019b78e 1690
fc66f95c 1691 entries = dst_entries_get_fast(ops);
49a18d86 1692 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1693 entries <= rt_max_size)
1da177e4
LT
1694 goto out;
1695
6891a346 1696 net->ipv6.ip6_rt_gc_expire++;
14956643 1697 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1698 entries = dst_entries_get_slow(ops);
1699 if (entries < ops->gc_thresh)
7019b78e 1700 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1701out:
7019b78e 1702 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1703 return entries > rt_max_size;
1da177e4
LT
1704}
1705
e715b6d3
FW
1706static int ip6_convert_metrics(struct mx6_config *mxc,
1707 const struct fib6_config *cfg)
1708{
c3a8d947 1709 bool ecn_ca = false;
e715b6d3
FW
1710 struct nlattr *nla;
1711 int remaining;
1712 u32 *mp;
1713
63159f29 1714 if (!cfg->fc_mx)
e715b6d3
FW
1715 return 0;
1716
1717 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1718 if (unlikely(!mp))
1719 return -ENOMEM;
1720
1721 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1722 int type = nla_type(nla);
1bb14807 1723 u32 val;
e715b6d3 1724
1bb14807
DB
1725 if (!type)
1726 continue;
1727 if (unlikely(type > RTAX_MAX))
1728 goto err;
ea697639 1729
1bb14807
DB
1730 if (type == RTAX_CC_ALGO) {
1731 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1732
1bb14807 1733 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1734 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1735 if (val == TCP_CA_UNSPEC)
1736 goto err;
1737 } else {
1738 val = nla_get_u32(nla);
e715b6d3 1739 }
b8d3e416
DB
1740 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1741 goto err;
1bb14807
DB
1742
1743 mp[type - 1] = val;
1744 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1745 }
1746
c3a8d947
DB
1747 if (ecn_ca) {
1748 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1749 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1750 }
e715b6d3 1751
c3a8d947 1752 mxc->mx = mp;
e715b6d3
FW
1753 return 0;
1754 err:
1755 kfree(mp);
1756 return -EINVAL;
1757}
1da177e4 1758
8c5b83f0 1759static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1da177e4 1760{
5578689a 1761 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1762 struct rt6_info *rt = NULL;
1763 struct net_device *dev = NULL;
1764 struct inet6_dev *idev = NULL;
c71099ac 1765 struct fib6_table *table;
1da177e4 1766 int addr_type;
8c5b83f0 1767 int err = -EINVAL;
1da177e4 1768
86872cb5 1769 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
8c5b83f0 1770 goto out;
1da177e4 1771#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1772 if (cfg->fc_src_len)
8c5b83f0 1773 goto out;
1da177e4 1774#endif
86872cb5 1775 if (cfg->fc_ifindex) {
1da177e4 1776 err = -ENODEV;
5578689a 1777 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1778 if (!dev)
1779 goto out;
1780 idev = in6_dev_get(dev);
1781 if (!idev)
1782 goto out;
1783 }
1784
86872cb5
TG
1785 if (cfg->fc_metric == 0)
1786 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1787
d71314b4 1788 err = -ENOBUFS;
38308473
DM
1789 if (cfg->fc_nlinfo.nlh &&
1790 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1791 table = fib6_get_table(net, cfg->fc_table);
38308473 1792 if (!table) {
f3213831 1793 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1794 table = fib6_new_table(net, cfg->fc_table);
1795 }
1796 } else {
1797 table = fib6_new_table(net, cfg->fc_table);
1798 }
38308473
DM
1799
1800 if (!table)
c71099ac 1801 goto out;
c71099ac 1802
ad706862
MKL
1803 rt = ip6_dst_alloc(net, NULL,
1804 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1805
38308473 1806 if (!rt) {
1da177e4
LT
1807 err = -ENOMEM;
1808 goto out;
1809 }
1810
1716a961
G
1811 if (cfg->fc_flags & RTF_EXPIRES)
1812 rt6_set_expires(rt, jiffies +
1813 clock_t_to_jiffies(cfg->fc_expires));
1814 else
1815 rt6_clean_expires(rt);
1da177e4 1816
86872cb5
TG
1817 if (cfg->fc_protocol == RTPROT_UNSPEC)
1818 cfg->fc_protocol = RTPROT_BOOT;
1819 rt->rt6i_protocol = cfg->fc_protocol;
1820
1821 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1822
1823 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1824 rt->dst.input = ip6_mc_input;
ab79ad14
1825 else if (cfg->fc_flags & RTF_LOCAL)
1826 rt->dst.input = ip6_input;
1da177e4 1827 else
d8d1f30b 1828 rt->dst.input = ip6_forward;
1da177e4 1829
d8d1f30b 1830 rt->dst.output = ip6_output;
1da177e4 1831
19e42e45
RP
1832 if (cfg->fc_encap) {
1833 struct lwtunnel_state *lwtstate;
1834
1835 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
127eb7cd
TH
1836 cfg->fc_encap, AF_INET6, cfg,
1837 &lwtstate);
19e42e45
RP
1838 if (err)
1839 goto out;
61adedf3
JB
1840 rt->dst.lwtstate = lwtstate_get(lwtstate);
1841 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1842 rt->dst.lwtstate->orig_output = rt->dst.output;
1843 rt->dst.output = lwtunnel_output;
25368623 1844 }
61adedf3
JB
1845 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1846 rt->dst.lwtstate->orig_input = rt->dst.input;
1847 rt->dst.input = lwtunnel_input;
25368623 1848 }
19e42e45
RP
1849 }
1850
86872cb5
TG
1851 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1852 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1853 if (rt->rt6i_dst.plen == 128)
e5fd387a 1854 rt->dst.flags |= DST_HOST;
e5fd387a 1855
1da177e4 1856#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1857 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1858 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1859#endif
1860
86872cb5 1861 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1862
1863 /* We cannot add true routes via loopback here,
1864 they would result in kernel looping; promote them to reject routes
1865 */
86872cb5 1866 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1867 (dev && (dev->flags & IFF_LOOPBACK) &&
1868 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1869 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1870 /* hold loopback dev/idev if we haven't done so. */
5578689a 1871 if (dev != net->loopback_dev) {
1da177e4
LT
1872 if (dev) {
1873 dev_put(dev);
1874 in6_dev_put(idev);
1875 }
5578689a 1876 dev = net->loopback_dev;
1da177e4
LT
1877 dev_hold(dev);
1878 idev = in6_dev_get(dev);
1879 if (!idev) {
1880 err = -ENODEV;
1881 goto out;
1882 }
1883 }
1da177e4 1884 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1885 switch (cfg->fc_type) {
1886 case RTN_BLACKHOLE:
1887 rt->dst.error = -EINVAL;
ede2059d 1888 rt->dst.output = dst_discard_out;
7150aede 1889 rt->dst.input = dst_discard;
ef2c7d7b
ND
1890 break;
1891 case RTN_PROHIBIT:
1892 rt->dst.error = -EACCES;
7150aede
K
1893 rt->dst.output = ip6_pkt_prohibit_out;
1894 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1895 break;
b4949ab2 1896 case RTN_THROW:
0315e382 1897 case RTN_UNREACHABLE:
ef2c7d7b 1898 default:
7150aede 1899 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
1900 : (cfg->fc_type == RTN_UNREACHABLE)
1901 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
1902 rt->dst.output = ip6_pkt_discard_out;
1903 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1904 break;
1905 }
1da177e4
LT
1906 goto install_route;
1907 }
1908
86872cb5 1909 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1910 const struct in6_addr *gw_addr;
1da177e4
LT
1911 int gwa_type;
1912
86872cb5 1913 gw_addr = &cfg->fc_gateway;
330567b7 1914 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
1915
1916 /* if gw_addr is local we will fail to detect this in case
1917 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1918 * will return already-added prefix route via interface that
1919 * prefix route was assigned to, which might be non-loopback.
1920 */
1921 err = -EINVAL;
330567b7
FW
1922 if (ipv6_chk_addr_and_flags(net, gw_addr,
1923 gwa_type & IPV6_ADDR_LINKLOCAL ?
1924 dev : NULL, 0, 0))
48ed7b26
FW
1925 goto out;
1926
4e3fd7a0 1927 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1928
1929 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1930 struct rt6_info *grt;
1931
1932 /* IPv6 strictly inhibits using not link-local
1933 addresses as nexthop address.
1934 Otherwise, router will not able to send redirects.
1935 It is very good, but in some (rare!) circumstances
1936 (SIT, PtP, NBMA NOARP links) it is handy to allow
1937 some exceptions. --ANK
1938 */
38308473 1939 if (!(gwa_type & IPV6_ADDR_UNICAST))
1da177e4
LT
1940 goto out;
1941
5578689a 1942 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1da177e4
LT
1943
1944 err = -EHOSTUNREACH;
38308473 1945 if (!grt)
1da177e4
LT
1946 goto out;
1947 if (dev) {
d1918542 1948 if (dev != grt->dst.dev) {
94e187c0 1949 ip6_rt_put(grt);
1da177e4
LT
1950 goto out;
1951 }
1952 } else {
d1918542 1953 dev = grt->dst.dev;
1da177e4
LT
1954 idev = grt->rt6i_idev;
1955 dev_hold(dev);
1956 in6_dev_hold(grt->rt6i_idev);
1957 }
38308473 1958 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 1959 err = 0;
94e187c0 1960 ip6_rt_put(grt);
1da177e4
LT
1961
1962 if (err)
1963 goto out;
1964 }
1965 err = -EINVAL;
38308473 1966 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
1967 goto out;
1968 }
1969
1970 err = -ENODEV;
38308473 1971 if (!dev)
1da177e4
LT
1972 goto out;
1973
c3968a85
DW
1974 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1975 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1976 err = -EINVAL;
1977 goto out;
1978 }
4e3fd7a0 1979 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
1980 rt->rt6i_prefsrc.plen = 128;
1981 } else
1982 rt->rt6i_prefsrc.plen = 0;
1983
86872cb5 1984 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
1985
1986install_route:
d8d1f30b 1987 rt->dst.dev = dev;
1da177e4 1988 rt->rt6i_idev = idev;
c71099ac 1989 rt->rt6i_table = table;
63152fc0 1990
c346dca1 1991 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 1992
8c5b83f0 1993 return rt;
6b9ea5a6
RP
1994out:
1995 if (dev)
1996 dev_put(dev);
1997 if (idev)
1998 in6_dev_put(idev);
1999 if (rt)
2000 dst_free(&rt->dst);
2001
8c5b83f0 2002 return ERR_PTR(err);
6b9ea5a6
RP
2003}
2004
2005int ip6_route_add(struct fib6_config *cfg)
2006{
2007 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2008 struct rt6_info *rt;
6b9ea5a6
RP
2009 int err;
2010
8c5b83f0
RP
2011 rt = ip6_route_info_create(cfg);
2012 if (IS_ERR(rt)) {
2013 err = PTR_ERR(rt);
2014 rt = NULL;
6b9ea5a6 2015 goto out;
8c5b83f0 2016 }
6b9ea5a6 2017
e715b6d3
FW
2018 err = ip6_convert_metrics(&mxc, cfg);
2019 if (err)
2020 goto out;
1da177e4 2021
e715b6d3
FW
2022 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2023
2024 kfree(mxc.mx);
6b9ea5a6 2025
e715b6d3 2026 return err;
1da177e4 2027out:
1da177e4 2028 if (rt)
d8d1f30b 2029 dst_free(&rt->dst);
6b9ea5a6 2030
1da177e4
LT
2031 return err;
2032}
2033
86872cb5 2034static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2035{
2036 int err;
c71099ac 2037 struct fib6_table *table;
d1918542 2038 struct net *net = dev_net(rt->dst.dev);
1da177e4 2039
8e3d5be7
MKL
2040 if (rt == net->ipv6.ip6_null_entry ||
2041 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2042 err = -ENOENT;
2043 goto out;
2044 }
6c813a72 2045
c71099ac
TG
2046 table = rt->rt6i_table;
2047 write_lock_bh(&table->tb6_lock);
86872cb5 2048 err = fib6_del(rt, info);
c71099ac 2049 write_unlock_bh(&table->tb6_lock);
1da177e4 2050
6825a26c 2051out:
94e187c0 2052 ip6_rt_put(rt);
1da177e4
LT
2053 return err;
2054}
2055
e0a1ad73
TG
2056int ip6_del_rt(struct rt6_info *rt)
2057{
4d1169c1 2058 struct nl_info info = {
d1918542 2059 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2060 };
528c4ceb 2061 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2062}
2063
86872cb5 2064static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2065{
c71099ac 2066 struct fib6_table *table;
1da177e4
LT
2067 struct fib6_node *fn;
2068 struct rt6_info *rt;
2069 int err = -ESRCH;
2070
5578689a 2071 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2072 if (!table)
c71099ac
TG
2073 return err;
2074
2075 read_lock_bh(&table->tb6_lock);
1da177e4 2076
c71099ac 2077 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2078 &cfg->fc_dst, cfg->fc_dst_len,
2079 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2080
1da177e4 2081 if (fn) {
d8d1f30b 2082 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2083 if ((rt->rt6i_flags & RTF_CACHE) &&
2084 !(cfg->fc_flags & RTF_CACHE))
2085 continue;
86872cb5 2086 if (cfg->fc_ifindex &&
d1918542
DM
2087 (!rt->dst.dev ||
2088 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2089 continue;
86872cb5
TG
2090 if (cfg->fc_flags & RTF_GATEWAY &&
2091 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2092 continue;
86872cb5 2093 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2094 continue;
d8d1f30b 2095 dst_hold(&rt->dst);
c71099ac 2096 read_unlock_bh(&table->tb6_lock);
1da177e4 2097
86872cb5 2098 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2099 }
2100 }
c71099ac 2101 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2102
2103 return err;
2104}
2105
6700c270 2106static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2107{
a6279458 2108 struct netevent_redirect netevent;
e8599ff4 2109 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2110 struct ndisc_options ndopts;
2111 struct inet6_dev *in6_dev;
2112 struct neighbour *neigh;
71bcdba0 2113 struct rd_msg *msg;
6e157b6a
DM
2114 int optlen, on_link;
2115 u8 *lladdr;
e8599ff4 2116
29a3cad5 2117 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2118 optlen -= sizeof(*msg);
e8599ff4
DM
2119
2120 if (optlen < 0) {
6e157b6a 2121 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2122 return;
2123 }
2124
71bcdba0 2125 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2126
71bcdba0 2127 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2128 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2129 return;
2130 }
2131
6e157b6a 2132 on_link = 0;
71bcdba0 2133 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2134 on_link = 1;
71bcdba0 2135 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2136 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2137 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2138 return;
2139 }
2140
2141 in6_dev = __in6_dev_get(skb->dev);
2142 if (!in6_dev)
2143 return;
2144 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2145 return;
2146
2147 /* RFC2461 8.1:
2148 * The IP source address of the Redirect MUST be the same as the current
2149 * first-hop router for the specified ICMP Destination Address.
2150 */
2151
71bcdba0 2152 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2153 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2154 return;
2155 }
6e157b6a
DM
2156
2157 lladdr = NULL;
e8599ff4
DM
2158 if (ndopts.nd_opts_tgt_lladdr) {
2159 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2160 skb->dev);
2161 if (!lladdr) {
2162 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2163 return;
2164 }
2165 }
2166
6e157b6a 2167 rt = (struct rt6_info *) dst;
ec13ad1d 2168 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2169 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2170 return;
6e157b6a 2171 }
e8599ff4 2172
6e157b6a
DM
2173 /* Redirect received -> path was valid.
2174 * Look, redirects are sent only in response to data packets,
2175 * so that this nexthop apparently is reachable. --ANK
2176 */
2177 dst_confirm(&rt->dst);
a6279458 2178
71bcdba0 2179 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2180 if (!neigh)
2181 return;
a6279458 2182
1da177e4
LT
2183 /*
2184 * We have finally decided to accept it.
2185 */
2186
1ab1457c 2187 neigh_update(neigh, lladdr, NUD_STALE,
1da177e4
LT
2188 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2189 NEIGH_UPDATE_F_OVERRIDE|
2190 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
2191 NEIGH_UPDATE_F_ISROUTER))
2192 );
2193
83a09abd 2194 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2195 if (!nrt)
1da177e4
LT
2196 goto out;
2197
2198 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2199 if (on_link)
2200 nrt->rt6i_flags &= ~RTF_GATEWAY;
2201
4e3fd7a0 2202 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2203
40e22e8f 2204 if (ip6_ins_rt(nrt))
1da177e4
LT
2205 goto out;
2206
d8d1f30b
CG
2207 netevent.old = &rt->dst;
2208 netevent.new = &nrt->dst;
71bcdba0 2209 netevent.daddr = &msg->dest;
60592833 2210 netevent.neigh = neigh;
8d71740c
TT
2211 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2212
38308473 2213 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2214 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2215 ip6_del_rt(rt);
1da177e4
LT
2216 }
2217
2218out:
e8599ff4 2219 neigh_release(neigh);
6e157b6a
DM
2220}
2221
1da177e4
LT
2222/*
2223 * Misc support functions
2224 */
2225
4b32b5ad
MKL
2226static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2227{
2228 BUG_ON(from->dst.from);
2229
2230 rt->rt6i_flags &= ~RTF_EXPIRES;
2231 dst_hold(&from->dst);
2232 rt->dst.from = &from->dst;
2233 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2234}
2235
83a09abd
MKL
2236static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2237{
2238 rt->dst.input = ort->dst.input;
2239 rt->dst.output = ort->dst.output;
2240 rt->rt6i_dst = ort->rt6i_dst;
2241 rt->dst.error = ort->dst.error;
2242 rt->rt6i_idev = ort->rt6i_idev;
2243 if (rt->rt6i_idev)
2244 in6_dev_hold(rt->rt6i_idev);
2245 rt->dst.lastuse = jiffies;
2246 rt->rt6i_gateway = ort->rt6i_gateway;
2247 rt->rt6i_flags = ort->rt6i_flags;
2248 rt6_set_from(rt, ort);
2249 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2250#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2251 rt->rt6i_src = ort->rt6i_src;
1da177e4 2252#endif
83a09abd
MKL
2253 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2254 rt->rt6i_table = ort->rt6i_table;
61adedf3 2255 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2256}
2257
70ceb4f5 2258#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2259static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42
ED
2260 const struct in6_addr *prefix, int prefixlen,
2261 const struct in6_addr *gwaddr, int ifindex)
70ceb4f5
YH
2262{
2263 struct fib6_node *fn;
2264 struct rt6_info *rt = NULL;
c71099ac
TG
2265 struct fib6_table *table;
2266
efa2cea0 2267 table = fib6_get_table(net, RT6_TABLE_INFO);
38308473 2268 if (!table)
c71099ac 2269 return NULL;
70ceb4f5 2270
5744dd9b 2271 read_lock_bh(&table->tb6_lock);
67ba4152 2272 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2273 if (!fn)
2274 goto out;
2275
d8d1f30b 2276 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2277 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2278 continue;
2279 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2280 continue;
2281 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2282 continue;
d8d1f30b 2283 dst_hold(&rt->dst);
70ceb4f5
YH
2284 break;
2285 }
2286out:
5744dd9b 2287 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2288 return rt;
2289}
2290
efa2cea0 2291static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42
ED
2292 const struct in6_addr *prefix, int prefixlen,
2293 const struct in6_addr *gwaddr, int ifindex,
95c96174 2294 unsigned int pref)
70ceb4f5 2295{
86872cb5 2296 struct fib6_config cfg = {
238fc7ea 2297 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2298 .fc_ifindex = ifindex,
2299 .fc_dst_len = prefixlen,
2300 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2301 RTF_UP | RTF_PREF(pref),
15e47304 2302 .fc_nlinfo.portid = 0,
efa2cea0
DL
2303 .fc_nlinfo.nlh = NULL,
2304 .fc_nlinfo.nl_net = net,
86872cb5
TG
2305 };
2306
ca254490 2307 cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
4e3fd7a0
AD
2308 cfg.fc_dst = *prefix;
2309 cfg.fc_gateway = *gwaddr;
70ceb4f5 2310
e317da96
YH
2311 /* We should treat it as a default route if prefix length is 0. */
2312 if (!prefixlen)
86872cb5 2313 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2314
86872cb5 2315 ip6_route_add(&cfg);
70ceb4f5 2316
efa2cea0 2317 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
70ceb4f5
YH
2318}
2319#endif
2320
b71d1d42 2321struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2322{
1da177e4 2323 struct rt6_info *rt;
c71099ac 2324 struct fib6_table *table;
1da177e4 2325
c346dca1 2326 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
38308473 2327 if (!table)
c71099ac 2328 return NULL;
1da177e4 2329
5744dd9b 2330 read_lock_bh(&table->tb6_lock);
67ba4152 2331 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2332 if (dev == rt->dst.dev &&
045927ff 2333 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2334 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2335 break;
2336 }
2337 if (rt)
d8d1f30b 2338 dst_hold(&rt->dst);
5744dd9b 2339 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2340 return rt;
2341}
2342
b71d1d42 2343struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2344 struct net_device *dev,
2345 unsigned int pref)
1da177e4 2346{
86872cb5 2347 struct fib6_config cfg = {
ca254490 2348 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2349 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2350 .fc_ifindex = dev->ifindex,
2351 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2352 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2353 .fc_nlinfo.portid = 0,
5578689a 2354 .fc_nlinfo.nlh = NULL,
c346dca1 2355 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2356 };
1da177e4 2357
4e3fd7a0 2358 cfg.fc_gateway = *gwaddr;
1da177e4 2359
86872cb5 2360 ip6_route_add(&cfg);
1da177e4 2361
1da177e4
LT
2362 return rt6_get_dflt_router(gwaddr, dev);
2363}
2364
7b4da532 2365void rt6_purge_dflt_routers(struct net *net)
1da177e4
LT
2366{
2367 struct rt6_info *rt;
c71099ac
TG
2368 struct fib6_table *table;
2369
2370 /* NOTE: Keep consistent with rt6_get_dflt_router */
7b4da532 2371 table = fib6_get_table(net, RT6_TABLE_DFLT);
38308473 2372 if (!table)
c71099ac 2373 return;
1da177e4
LT
2374
2375restart:
c71099ac 2376 read_lock_bh(&table->tb6_lock);
d8d1f30b 2377 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2378 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2379 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2380 dst_hold(&rt->dst);
c71099ac 2381 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2382 ip6_del_rt(rt);
1da177e4
LT
2383 goto restart;
2384 }
2385 }
c71099ac 2386 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2387}
2388
5578689a
DL
2389static void rtmsg_to_fib6_config(struct net *net,
2390 struct in6_rtmsg *rtmsg,
86872cb5
TG
2391 struct fib6_config *cfg)
2392{
2393 memset(cfg, 0, sizeof(*cfg));
2394
ca254490
DA
2395 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2396 : RT6_TABLE_MAIN;
86872cb5
TG
2397 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2398 cfg->fc_metric = rtmsg->rtmsg_metric;
2399 cfg->fc_expires = rtmsg->rtmsg_info;
2400 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2401 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2402 cfg->fc_flags = rtmsg->rtmsg_flags;
2403
5578689a 2404 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2405
4e3fd7a0
AD
2406 cfg->fc_dst = rtmsg->rtmsg_dst;
2407 cfg->fc_src = rtmsg->rtmsg_src;
2408 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2409}
2410
5578689a 2411int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2412{
86872cb5 2413 struct fib6_config cfg;
1da177e4
LT
2414 struct in6_rtmsg rtmsg;
2415 int err;
2416
67ba4152 2417 switch (cmd) {
1da177e4
LT
2418 case SIOCADDRT: /* Add a route */
2419 case SIOCDELRT: /* Delete a route */
af31f412 2420 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2421 return -EPERM;
2422 err = copy_from_user(&rtmsg, arg,
2423 sizeof(struct in6_rtmsg));
2424 if (err)
2425 return -EFAULT;
86872cb5 2426
5578689a 2427 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2428
1da177e4
LT
2429 rtnl_lock();
2430 switch (cmd) {
2431 case SIOCADDRT:
86872cb5 2432 err = ip6_route_add(&cfg);
1da177e4
LT
2433 break;
2434 case SIOCDELRT:
86872cb5 2435 err = ip6_route_del(&cfg);
1da177e4
LT
2436 break;
2437 default:
2438 err = -EINVAL;
2439 }
2440 rtnl_unlock();
2441
2442 return err;
3ff50b79 2443 }
1da177e4
LT
2444
2445 return -EINVAL;
2446}
2447
2448/*
2449 * Drop the packet on the floor
2450 */
2451
d5fdd6ba 2452static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2453{
612f09e8 2454 int type;
adf30907 2455 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2456 switch (ipstats_mib_noroutes) {
2457 case IPSTATS_MIB_INNOROUTES:
0660e03f 2458 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2459 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2460 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2461 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2462 break;
2463 }
2464 /* FALLTHROUGH */
2465 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2466 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2467 ipstats_mib_noroutes);
612f09e8
YH
2468 break;
2469 }
3ffe533c 2470 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2471 kfree_skb(skb);
2472 return 0;
2473}
2474
9ce8ade0
TG
2475static int ip6_pkt_discard(struct sk_buff *skb)
2476{
612f09e8 2477 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2478}
2479
ede2059d 2480static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2481{
adf30907 2482 skb->dev = skb_dst(skb)->dev;
612f09e8 2483 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2484}
2485
9ce8ade0
TG
2486static int ip6_pkt_prohibit(struct sk_buff *skb)
2487{
612f09e8 2488 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2489}
2490
ede2059d 2491static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2492{
adf30907 2493 skb->dev = skb_dst(skb)->dev;
612f09e8 2494 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2495}
2496
1da177e4
LT
2497/*
2498 * Allocate a dst for local (unicast / anycast) address.
2499 */
2500
2501struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2502 const struct in6_addr *addr,
8f031519 2503 bool anycast)
1da177e4 2504{
ca254490 2505 u32 tb_id;
c346dca1 2506 struct net *net = dev_net(idev->dev);
a3300ef4 2507 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
ad706862 2508 DST_NOCOUNT);
a3300ef4 2509 if (!rt)
1da177e4
LT
2510 return ERR_PTR(-ENOMEM);
2511
1da177e4
LT
2512 in6_dev_hold(idev);
2513
11d53b49 2514 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2515 rt->dst.input = ip6_input;
2516 rt->dst.output = ip6_output;
1da177e4 2517 rt->rt6i_idev = idev;
1da177e4
LT
2518
2519 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2520 if (anycast)
2521 rt->rt6i_flags |= RTF_ANYCAST;
2522 else
1da177e4 2523 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2524
550bab42 2525 rt->rt6i_gateway = *addr;
4e3fd7a0 2526 rt->rt6i_dst.addr = *addr;
1da177e4 2527 rt->rt6i_dst.plen = 128;
ca254490
DA
2528 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2529 rt->rt6i_table = fib6_get_table(net, tb_id);
8e3d5be7 2530 rt->dst.flags |= DST_NOCACHE;
1da177e4 2531
d8d1f30b 2532 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2533
2534 return rt;
2535}
2536
c3968a85
DW
2537int ip6_route_get_saddr(struct net *net,
2538 struct rt6_info *rt,
b71d1d42 2539 const struct in6_addr *daddr,
c3968a85
DW
2540 unsigned int prefs,
2541 struct in6_addr *saddr)
2542{
e16e888b
MS
2543 struct inet6_dev *idev =
2544 rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL;
c3968a85 2545 int err = 0;
e16e888b 2546 if (rt && rt->rt6i_prefsrc.plen)
4e3fd7a0 2547 *saddr = rt->rt6i_prefsrc.addr;
c3968a85
DW
2548 else
2549 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2550 daddr, prefs, saddr);
2551 return err;
2552}
2553
2554/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL matches any */
	struct net *net;	/* namespace whose null entry is left alone */
	struct in6_addr *addr;	/* address compared against each route's prefsrc */
};
2560
2561static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2562{
2563 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2564 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2565 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2566
d1918542 2567 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2568 rt != net->ipv6.ip6_null_entry &&
2569 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2570 /* remove prefsrc entry */
2571 rt->rt6i_prefsrc.plen = 0;
2572 }
2573 return 0;
2574}
2575
2576void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2577{
2578 struct net *net = dev_net(ifp->idev->dev);
2579 struct arg_dev_net_ip adni = {
2580 .dev = ifp->idev->dev,
2581 .net = net,
2582 .addr = &ifp->addr,
2583 };
0c3584d5 2584 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2585}
2586
be7a010d
DJ
2587#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2588#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2589
2590/* Remove routers and update dst entries when gateway turn into host. */
2591static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2592{
2593 struct in6_addr *gateway = (struct in6_addr *)arg;
2594
2595 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2596 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2597 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2598 return -1;
2599 }
2600 return 0;
2601}
2602
2603void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2604{
2605 fib6_clean_all(net, fib6_clean_tohost, gateway);
2606}
2607
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any device */
	struct net *net;	/* namespace whose null entry is never selected */
};
2612
1da177e4
LT
2613static int fib6_ifdown(struct rt6_info *rt, void *arg)
2614{
bc3ef660 2615 const struct arg_dev_net *adn = arg;
2616 const struct net_device *dev = adn->dev;
8ed67789 2617
d1918542 2618 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2619 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2620 return -1;
c159d30c 2621
1da177e4
LT
2622 return 0;
2623}
2624
f3db4851 2625void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2626{
8ed67789
DL
2627 struct arg_dev_net adn = {
2628 .dev = dev,
2629 .net = net,
2630 };
2631
0c3584d5 2632 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2633 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2634 if (dev)
2635 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2636}
2637
95c96174 2638struct rt6_mtu_change_arg {
1da177e4 2639 struct net_device *dev;
95c96174 2640 unsigned int mtu;
1da177e4
LT
2641};
2642
2643static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2644{
2645 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2646 struct inet6_dev *idev;
2647
2648 /* In IPv6 pmtu discovery is not optional,
2649 so that RTAX_MTU lock cannot disable it.
2650 We still use this lock to block changes
2651 caused by addrconf/ndisc.
2652 */
2653
2654 idev = __in6_dev_get(arg->dev);
38308473 2655 if (!idev)
1da177e4
LT
2656 return 0;
2657
2658 /* For administrative MTU increase, there is no way to discover
2659 IPv6 PMTU increase, so PMTU increase should be updated here.
2660 Since RFC 1981 doesn't include administrative MTU increase
2661 update PMTU increase is a MUST. (i.e. jumbo frame)
2662 */
2663 /*
2664 If new MTU is less than route PMTU, this new MTU will be the
2665 lowest MTU in the path, update the route PMTU to reflect PMTU
2666 decreases; if new MTU is greater than route PMTU, and the
2667 old MTU is the lowest MTU in the path, update the route PMTU
2668 to reflect the increase. In this case if the other nodes' MTU
2669 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2670 PMTU discouvery.
2671 */
d1918542 2672 if (rt->dst.dev == arg->dev &&
4b32b5ad
MKL
2673 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2674 if (rt->rt6i_flags & RTF_CACHE) {
2675 /* For RTF_CACHE with rt6i_pmtu == 0
2676 * (i.e. a redirected route),
2677 * the metrics of its rt->dst.from has already
2678 * been updated.
2679 */
2680 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2681 rt->rt6i_pmtu = arg->mtu;
2682 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2683 (dst_mtu(&rt->dst) < arg->mtu &&
2684 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2685 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2686 }
566cfd8f 2687 }
1da177e4
LT
2688 return 0;
2689}
2690
95c96174 2691void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2692{
c71099ac
TG
2693 struct rt6_mtu_change_arg arg = {
2694 .dev = dev,
2695 .mtu = mtu,
2696 };
1da177e4 2697
0c3584d5 2698 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2699}
2700
ef7c79ed 2701static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2702 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2703 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2704 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2705 [RTA_PRIORITY] = { .type = NLA_U32 },
2706 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2707 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2708 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2709 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2710 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 2711 [RTA_EXPIRES] = { .type = NLA_U32 },
86872cb5
TG
2712};
2713
2714static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2715 struct fib6_config *cfg)
1da177e4 2716{
86872cb5
TG
2717 struct rtmsg *rtm;
2718 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2719 unsigned int pref;
86872cb5 2720 int err;
1da177e4 2721
86872cb5
TG
2722 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2723 if (err < 0)
2724 goto errout;
1da177e4 2725
86872cb5
TG
2726 err = -EINVAL;
2727 rtm = nlmsg_data(nlh);
2728 memset(cfg, 0, sizeof(*cfg));
2729
2730 cfg->fc_table = rtm->rtm_table;
2731 cfg->fc_dst_len = rtm->rtm_dst_len;
2732 cfg->fc_src_len = rtm->rtm_src_len;
2733 cfg->fc_flags = RTF_UP;
2734 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2735 cfg->fc_type = rtm->rtm_type;
86872cb5 2736
ef2c7d7b
ND
2737 if (rtm->rtm_type == RTN_UNREACHABLE ||
2738 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2739 rtm->rtm_type == RTN_PROHIBIT ||
2740 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2741 cfg->fc_flags |= RTF_REJECT;
2742
ab79ad14
2743 if (rtm->rtm_type == RTN_LOCAL)
2744 cfg->fc_flags |= RTF_LOCAL;
2745
1f56a01f
MKL
2746 if (rtm->rtm_flags & RTM_F_CLONED)
2747 cfg->fc_flags |= RTF_CACHE;
2748
15e47304 2749 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2750 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2751 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2752
2753 if (tb[RTA_GATEWAY]) {
67b61f6c 2754 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2755 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2756 }
86872cb5
TG
2757
2758 if (tb[RTA_DST]) {
2759 int plen = (rtm->rtm_dst_len + 7) >> 3;
2760
2761 if (nla_len(tb[RTA_DST]) < plen)
2762 goto errout;
2763
2764 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2765 }
86872cb5
TG
2766
2767 if (tb[RTA_SRC]) {
2768 int plen = (rtm->rtm_src_len + 7) >> 3;
2769
2770 if (nla_len(tb[RTA_SRC]) < plen)
2771 goto errout;
2772
2773 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2774 }
86872cb5 2775
c3968a85 2776 if (tb[RTA_PREFSRC])
67b61f6c 2777 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2778
86872cb5
TG
2779 if (tb[RTA_OIF])
2780 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2781
2782 if (tb[RTA_PRIORITY])
2783 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2784
2785 if (tb[RTA_METRICS]) {
2786 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2787 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2788 }
86872cb5
TG
2789
2790 if (tb[RTA_TABLE])
2791 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2792
51ebd318
ND
2793 if (tb[RTA_MULTIPATH]) {
2794 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2795 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2796 }
2797
c78ba6d6
LR
2798 if (tb[RTA_PREF]) {
2799 pref = nla_get_u8(tb[RTA_PREF]);
2800 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2801 pref != ICMPV6_ROUTER_PREF_HIGH)
2802 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2803 cfg->fc_flags |= RTF_PREF(pref);
2804 }
2805
19e42e45
RP
2806 if (tb[RTA_ENCAP])
2807 cfg->fc_encap = tb[RTA_ENCAP];
2808
2809 if (tb[RTA_ENCAP_TYPE])
2810 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2811
32bc201e
XL
2812 if (tb[RTA_EXPIRES]) {
2813 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2814
2815 if (addrconf_finite_timeout(timeout)) {
2816 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2817 cfg->fc_flags |= RTF_EXPIRES;
2818 }
2819 }
2820
86872cb5
TG
2821 err = 0;
2822errout:
2823 return err;
1da177e4
LT
2824}
2825
6b9ea5a6
RP
2826struct rt6_nh {
2827 struct rt6_info *rt6_info;
2828 struct fib6_config r_cfg;
2829 struct mx6_config mxc;
2830 struct list_head next;
2831};
2832
2833static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2834{
2835 struct rt6_nh *nh;
2836
2837 list_for_each_entry(nh, rt6_nh_list, next) {
2838 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2839 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2840 nh->r_cfg.fc_ifindex);
2841 }
2842}
2843
2844static int ip6_route_info_append(struct list_head *rt6_nh_list,
2845 struct rt6_info *rt, struct fib6_config *r_cfg)
2846{
2847 struct rt6_nh *nh;
2848 struct rt6_info *rtnh;
2849 int err = -EEXIST;
2850
2851 list_for_each_entry(nh, rt6_nh_list, next) {
2852 /* check if rt6_info already exists */
2853 rtnh = nh->rt6_info;
2854
2855 if (rtnh->dst.dev == rt->dst.dev &&
2856 rtnh->rt6i_idev == rt->rt6i_idev &&
2857 ipv6_addr_equal(&rtnh->rt6i_gateway,
2858 &rt->rt6i_gateway))
2859 return err;
2860 }
2861
2862 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2863 if (!nh)
2864 return -ENOMEM;
2865 nh->rt6_info = rt;
2866 err = ip6_convert_metrics(&nh->mxc, r_cfg);
2867 if (err) {
2868 kfree(nh);
2869 return err;
2870 }
2871 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
2872 list_add_tail(&nh->next, rt6_nh_list);
2873
2874 return 0;
2875}
2876
2877static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318
ND
2878{
2879 struct fib6_config r_cfg;
2880 struct rtnexthop *rtnh;
6b9ea5a6
RP
2881 struct rt6_info *rt;
2882 struct rt6_nh *err_nh;
2883 struct rt6_nh *nh, *nh_safe;
51ebd318
ND
2884 int remaining;
2885 int attrlen;
6b9ea5a6
RP
2886 int err = 1;
2887 int nhn = 0;
2888 int replace = (cfg->fc_nlinfo.nlh &&
2889 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
2890 LIST_HEAD(rt6_nh_list);
51ebd318 2891
35f1b4e9 2892 remaining = cfg->fc_mp_len;
51ebd318 2893 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 2894
6b9ea5a6
RP
2895 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
2896 * rt6_info structs per nexthop
2897 */
51ebd318
ND
2898 while (rtnh_ok(rtnh, remaining)) {
2899 memcpy(&r_cfg, cfg, sizeof(*cfg));
2900 if (rtnh->rtnh_ifindex)
2901 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2902
2903 attrlen = rtnh_attrlen(rtnh);
2904 if (attrlen > 0) {
2905 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2906
2907 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2908 if (nla) {
67b61f6c 2909 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
2910 r_cfg.fc_flags |= RTF_GATEWAY;
2911 }
19e42e45
RP
2912 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
2913 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
2914 if (nla)
2915 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 2916 }
6b9ea5a6 2917
8c5b83f0
RP
2918 rt = ip6_route_info_create(&r_cfg);
2919 if (IS_ERR(rt)) {
2920 err = PTR_ERR(rt);
2921 rt = NULL;
6b9ea5a6 2922 goto cleanup;
8c5b83f0 2923 }
6b9ea5a6
RP
2924
2925 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 2926 if (err) {
6b9ea5a6
RP
2927 dst_free(&rt->dst);
2928 goto cleanup;
2929 }
2930
2931 rtnh = rtnh_next(rtnh, &remaining);
2932 }
2933
2934 err_nh = NULL;
2935 list_for_each_entry(nh, &rt6_nh_list, next) {
2936 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
2937 /* nh->rt6_info is used or freed at this point, reset to NULL*/
2938 nh->rt6_info = NULL;
2939 if (err) {
2940 if (replace && nhn)
2941 ip6_print_replace_route_err(&rt6_nh_list);
2942 err_nh = nh;
2943 goto add_errout;
51ebd318 2944 }
6b9ea5a6 2945
1a72418b 2946 /* Because each route is added like a single route we remove
27596472
MK
2947 * these flags after the first nexthop: if there is a collision,
2948 * we have already failed to add the first nexthop:
2949 * fib6_add_rt2node() has rejected it; when replacing, old
2950 * nexthops have been replaced by first new, the rest should
2951 * be added to it.
1a72418b 2952 */
27596472
MK
2953 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
2954 NLM_F_REPLACE);
6b9ea5a6
RP
2955 nhn++;
2956 }
2957
2958 goto cleanup;
2959
2960add_errout:
2961 /* Delete routes that were already added */
2962 list_for_each_entry(nh, &rt6_nh_list, next) {
2963 if (err_nh == nh)
2964 break;
2965 ip6_route_del(&nh->r_cfg);
2966 }
2967
2968cleanup:
2969 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
2970 if (nh->rt6_info)
2971 dst_free(&nh->rt6_info->dst);
52fe51f8 2972 kfree(nh->mxc.mx);
6b9ea5a6
RP
2973 list_del(&nh->next);
2974 kfree(nh);
2975 }
2976
2977 return err;
2978}
2979
2980static int ip6_route_multipath_del(struct fib6_config *cfg)
2981{
2982 struct fib6_config r_cfg;
2983 struct rtnexthop *rtnh;
2984 int remaining;
2985 int attrlen;
2986 int err = 1, last_err = 0;
2987
2988 remaining = cfg->fc_mp_len;
2989 rtnh = (struct rtnexthop *)cfg->fc_mp;
2990
2991 /* Parse a Multipath Entry */
2992 while (rtnh_ok(rtnh, remaining)) {
2993 memcpy(&r_cfg, cfg, sizeof(*cfg));
2994 if (rtnh->rtnh_ifindex)
2995 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2996
2997 attrlen = rtnh_attrlen(rtnh);
2998 if (attrlen > 0) {
2999 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3000
3001 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3002 if (nla) {
3003 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3004 r_cfg.fc_flags |= RTF_GATEWAY;
3005 }
3006 }
3007 err = ip6_route_del(&r_cfg);
3008 if (err)
3009 last_err = err;
3010
51ebd318
ND
3011 rtnh = rtnh_next(rtnh, &remaining);
3012 }
3013
3014 return last_err;
3015}
3016
67ba4152 3017static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3018{
86872cb5
TG
3019 struct fib6_config cfg;
3020 int err;
1da177e4 3021
86872cb5
TG
3022 err = rtm_to_fib6_config(skb, nlh, &cfg);
3023 if (err < 0)
3024 return err;
3025
51ebd318 3026 if (cfg.fc_mp)
6b9ea5a6 3027 return ip6_route_multipath_del(&cfg);
51ebd318
ND
3028 else
3029 return ip6_route_del(&cfg);
1da177e4
LT
3030}
3031
67ba4152 3032static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3033{
86872cb5
TG
3034 struct fib6_config cfg;
3035 int err;
1da177e4 3036
86872cb5
TG
3037 err = rtm_to_fib6_config(skb, nlh, &cfg);
3038 if (err < 0)
3039 return err;
3040
51ebd318 3041 if (cfg.fc_mp)
6b9ea5a6 3042 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3043 else
3044 return ip6_route_add(&cfg);
1da177e4
LT
3045}
3046
19e42e45 3047static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
3048{
3049 return NLMSG_ALIGN(sizeof(struct rtmsg))
3050 + nla_total_size(16) /* RTA_SRC */
3051 + nla_total_size(16) /* RTA_DST */
3052 + nla_total_size(16) /* RTA_GATEWAY */
3053 + nla_total_size(16) /* RTA_PREFSRC */
3054 + nla_total_size(4) /* RTA_TABLE */
3055 + nla_total_size(4) /* RTA_IIF */
3056 + nla_total_size(4) /* RTA_OIF */
3057 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3058 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3059 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3060 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3061 + nla_total_size(1) /* RTA_PREF */
61adedf3 3062 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
3063}
3064
191cd582
BH
3065static int rt6_fill_node(struct net *net,
3066 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3067 struct in6_addr *dst, struct in6_addr *src,
15e47304 3068 int iif, int type, u32 portid, u32 seq,
7bc570c8 3069 int prefix, int nowait, unsigned int flags)
1da177e4 3070{
4b32b5ad 3071 u32 metrics[RTAX_MAX];
1da177e4 3072 struct rtmsg *rtm;
2d7202bf 3073 struct nlmsghdr *nlh;
e3703b3d 3074 long expires;
9e762a4a 3075 u32 table;
1da177e4
LT
3076
3077 if (prefix) { /* user wants prefix routes only */
3078 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3079 /* success since this is not a prefix route */
3080 return 1;
3081 }
3082 }
3083
15e47304 3084 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3085 if (!nlh)
26932566 3086 return -EMSGSIZE;
2d7202bf
TG
3087
3088 rtm = nlmsg_data(nlh);
1da177e4
LT
3089 rtm->rtm_family = AF_INET6;
3090 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3091 rtm->rtm_src_len = rt->rt6i_src.plen;
3092 rtm->rtm_tos = 0;
c71099ac 3093 if (rt->rt6i_table)
9e762a4a 3094 table = rt->rt6i_table->tb6_id;
c71099ac 3095 else
9e762a4a
PM
3096 table = RT6_TABLE_UNSPEC;
3097 rtm->rtm_table = table;
c78679e8
DM
3098 if (nla_put_u32(skb, RTA_TABLE, table))
3099 goto nla_put_failure;
ef2c7d7b
ND
3100 if (rt->rt6i_flags & RTF_REJECT) {
3101 switch (rt->dst.error) {
3102 case -EINVAL:
3103 rtm->rtm_type = RTN_BLACKHOLE;
3104 break;
3105 case -EACCES:
3106 rtm->rtm_type = RTN_PROHIBIT;
3107 break;
b4949ab2
ND
3108 case -EAGAIN:
3109 rtm->rtm_type = RTN_THROW;
3110 break;
ef2c7d7b
ND
3111 default:
3112 rtm->rtm_type = RTN_UNREACHABLE;
3113 break;
3114 }
3115 }
38308473 3116 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3117 rtm->rtm_type = RTN_LOCAL;
d1918542 3118 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3119 rtm->rtm_type = RTN_LOCAL;
3120 else
3121 rtm->rtm_type = RTN_UNICAST;
3122 rtm->rtm_flags = 0;
35103d11 3123 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 3124 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
3125 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3126 rtm->rtm_flags |= RTNH_F_DEAD;
3127 }
1da177e4
LT
3128 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3129 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3130 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3131 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3132 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3133 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3134 rtm->rtm_protocol = RTPROT_RA;
3135 else
3136 rtm->rtm_protocol = RTPROT_KERNEL;
3137 }
1da177e4 3138
38308473 3139 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3140 rtm->rtm_flags |= RTM_F_CLONED;
3141
3142 if (dst) {
930345ea 3143 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3144 goto nla_put_failure;
1ab1457c 3145 rtm->rtm_dst_len = 128;
1da177e4 3146 } else if (rtm->rtm_dst_len)
930345ea 3147 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3148 goto nla_put_failure;
1da177e4
LT
3149#ifdef CONFIG_IPV6_SUBTREES
3150 if (src) {
930345ea 3151 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3152 goto nla_put_failure;
1ab1457c 3153 rtm->rtm_src_len = 128;
c78679e8 3154 } else if (rtm->rtm_src_len &&
930345ea 3155 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3156 goto nla_put_failure;
1da177e4 3157#endif
7bc570c8
YH
3158 if (iif) {
3159#ifdef CONFIG_IPV6_MROUTE
3160 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
8229efda 3161 int err = ip6mr_get_route(net, skb, rtm, nowait);
7bc570c8
YH
3162 if (err <= 0) {
3163 if (!nowait) {
3164 if (err == 0)
3165 return 0;
3166 goto nla_put_failure;
3167 } else {
3168 if (err == -EMSGSIZE)
3169 goto nla_put_failure;
3170 }
3171 }
3172 } else
3173#endif
c78679e8
DM
3174 if (nla_put_u32(skb, RTA_IIF, iif))
3175 goto nla_put_failure;
7bc570c8 3176 } else if (dst) {
1da177e4 3177 struct in6_addr saddr_buf;
c78679e8 3178 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3179 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3180 goto nla_put_failure;
1da177e4 3181 }
2d7202bf 3182
c3968a85
DW
3183 if (rt->rt6i_prefsrc.plen) {
3184 struct in6_addr saddr_buf;
4e3fd7a0 3185 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3186 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3187 goto nla_put_failure;
c3968a85
DW
3188 }
3189
4b32b5ad
MKL
3190 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3191 if (rt->rt6i_pmtu)
3192 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3193 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3194 goto nla_put_failure;
3195
dd0cbf29 3196 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3197 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3198 goto nla_put_failure;
94f826b8 3199 }
2d7202bf 3200
c78679e8
DM
3201 if (rt->dst.dev &&
3202 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3203 goto nla_put_failure;
3204 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3205 goto nla_put_failure;
8253947e
LW
3206
3207 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3208
87a50699 3209 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3210 goto nla_put_failure;
2d7202bf 3211
c78ba6d6
LR
3212 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3213 goto nla_put_failure;
3214
61adedf3 3215 lwtunnel_fill_encap(skb, rt->dst.lwtstate);
19e42e45 3216
053c095a
JB
3217 nlmsg_end(skb, nlh);
3218 return 0;
2d7202bf
TG
3219
3220nla_put_failure:
26932566
PM
3221 nlmsg_cancel(skb, nlh);
3222 return -EMSGSIZE;
1da177e4
LT
3223}
3224
1b43af54 3225int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3226{
3227 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3228 int prefix;
3229
2d7202bf
TG
3230 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3231 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3232 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3233 } else
3234 prefix = 0;
3235
191cd582
BH
3236 return rt6_fill_node(arg->net,
3237 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3238 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3239 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3240}
3241
67ba4152 3242static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3243{
3b1e0a65 3244 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3245 struct nlattr *tb[RTA_MAX+1];
3246 struct rt6_info *rt;
1da177e4 3247 struct sk_buff *skb;
ab364a6f 3248 struct rtmsg *rtm;
4c9483b2 3249 struct flowi6 fl6;
72331bc0 3250 int err, iif = 0, oif = 0;
1da177e4 3251
ab364a6f
TG
3252 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3253 if (err < 0)
3254 goto errout;
1da177e4 3255
ab364a6f 3256 err = -EINVAL;
4c9483b2 3257 memset(&fl6, 0, sizeof(fl6));
1da177e4 3258
ab364a6f
TG
3259 if (tb[RTA_SRC]) {
3260 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3261 goto errout;
3262
4e3fd7a0 3263 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3264 }
3265
3266 if (tb[RTA_DST]) {
3267 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3268 goto errout;
3269
4e3fd7a0 3270 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3271 }
3272
3273 if (tb[RTA_IIF])
3274 iif = nla_get_u32(tb[RTA_IIF]);
3275
3276 if (tb[RTA_OIF])
72331bc0 3277 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3278
2e47b291
LC
3279 if (tb[RTA_MARK])
3280 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3281
1da177e4
LT
3282 if (iif) {
3283 struct net_device *dev;
72331bc0
SL
3284 int flags = 0;
3285
5578689a 3286 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3287 if (!dev) {
3288 err = -ENODEV;
ab364a6f 3289 goto errout;
1da177e4 3290 }
72331bc0
SL
3291
3292 fl6.flowi6_iif = iif;
3293
3294 if (!ipv6_addr_any(&fl6.saddr))
3295 flags |= RT6_LOOKUP_F_HAS_SADDR;
3296
3297 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3298 flags);
3299 } else {
3300 fl6.flowi6_oif = oif;
3301
ca254490
DA
3302 if (netif_index_is_l3_master(net, oif)) {
3303 fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
3304 FLOWI_FLAG_SKIP_NH_OIF;
3305 }
3306
72331bc0 3307 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3308 }
3309
ab364a6f 3310 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3311 if (!skb) {
94e187c0 3312 ip6_rt_put(rt);
ab364a6f
TG
3313 err = -ENOBUFS;
3314 goto errout;
3315 }
1da177e4 3316
ab364a6f
TG
3317 /* Reserve room for dummy headers, this skb can pass
3318 through good chunk of routing engine.
3319 */
459a98ed 3320 skb_reset_mac_header(skb);
ab364a6f 3321 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3322
d8d1f30b 3323 skb_dst_set(skb, &rt->dst);
1da177e4 3324
4c9483b2 3325 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3326 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3327 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3328 if (err < 0) {
ab364a6f
TG
3329 kfree_skb(skb);
3330 goto errout;
1da177e4
LT
3331 }
3332
15e47304 3333 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3334errout:
1da177e4 3335 return err;
1da177e4
LT
3336}
3337
37a1d361
RP
3338void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3339 unsigned int nlm_flags)
1da177e4
LT
3340{
3341 struct sk_buff *skb;
5578689a 3342 struct net *net = info->nl_net;
528c4ceb
DL
3343 u32 seq;
3344 int err;
3345
3346 err = -ENOBUFS;
38308473 3347 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3348
19e42e45 3349 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3350 if (!skb)
21713ebc
TG
3351 goto errout;
3352
191cd582 3353 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
37a1d361 3354 event, info->portid, seq, 0, 0, nlm_flags);
26932566
PM
3355 if (err < 0) {
3356 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3357 WARN_ON(err == -EMSGSIZE);
3358 kfree_skb(skb);
3359 goto errout;
3360 }
15e47304 3361 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3362 info->nlh, gfp_any());
3363 return;
21713ebc
TG
3364errout:
3365 if (err < 0)
5578689a 3366 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3367}
3368
8ed67789 3369static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3370 unsigned long event, void *ptr)
8ed67789 3371{
351638e7 3372 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3373 struct net *net = dev_net(dev);
8ed67789
DL
3374
3375 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3376 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3377 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3378#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3379 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3380 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3381 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3382 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3383#endif
3384 }
3385
3386 return NOTIFY_OK;
3387}
3388
1da177e4
LT
3389/*
3390 * /proc
3391 */
3392
3393#ifdef CONFIG_PROC_FS
3394
33120b30
AD
3395static const struct file_operations ipv6_route_proc_fops = {
3396 .owner = THIS_MODULE,
3397 .open = ipv6_route_open,
3398 .read = seq_read,
3399 .llseek = seq_lseek,
8d2ca1d7 3400 .release = seq_release_net,
33120b30
AD
3401};
3402
1da177e4
LT
3403static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3404{
69ddb805 3405 struct net *net = (struct net *)seq->private;
1da177e4 3406 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3407 net->ipv6.rt6_stats->fib_nodes,
3408 net->ipv6.rt6_stats->fib_route_nodes,
3409 net->ipv6.rt6_stats->fib_rt_alloc,
3410 net->ipv6.rt6_stats->fib_rt_entries,
3411 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3412 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3413 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3414
3415 return 0;
3416}
3417
3418static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3419{
de05c557 3420 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3421}
3422
9a32144e 3423static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3424 .owner = THIS_MODULE,
3425 .open = rt6_stats_seq_open,
3426 .read = seq_read,
3427 .llseek = seq_lseek,
b6fcbdb4 3428 .release = single_release_net,
1da177e4
LT
3429};
3430#endif /* CONFIG_PROC_FS */
3431
3432#ifdef CONFIG_SYSCTL
3433
1da177e4 3434static
fe2c6338 3435int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3436 void __user *buffer, size_t *lenp, loff_t *ppos)
3437{
c486da34
LAG
3438 struct net *net;
3439 int delay;
3440 if (!write)
1da177e4 3441 return -EINVAL;
c486da34
LAG
3442
3443 net = (struct net *)ctl->extra1;
3444 delay = net->ipv6.sysctl.flush_delay;
3445 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3446 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3447 return 0;
1da177e4
LT
3448}
3449
fe2c6338 3450struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3451 {
1da177e4 3452 .procname = "flush",
4990509f 3453 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3454 .maxlen = sizeof(int),
89c8b3a1 3455 .mode = 0200,
6d9f239a 3456 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3457 },
3458 {
1da177e4 3459 .procname = "gc_thresh",
9a7ec3a9 3460 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3461 .maxlen = sizeof(int),
3462 .mode = 0644,
6d9f239a 3463 .proc_handler = proc_dointvec,
1da177e4
LT
3464 },
3465 {
1da177e4 3466 .procname = "max_size",
4990509f 3467 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3468 .maxlen = sizeof(int),
3469 .mode = 0644,
6d9f239a 3470 .proc_handler = proc_dointvec,
1da177e4
LT
3471 },
3472 {
1da177e4 3473 .procname = "gc_min_interval",
4990509f 3474 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3475 .maxlen = sizeof(int),
3476 .mode = 0644,
6d9f239a 3477 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3478 },
3479 {
1da177e4 3480 .procname = "gc_timeout",
4990509f 3481 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3482 .maxlen = sizeof(int),
3483 .mode = 0644,
6d9f239a 3484 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3485 },
3486 {
1da177e4 3487 .procname = "gc_interval",
4990509f 3488 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3489 .maxlen = sizeof(int),
3490 .mode = 0644,
6d9f239a 3491 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3492 },
3493 {
1da177e4 3494 .procname = "gc_elasticity",
4990509f 3495 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3496 .maxlen = sizeof(int),
3497 .mode = 0644,
f3d3f616 3498 .proc_handler = proc_dointvec,
1da177e4
LT
3499 },
3500 {
1da177e4 3501 .procname = "mtu_expires",
4990509f 3502 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3503 .maxlen = sizeof(int),
3504 .mode = 0644,
6d9f239a 3505 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3506 },
3507 {
1da177e4 3508 .procname = "min_adv_mss",
4990509f 3509 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3510 .maxlen = sizeof(int),
3511 .mode = 0644,
f3d3f616 3512 .proc_handler = proc_dointvec,
1da177e4
LT
3513 },
3514 {
1da177e4 3515 .procname = "gc_min_interval_ms",
4990509f 3516 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3517 .maxlen = sizeof(int),
3518 .mode = 0644,
6d9f239a 3519 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3520 },
f8572d8f 3521 { }
1da177e4
LT
3522};
3523
2c8c1e72 3524struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3525{
3526 struct ctl_table *table;
3527
3528 table = kmemdup(ipv6_route_table_template,
3529 sizeof(ipv6_route_table_template),
3530 GFP_KERNEL);
5ee09105
YH
3531
3532 if (table) {
3533 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3534 table[0].extra1 = net;
86393e52 3535 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3536 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3537 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3538 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3539 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3540 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3541 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3542 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3543 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3544
3545 /* Don't export sysctls to unprivileged users */
3546 if (net->user_ns != &init_user_ns)
3547 table[0].procname = NULL;
5ee09105
YH
3548 }
3549
760f2d01
DL
3550 return table;
3551}
1da177e4
LT
3552#endif
3553
2c8c1e72 3554static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3555{
633d424b 3556 int ret = -ENOMEM;
8ed67789 3557
86393e52
AD
3558 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3559 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3560
fc66f95c
ED
3561 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3562 goto out_ip6_dst_ops;
3563
8ed67789
DL
3564 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3565 sizeof(*net->ipv6.ip6_null_entry),
3566 GFP_KERNEL);
3567 if (!net->ipv6.ip6_null_entry)
fc66f95c 3568 goto out_ip6_dst_entries;
d8d1f30b 3569 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3570 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3571 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3572 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3573 ip6_template_metrics, true);
8ed67789
DL
3574
3575#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3576 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3577 sizeof(*net->ipv6.ip6_prohibit_entry),
3578 GFP_KERNEL);
68fffc67
PZ
3579 if (!net->ipv6.ip6_prohibit_entry)
3580 goto out_ip6_null_entry;
d8d1f30b 3581 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3582 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3583 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3584 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3585 ip6_template_metrics, true);
8ed67789
DL
3586
3587 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3588 sizeof(*net->ipv6.ip6_blk_hole_entry),
3589 GFP_KERNEL);
68fffc67
PZ
3590 if (!net->ipv6.ip6_blk_hole_entry)
3591 goto out_ip6_prohibit_entry;
d8d1f30b 3592 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3593 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3594 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3595 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3596 ip6_template_metrics, true);
8ed67789
DL
3597#endif
3598
b339a47c
PZ
3599 net->ipv6.sysctl.flush_delay = 0;
3600 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3601 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3602 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3603 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3604 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3605 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3606 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3607
6891a346
BT
3608 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3609
8ed67789
DL
3610 ret = 0;
3611out:
3612 return ret;
f2fc6a54 3613
68fffc67
PZ
3614#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3615out_ip6_prohibit_entry:
3616 kfree(net->ipv6.ip6_prohibit_entry);
3617out_ip6_null_entry:
3618 kfree(net->ipv6.ip6_null_entry);
3619#endif
fc66f95c
ED
3620out_ip6_dst_entries:
3621 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3622out_ip6_dst_ops:
f2fc6a54 3623 goto out;
cdb18761
DL
3624}
3625
2c8c1e72 3626static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3627{
8ed67789
DL
3628 kfree(net->ipv6.ip6_null_entry);
3629#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3630 kfree(net->ipv6.ip6_prohibit_entry);
3631 kfree(net->ipv6.ip6_blk_hole_entry);
3632#endif
41bb78b4 3633 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3634}
3635
d189634e
TG
3636static int __net_init ip6_route_net_init_late(struct net *net)
3637{
3638#ifdef CONFIG_PROC_FS
d4beaa66
G
3639 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3640 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3641#endif
3642 return 0;
3643}
3644
3645static void __net_exit ip6_route_net_exit_late(struct net *net)
3646{
3647#ifdef CONFIG_PROC_FS
ece31ffd
G
3648 remove_proc_entry("ipv6_route", net->proc_net);
3649 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3650#endif
3651}
3652
cdb18761
DL
3653static struct pernet_operations ip6_route_net_ops = {
3654 .init = ip6_route_net_init,
3655 .exit = ip6_route_net_exit,
3656};
3657
c3426b47
DM
3658static int __net_init ipv6_inetpeer_init(struct net *net)
3659{
3660 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3661
3662 if (!bp)
3663 return -ENOMEM;
3664 inet_peer_base_init(bp);
3665 net->ipv6.peers = bp;
3666 return 0;
3667}
3668
3669static void __net_exit ipv6_inetpeer_exit(struct net *net)
3670{
3671 struct inet_peer_base *bp = net->ipv6.peers;
3672
3673 net->ipv6.peers = NULL;
56a6b248 3674 inetpeer_invalidate_tree(bp);
c3426b47
DM
3675 kfree(bp);
3676}
3677
2b823f72 3678static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3679 .init = ipv6_inetpeer_init,
3680 .exit = ipv6_inetpeer_exit,
3681};
3682
d189634e
TG
3683static struct pernet_operations ip6_route_net_late_ops = {
3684 .init = ip6_route_net_init_late,
3685 .exit = ip6_route_net_exit_late,
3686};
3687
8ed67789
DL
3688static struct notifier_block ip6_route_dev_notifier = {
3689 .notifier_call = ip6_route_dev_notify,
3690 .priority = 0,
3691};
3692
433d49c3 3693int __init ip6_route_init(void)
1da177e4 3694{
433d49c3 3695 int ret;
8d0b94af 3696 int cpu;
433d49c3 3697
9a7ec3a9
DL
3698 ret = -ENOMEM;
3699 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3700 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3701 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3702 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3703 goto out;
14e50e57 3704
fc66f95c 3705 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3706 if (ret)
bdb3289f 3707 goto out_kmem_cache;
bdb3289f 3708
c3426b47
DM
3709 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3710 if (ret)
e8803b6c 3711 goto out_dst_entries;
2a0c451a 3712
7e52b33b
DM
3713 ret = register_pernet_subsys(&ip6_route_net_ops);
3714 if (ret)
3715 goto out_register_inetpeer;
c3426b47 3716
5dc121e9
AE
3717 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3718
8ed67789
DL
3719 /* Registering of the loopback is done before this portion of code,
3720 * the loopback reference in rt6_info will not be taken, do it
3721 * manually for init_net */
d8d1f30b 3722 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3723 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3724 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3725 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 3726 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 3727 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
3728 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3729 #endif
e8803b6c 3730 ret = fib6_init();
433d49c3 3731 if (ret)
8ed67789 3732 goto out_register_subsys;
433d49c3 3733
433d49c3
DL
3734 ret = xfrm6_init();
3735 if (ret)
e8803b6c 3736 goto out_fib6_init;
c35b7e72 3737
433d49c3
DL
3738 ret = fib6_rules_init();
3739 if (ret)
3740 goto xfrm6_init;
7e5449c2 3741
d189634e
TG
3742 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3743 if (ret)
3744 goto fib6_rules_init;
3745
433d49c3 3746 ret = -ENOBUFS;
c7ac8679
GR
3747 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3748 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3749 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3750 goto out_register_late_subsys;
c127ea2c 3751
8ed67789 3752 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3753 if (ret)
d189634e 3754 goto out_register_late_subsys;
8ed67789 3755
8d0b94af
MKL
3756 for_each_possible_cpu(cpu) {
3757 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3758
3759 INIT_LIST_HEAD(&ul->head);
3760 spin_lock_init(&ul->lock);
3761 }
3762
433d49c3
DL
3763out:
3764 return ret;
3765
d189634e
TG
3766out_register_late_subsys:
3767 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3768fib6_rules_init:
433d49c3
DL
3769 fib6_rules_cleanup();
3770xfrm6_init:
433d49c3 3771 xfrm6_fini();
2a0c451a
TG
3772out_fib6_init:
3773 fib6_gc_cleanup();
8ed67789
DL
3774out_register_subsys:
3775 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3776out_register_inetpeer:
3777 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3778out_dst_entries:
3779 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3780out_kmem_cache:
f2fc6a54 3781 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3782 goto out;
1da177e4
LT
3783}
3784
3785void ip6_route_cleanup(void)
3786{
8ed67789 3787 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3788 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3789 fib6_rules_cleanup();
1da177e4 3790 xfrm6_fini();
1da177e4 3791 fib6_gc_cleanup();
c3426b47 3792 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3793 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3794 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3795 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3796}