]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/route.c
net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4 66
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/*
 * Result of evaluating a route's next-hop neighbour.
 * Negative values are failures; they are returned as-is by
 * rt6_score_route() and interpreted by find_match().
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* route must not be used */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in a failed state */
	RT6_NUD_FAIL_DO_RR	= -1,	/* usable, but request round-robin */
	RT6_NUD_SUCCEED		= 1
};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 101
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Route Information option helpers, used by rt6_route_rcv(). */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev);
#endif
113
8d0b94af
MKL
114struct uncached_list {
115 spinlock_t lock;
116 struct list_head head;
117};
118
119static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
120
121static void rt6_uncached_list_add(struct rt6_info *rt)
122{
123 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
124
125 rt->dst.flags |= DST_NOCACHE;
126 rt->rt6i_uncached_list = ul;
127
128 spin_lock_bh(&ul->lock);
129 list_add_tail(&rt->rt6i_uncached, &ul->head);
130 spin_unlock_bh(&ul->lock);
131}
132
133static void rt6_uncached_list_del(struct rt6_info *rt)
134{
135 if (!list_empty(&rt->rt6i_uncached)) {
136 struct uncached_list *ul = rt->rt6i_uncached_list;
137
138 spin_lock_bh(&ul->lock);
139 list_del(&rt->rt6i_uncached);
140 spin_unlock_bh(&ul->lock);
141 }
142}
143
144static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
145{
146 struct net_device *loopback_dev = net->loopback_dev;
147 int cpu;
148
e332bc67
EB
149 if (dev == loopback_dev)
150 return;
151
8d0b94af
MKL
152 for_each_possible_cpu(cpu) {
153 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
154 struct rt6_info *rt;
155
156 spin_lock_bh(&ul->lock);
157 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
158 struct inet6_dev *rt_idev = rt->rt6i_idev;
159 struct net_device *rt_dev = rt->dst.dev;
160
e332bc67 161 if (rt_idev->dev == dev) {
8d0b94af
MKL
162 rt->rt6i_idev = in6_dev_get(loopback_dev);
163 in6_dev_put(rt_idev);
164 }
165
e332bc67 166 if (rt_dev == dev) {
8d0b94af
MKL
167 rt->dst.dev = loopback_dev;
168 dev_hold(rt->dst.dev);
169 dev_put(rt_dev);
170 }
171 }
172 spin_unlock_bh(&ul->lock);
173 }
174}
175
d52d3997
MKL
176static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
177{
178 return dst_metrics_write_ptr(rt->dst.from);
179}
180
06582540
DM
181static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
182{
4b32b5ad 183 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 184
d52d3997
MKL
185 if (rt->rt6i_flags & RTF_PCPU)
186 return rt6_pcpu_cow_metrics(rt);
187 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
188 return NULL;
189 else
3b471175 190 return dst_cow_metrics_generic(dst, old);
06582540
DM
191}
192
f894cbf8
DM
193static inline const void *choose_neigh_daddr(struct rt6_info *rt,
194 struct sk_buff *skb,
195 const void *daddr)
39232973
DM
196{
197 struct in6_addr *p = &rt->rt6i_gateway;
198
a7563f34 199 if (!ipv6_addr_any(p))
39232973 200 return (const void *) p;
f894cbf8
DM
201 else if (skb)
202 return &ipv6_hdr(skb)->daddr;
39232973
DM
203 return daddr;
204}
205
f894cbf8
DM
206static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
207 struct sk_buff *skb,
208 const void *daddr)
d3aaeb38 209{
39232973
DM
210 struct rt6_info *rt = (struct rt6_info *) dst;
211 struct neighbour *n;
212
f894cbf8 213 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 214 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
215 if (n)
216 return n;
217 return neigh_create(&nd_tbl, daddr, dst->dev);
218}
219
111757e7
JA
220static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
221{
222 struct net_device *dev = dst->dev;
223 struct rt6_info *rt = (struct rt6_info *)dst;
224
225 daddr = choose_neigh_daddr(rt, NULL, daddr);
226 if (!daddr)
227 return;
228 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
229 return;
230 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
231 return;
232 __ipv6_confirm_neigh(dev, daddr);
233}
234
9a7ec3a9 235static struct dst_ops ip6_dst_ops_template = {
1da177e4 236 .family = AF_INET6,
1da177e4
LT
237 .gc = ip6_dst_gc,
238 .gc_thresh = 1024,
239 .check = ip6_dst_check,
0dbaee3b 240 .default_advmss = ip6_default_advmss,
ebb762f2 241 .mtu = ip6_mtu,
06582540 242 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
243 .destroy = ip6_dst_destroy,
244 .ifdown = ip6_dst_ifdown,
245 .negative_advice = ip6_negative_advice,
246 .link_failure = ip6_link_failure,
247 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 248 .redirect = rt6_do_redirect,
9f8955cc 249 .local_out = __ip6_local_out,
d3aaeb38 250 .neigh_lookup = ip6_neigh_lookup,
111757e7 251 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
252};
253
ebb762f2 254static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 255{
618f9bc7
SK
256 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
257
258 return mtu ? : dst->dev->mtu;
ec831ea7
RD
259}
260
6700c270
DM
261static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
262 struct sk_buff *skb, u32 mtu)
14e50e57
DM
263{
264}
265
6700c270
DM
/* Blackhole dsts ignore redirects: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
270
14e50e57
DM
271static struct dst_ops ip6_dst_blackhole_ops = {
272 .family = AF_INET6,
14e50e57
DM
273 .destroy = ip6_dst_destroy,
274 .check = ip6_dst_check,
ebb762f2 275 .mtu = ip6_blackhole_mtu,
214f45c9 276 .default_advmss = ip6_default_advmss,
14e50e57 277 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 278 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 279 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 280 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
281};
282
62fa8a84 283static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 284 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
285};
286
fb0af4c7 287static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
288 .dst = {
289 .__refcnt = ATOMIC_INIT(1),
290 .__use = 1,
2c20cbd7 291 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 292 .error = -ENETUNREACH,
d8d1f30b
CG
293 .input = ip6_pkt_discard,
294 .output = ip6_pkt_discard_out,
1da177e4
LT
295 },
296 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 297 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
298 .rt6i_metric = ~(u32) 0,
299 .rt6i_ref = ATOMIC_INIT(1),
300};
301
101367c2
TG
302#ifdef CONFIG_IPV6_MULTIPLE_TABLES
303
fb0af4c7 304static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
305 .dst = {
306 .__refcnt = ATOMIC_INIT(1),
307 .__use = 1,
2c20cbd7 308 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 309 .error = -EACCES,
d8d1f30b
CG
310 .input = ip6_pkt_prohibit,
311 .output = ip6_pkt_prohibit_out,
101367c2
TG
312 },
313 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 314 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
315 .rt6i_metric = ~(u32) 0,
316 .rt6i_ref = ATOMIC_INIT(1),
317};
318
fb0af4c7 319static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
320 .dst = {
321 .__refcnt = ATOMIC_INIT(1),
322 .__use = 1,
2c20cbd7 323 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 324 .error = -EINVAL,
d8d1f30b 325 .input = dst_discard,
ede2059d 326 .output = dst_discard_out,
101367c2
TG
327 },
328 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 329 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
330 .rt6i_metric = ~(u32) 0,
331 .rt6i_ref = ATOMIC_INIT(1),
332};
333
334#endif
335
ebfa45f0
MKL
336static void rt6_info_init(struct rt6_info *rt)
337{
338 struct dst_entry *dst = &rt->dst;
339
340 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
341 INIT_LIST_HEAD(&rt->rt6i_siblings);
342 INIT_LIST_HEAD(&rt->rt6i_uncached);
343}
344
1da177e4 345/* allocate dst with ip6_dst_ops */
d52d3997
MKL
346static struct rt6_info *__ip6_dst_alloc(struct net *net,
347 struct net_device *dev,
ad706862 348 int flags)
1da177e4 349{
97bab73f 350 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 351 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 352
ebfa45f0
MKL
353 if (rt)
354 rt6_info_init(rt);
8104891b 355
cf911662 356 return rt;
1da177e4
LT
357}
358
9ab179d8
DA
359struct rt6_info *ip6_dst_alloc(struct net *net,
360 struct net_device *dev,
361 int flags)
d52d3997 362{
ad706862 363 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
364
365 if (rt) {
366 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
367 if (rt->rt6i_pcpu) {
368 int cpu;
369
370 for_each_possible_cpu(cpu) {
371 struct rt6_info **p;
372
373 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
374 /* no one shares rt */
375 *p = NULL;
376 }
377 } else {
378 dst_destroy((struct dst_entry *)rt);
379 return NULL;
380 }
381 }
382
383 return rt;
384}
9ab179d8 385EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 386
1da177e4
LT
387static void ip6_dst_destroy(struct dst_entry *dst)
388{
389 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 390 struct dst_entry *from = dst->from;
8d0b94af 391 struct inet6_dev *idev;
1da177e4 392
4b32b5ad 393 dst_destroy_metrics_generic(dst);
87775312 394 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
395 rt6_uncached_list_del(rt);
396
397 idev = rt->rt6i_idev;
38308473 398 if (idev) {
1da177e4
LT
399 rt->rt6i_idev = NULL;
400 in6_dev_put(idev);
1ab1457c 401 }
1716a961 402
ecd98837
YH
403 dst->from = NULL;
404 dst_release(from);
b3419363
DM
405}
406
1da177e4
LT
407static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
408 int how)
409{
410 struct rt6_info *rt = (struct rt6_info *)dst;
411 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 412 struct net_device *loopback_dev =
c346dca1 413 dev_net(dev)->loopback_dev;
1da177e4 414
97cac082
DM
415 if (dev != loopback_dev) {
416 if (idev && idev->dev == dev) {
417 struct inet6_dev *loopback_idev =
418 in6_dev_get(loopback_dev);
419 if (loopback_idev) {
420 rt->rt6i_idev = loopback_idev;
421 in6_dev_put(idev);
422 }
423 }
1da177e4
LT
424 }
425}
426
5973fb1e
MKL
427static bool __rt6_check_expired(const struct rt6_info *rt)
428{
429 if (rt->rt6i_flags & RTF_EXPIRES)
430 return time_after(jiffies, rt->dst.expires);
431 else
432 return false;
433}
434
a50feda5 435static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 436{
1716a961
G
437 if (rt->rt6i_flags & RTF_EXPIRES) {
438 if (time_after(jiffies, rt->dst.expires))
a50feda5 439 return true;
1716a961 440 } else if (rt->dst.from) {
3fd91fb3 441 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 442 }
a50feda5 443 return false;
1da177e4
LT
444}
445
51ebd318
ND
/*
 * Multipath route selection: map the flow hash of @fl6 onto an index
 * in [0, candidate_count).  Adapted from fib_info_hashfn().
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	return get_hash_from_flowi6(fl6) % candidate_count;
}
455
456static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
457 struct flowi6 *fl6, int oif,
458 int strict)
51ebd318
ND
459{
460 struct rt6_info *sibling, *next_sibling;
461 int route_choosen;
462
463 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
464 /* Don't change the route, if route_choosen == 0
465 * (siblings does not include ourself)
466 */
467 if (route_choosen)
468 list_for_each_entry_safe(sibling, next_sibling,
469 &match->rt6i_siblings, rt6i_siblings) {
470 route_choosen--;
471 if (route_choosen == 0) {
52bd4c0c
ND
472 if (rt6_score_route(sibling, oif, strict) < 0)
473 break;
51ebd318
ND
474 match = sibling;
475 break;
476 }
477 }
478 return match;
479}
480
1da177e4 481/*
c71099ac 482 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
483 */
484
8ed67789
DL
485static inline struct rt6_info *rt6_device_match(struct net *net,
486 struct rt6_info *rt,
b71d1d42 487 const struct in6_addr *saddr,
1da177e4 488 int oif,
d420895e 489 int flags)
1da177e4
LT
490{
491 struct rt6_info *local = NULL;
492 struct rt6_info *sprt;
493
dd3abc4e
YH
494 if (!oif && ipv6_addr_any(saddr))
495 goto out;
496
d8d1f30b 497 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 498 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
499
500 if (oif) {
1da177e4
LT
501 if (dev->ifindex == oif)
502 return sprt;
503 if (dev->flags & IFF_LOOPBACK) {
38308473 504 if (!sprt->rt6i_idev ||
1da177e4 505 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 506 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 507 continue;
17fb0b2b
DA
508 if (local &&
509 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
510 continue;
511 }
512 local = sprt;
513 }
dd3abc4e
YH
514 } else {
515 if (ipv6_chk_addr(net, saddr, dev,
516 flags & RT6_LOOKUP_F_IFACE))
517 return sprt;
1da177e4 518 }
dd3abc4e 519 }
1da177e4 520
dd3abc4e 521 if (oif) {
1da177e4
LT
522 if (local)
523 return local;
524
d420895e 525 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 526 return net->ipv6.ip6_null_entry;
1da177e4 527 }
dd3abc4e 528out:
1da177e4
LT
529 return rt;
530}
531
27097255 532#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
533struct __rt6_probe_work {
534 struct work_struct work;
535 struct in6_addr target;
536 struct net_device *dev;
537};
538
539static void rt6_probe_deferred(struct work_struct *w)
540{
541 struct in6_addr mcaddr;
542 struct __rt6_probe_work *work =
543 container_of(w, struct __rt6_probe_work, work);
544
545 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 546 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 547 dev_put(work->dev);
662f5533 548 kfree(work);
c2f17e82
HFS
549}
550
27097255
YH
551static void rt6_probe(struct rt6_info *rt)
552{
990edb42 553 struct __rt6_probe_work *work;
f2c31e32 554 struct neighbour *neigh;
27097255
YH
555 /*
556 * Okay, this does not seem to be appropriate
557 * for now, however, we need to check if it
558 * is really so; aka Router Reachability Probing.
559 *
560 * Router Reachability Probe MUST be rate-limited
561 * to no more than one per minute.
562 */
2152caea 563 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 564 return;
2152caea
YH
565 rcu_read_lock_bh();
566 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
567 if (neigh) {
8d6c31bf
MKL
568 if (neigh->nud_state & NUD_VALID)
569 goto out;
570
990edb42 571 work = NULL;
2152caea 572 write_lock(&neigh->lock);
990edb42
MKL
573 if (!(neigh->nud_state & NUD_VALID) &&
574 time_after(jiffies,
575 neigh->updated +
576 rt->rt6i_idev->cnf.rtr_probe_interval)) {
577 work = kmalloc(sizeof(*work), GFP_ATOMIC);
578 if (work)
579 __neigh_set_probe_once(neigh);
c2f17e82 580 }
2152caea 581 write_unlock(&neigh->lock);
990edb42
MKL
582 } else {
583 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 584 }
990edb42
MKL
585
586 if (work) {
587 INIT_WORK(&work->work, rt6_probe_deferred);
588 work->target = rt->rt6i_gateway;
589 dev_hold(rt->dst.dev);
590 work->dev = rt->dst.dev;
591 schedule_work(&work->work);
592 }
593
8d6c31bf 594out:
2152caea 595 rcu_read_unlock_bh();
27097255
YH
596}
597#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
601#endif
602
1da177e4 603/*
554cfb7e 604 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 605 */
b6f99a21 606static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 607{
d1918542 608 struct net_device *dev = rt->dst.dev;
161980f4 609 if (!oif || dev->ifindex == oif)
554cfb7e 610 return 2;
161980f4
DM
611 if ((dev->flags & IFF_LOOPBACK) &&
612 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
613 return 1;
614 return 0;
554cfb7e 615}
1da177e4 616
afc154e9 617static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 618{
f2c31e32 619 struct neighbour *neigh;
afc154e9 620 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 621
4d0c5911
YH
622 if (rt->rt6i_flags & RTF_NONEXTHOP ||
623 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 624 return RT6_NUD_SUCCEED;
145a3621
YH
625
626 rcu_read_lock_bh();
627 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
628 if (neigh) {
629 read_lock(&neigh->lock);
554cfb7e 630 if (neigh->nud_state & NUD_VALID)
afc154e9 631 ret = RT6_NUD_SUCCEED;
398bcbeb 632#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 633 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 634 ret = RT6_NUD_SUCCEED;
7e980569
JB
635 else
636 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 637#endif
145a3621 638 read_unlock(&neigh->lock);
afc154e9
HFS
639 } else {
640 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 641 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 642 }
145a3621
YH
643 rcu_read_unlock_bh();
644
a5a81f0b 645 return ret;
1da177e4
LT
646}
647
554cfb7e
YH
648static int rt6_score_route(struct rt6_info *rt, int oif,
649 int strict)
1da177e4 650{
a5a81f0b 651 int m;
1ab1457c 652
4d0c5911 653 m = rt6_check_dev(rt, oif);
77d16f45 654 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 655 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
656#ifdef CONFIG_IPV6_ROUTER_PREF
657 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
658#endif
afc154e9
HFS
659 if (strict & RT6_LOOKUP_F_REACHABLE) {
660 int n = rt6_check_neigh(rt);
661 if (n < 0)
662 return n;
663 }
554cfb7e
YH
664 return m;
665}
666
f11e6659 667static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
668 int *mpri, struct rt6_info *match,
669 bool *do_rr)
554cfb7e 670{
f11e6659 671 int m;
afc154e9 672 bool match_do_rr = false;
35103d11
AG
673 struct inet6_dev *idev = rt->rt6i_idev;
674 struct net_device *dev = rt->dst.dev;
675
676 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
677 idev->cnf.ignore_routes_with_linkdown &&
678 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 679 goto out;
f11e6659
DM
680
681 if (rt6_check_expired(rt))
682 goto out;
683
684 m = rt6_score_route(rt, oif, strict);
7e980569 685 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
686 match_do_rr = true;
687 m = 0; /* lowest valid score */
7e980569 688 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 689 goto out;
afc154e9
HFS
690 }
691
692 if (strict & RT6_LOOKUP_F_REACHABLE)
693 rt6_probe(rt);
f11e6659 694
7e980569 695 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 696 if (m > *mpri) {
afc154e9 697 *do_rr = match_do_rr;
f11e6659
DM
698 *mpri = m;
699 match = rt;
f11e6659 700 }
f11e6659
DM
701out:
702 return match;
703}
704
705static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
706 struct rt6_info *rr_head,
afc154e9
HFS
707 u32 metric, int oif, int strict,
708 bool *do_rr)
f11e6659 709{
9fbdcfaf 710 struct rt6_info *rt, *match, *cont;
554cfb7e 711 int mpri = -1;
1da177e4 712
f11e6659 713 match = NULL;
9fbdcfaf
SK
714 cont = NULL;
715 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
716 if (rt->rt6i_metric != metric) {
717 cont = rt;
718 break;
719 }
720
721 match = find_match(rt, oif, strict, &mpri, match, do_rr);
722 }
723
724 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
725 if (rt->rt6i_metric != metric) {
726 cont = rt;
727 break;
728 }
729
afc154e9 730 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
731 }
732
733 if (match || !cont)
734 return match;
735
736 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 737 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 738
f11e6659
DM
739 return match;
740}
1da177e4 741
f11e6659
DM
742static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
743{
744 struct rt6_info *match, *rt0;
8ed67789 745 struct net *net;
afc154e9 746 bool do_rr = false;
1da177e4 747
f11e6659
DM
748 rt0 = fn->rr_ptr;
749 if (!rt0)
750 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 751
afc154e9
HFS
752 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
753 &do_rr);
1da177e4 754
afc154e9 755 if (do_rr) {
d8d1f30b 756 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 757
554cfb7e 758 /* no entries matched; do round-robin */
f11e6659
DM
759 if (!next || next->rt6i_metric != rt0->rt6i_metric)
760 next = fn->leaf;
761
762 if (next != rt0)
763 fn->rr_ptr = next;
1da177e4 764 }
1da177e4 765
d1918542 766 net = dev_net(rt0->dst.dev);
a02cec21 767 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
768}
769
8b9df265
MKL
770static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
771{
772 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
773}
774
70ceb4f5
YH
775#ifdef CONFIG_IPV6_ROUTE_INFO
776int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 777 const struct in6_addr *gwaddr)
70ceb4f5 778{
c346dca1 779 struct net *net = dev_net(dev);
70ceb4f5
YH
780 struct route_info *rinfo = (struct route_info *) opt;
781 struct in6_addr prefix_buf, *prefix;
782 unsigned int pref;
4bed72e4 783 unsigned long lifetime;
70ceb4f5
YH
784 struct rt6_info *rt;
785
786 if (len < sizeof(struct route_info)) {
787 return -EINVAL;
788 }
789
790 /* Sanity check for prefix_len and length */
791 if (rinfo->length > 3) {
792 return -EINVAL;
793 } else if (rinfo->prefix_len > 128) {
794 return -EINVAL;
795 } else if (rinfo->prefix_len > 64) {
796 if (rinfo->length < 2) {
797 return -EINVAL;
798 }
799 } else if (rinfo->prefix_len > 0) {
800 if (rinfo->length < 1) {
801 return -EINVAL;
802 }
803 }
804
805 pref = rinfo->route_pref;
806 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 807 return -EINVAL;
70ceb4f5 808
4bed72e4 809 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
810
811 if (rinfo->length == 3)
812 prefix = (struct in6_addr *)rinfo->prefix;
813 else {
814 /* this function is safe */
815 ipv6_addr_prefix(&prefix_buf,
816 (struct in6_addr *)rinfo->prefix,
817 rinfo->prefix_len);
818 prefix = &prefix_buf;
819 }
820
f104a567
DJ
821 if (rinfo->prefix_len == 0)
822 rt = rt6_get_dflt_router(gwaddr, dev);
823 else
824 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 825 gwaddr, dev);
70ceb4f5
YH
826
827 if (rt && !lifetime) {
e0a1ad73 828 ip6_del_rt(rt);
70ceb4f5
YH
829 rt = NULL;
830 }
831
832 if (!rt && lifetime)
830218c1
DA
833 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
834 dev, pref);
70ceb4f5
YH
835 else if (rt)
836 rt->rt6i_flags = RTF_ROUTEINFO |
837 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
838
839 if (rt) {
1716a961
G
840 if (!addrconf_finite_timeout(lifetime))
841 rt6_clean_expires(rt);
842 else
843 rt6_set_expires(rt, jiffies + HZ * lifetime);
844
94e187c0 845 ip6_rt_put(rt);
70ceb4f5
YH
846 }
847 return 0;
848}
849#endif
850
a3c00e46
MKL
851static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
852 struct in6_addr *saddr)
853{
854 struct fib6_node *pn;
855 while (1) {
856 if (fn->fn_flags & RTN_TL_ROOT)
857 return NULL;
858 pn = fn->parent;
859 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
860 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
861 else
862 fn = pn;
863 if (fn->fn_flags & RTN_RTINFO)
864 return fn;
865 }
866}
c71099ac 867
8ed67789
DL
868static struct rt6_info *ip6_pol_route_lookup(struct net *net,
869 struct fib6_table *table,
4c9483b2 870 struct flowi6 *fl6, int flags)
1da177e4
LT
871{
872 struct fib6_node *fn;
873 struct rt6_info *rt;
874
c71099ac 875 read_lock_bh(&table->tb6_lock);
4c9483b2 876 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
877restart:
878 rt = fn->leaf;
4c9483b2 879 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 880 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 881 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
882 if (rt == net->ipv6.ip6_null_entry) {
883 fn = fib6_backtrack(fn, &fl6->saddr);
884 if (fn)
885 goto restart;
886 }
d8d1f30b 887 dst_use(&rt->dst, jiffies);
c71099ac 888 read_unlock_bh(&table->tb6_lock);
b811580d
DA
889
890 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
891
c71099ac
TG
892 return rt;
893
894}
895
67ba4152 896struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
897 int flags)
898{
899 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
900}
901EXPORT_SYMBOL_GPL(ip6_route_lookup);
902
9acd9f3a
YH
903struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
904 const struct in6_addr *saddr, int oif, int strict)
c71099ac 905{
4c9483b2
DM
906 struct flowi6 fl6 = {
907 .flowi6_oif = oif,
908 .daddr = *daddr,
c71099ac
TG
909 };
910 struct dst_entry *dst;
77d16f45 911 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 912
adaa70bb 913 if (saddr) {
4c9483b2 914 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
915 flags |= RT6_LOOKUP_F_HAS_SADDR;
916 }
917
4c9483b2 918 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
919 if (dst->error == 0)
920 return (struct rt6_info *) dst;
921
922 dst_release(dst);
923
1da177e4
LT
924 return NULL;
925}
7159039a
YH
926EXPORT_SYMBOL(rt6_lookup);
927
c71099ac 928/* ip6_ins_rt is called with FREE table->tb6_lock.
1da177e4
LT
929 It takes new route entry, the addition fails by any reason the
930 route is freed. In any case, if caller does not hold it, it may
931 be destroyed.
932 */
933
e5fd387a 934static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 935 struct mx6_config *mxc)
1da177e4
LT
936{
937 int err;
c71099ac 938 struct fib6_table *table;
1da177e4 939
c71099ac
TG
940 table = rt->rt6i_table;
941 write_lock_bh(&table->tb6_lock);
e715b6d3 942 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 943 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
944
945 return err;
946}
947
40e22e8f
TG
948int ip6_ins_rt(struct rt6_info *rt)
949{
e715b6d3
FW
950 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
951 struct mx6_config mxc = { .mx = NULL, };
952
953 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
954}
955
8b9df265
MKL
956static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
957 const struct in6_addr *daddr,
958 const struct in6_addr *saddr)
1da177e4 959{
1da177e4
LT
960 struct rt6_info *rt;
961
962 /*
963 * Clone the route.
964 */
965
d52d3997 966 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 967 ort = (struct rt6_info *)ort->dst.from;
1da177e4 968
ad706862 969 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
970
971 if (!rt)
972 return NULL;
973
974 ip6_rt_copy_init(rt, ort);
975 rt->rt6i_flags |= RTF_CACHE;
976 rt->rt6i_metric = 0;
977 rt->dst.flags |= DST_HOST;
978 rt->rt6i_dst.addr = *daddr;
979 rt->rt6i_dst.plen = 128;
1da177e4 980
83a09abd
MKL
981 if (!rt6_is_gw_or_nonexthop(ort)) {
982 if (ort->rt6i_dst.plen != 128 &&
983 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
984 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 985#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
986 if (rt->rt6i_src.plen && saddr) {
987 rt->rt6i_src.addr = *saddr;
988 rt->rt6i_src.plen = 128;
8b9df265 989 }
83a09abd 990#endif
95a9a5ba 991 }
1da177e4 992
95a9a5ba
YH
993 return rt;
994}
1da177e4 995
d52d3997
MKL
996static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
997{
998 struct rt6_info *pcpu_rt;
999
1000 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 1001 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
1002
1003 if (!pcpu_rt)
1004 return NULL;
1005 ip6_rt_copy_init(pcpu_rt, rt);
1006 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1007 pcpu_rt->rt6i_flags |= RTF_PCPU;
1008 return pcpu_rt;
1009}
1010
1011/* It should be called with read_lock_bh(&tb6_lock) acquired */
1012static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1013{
a73e4195 1014 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1015
1016 p = this_cpu_ptr(rt->rt6i_pcpu);
1017 pcpu_rt = *p;
1018
a73e4195
MKL
1019 if (pcpu_rt) {
1020 dst_hold(&pcpu_rt->dst);
1021 rt6_dst_from_metrics_check(pcpu_rt);
1022 }
1023 return pcpu_rt;
1024}
1025
1026static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1027{
9c7370a1 1028 struct fib6_table *table = rt->rt6i_table;
a73e4195 1029 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1030
1031 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1032 if (!pcpu_rt) {
1033 struct net *net = dev_net(rt->dst.dev);
1034
9c7370a1
MKL
1035 dst_hold(&net->ipv6.ip6_null_entry->dst);
1036 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1037 }
1038
9c7370a1
MKL
1039 read_lock_bh(&table->tb6_lock);
1040 if (rt->rt6i_pcpu) {
1041 p = this_cpu_ptr(rt->rt6i_pcpu);
1042 prev = cmpxchg(p, NULL, pcpu_rt);
1043 if (prev) {
1044 /* If someone did it before us, return prev instead */
1045 dst_destroy(&pcpu_rt->dst);
1046 pcpu_rt = prev;
1047 }
1048 } else {
1049 /* rt has been removed from the fib6 tree
1050 * before we have a chance to acquire the read_lock.
1051 * In this case, don't brother to create a pcpu rt
1052 * since rt is going away anyway. The next
1053 * dst_check() will trigger a re-lookup.
1054 */
d52d3997 1055 dst_destroy(&pcpu_rt->dst);
9c7370a1 1056 pcpu_rt = rt;
d52d3997 1057 }
d52d3997
MKL
1058 dst_hold(&pcpu_rt->dst);
1059 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1060 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1061 return pcpu_rt;
1062}
1063
9ff74384
DA
1064struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1065 int oif, struct flowi6 *fl6, int flags)
1da177e4 1066{
367efcb9 1067 struct fib6_node *fn, *saved_fn;
45e4fd26 1068 struct rt6_info *rt;
c71099ac 1069 int strict = 0;
1da177e4 1070
77d16f45 1071 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1072 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1073 if (net->ipv6.devconf_all->forwarding == 0)
1074 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1075
c71099ac 1076 read_lock_bh(&table->tb6_lock);
1da177e4 1077
4c9483b2 1078 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1079 saved_fn = fn;
1da177e4 1080
ca254490
DA
1081 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1082 oif = 0;
1083
a3c00e46 1084redo_rt6_select:
367efcb9 1085 rt = rt6_select(fn, oif, strict);
52bd4c0c 1086 if (rt->rt6i_nsiblings)
367efcb9 1087 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1088 if (rt == net->ipv6.ip6_null_entry) {
1089 fn = fib6_backtrack(fn, &fl6->saddr);
1090 if (fn)
1091 goto redo_rt6_select;
367efcb9
MKL
1092 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1093 /* also consider unreachable route */
1094 strict &= ~RT6_LOOKUP_F_REACHABLE;
1095 fn = saved_fn;
1096 goto redo_rt6_select;
367efcb9 1097 }
a3c00e46
MKL
1098 }
1099
fb9de91e 1100
3da59bd9 1101 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1102 dst_use(&rt->dst, jiffies);
1103 read_unlock_bh(&table->tb6_lock);
1104
1105 rt6_dst_from_metrics_check(rt);
b811580d
DA
1106
1107 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1108 return rt;
3da59bd9
MKL
1109 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1110 !(rt->rt6i_flags & RTF_GATEWAY))) {
1111 /* Create a RTF_CACHE clone which will not be
1112 * owned by the fib6 tree. It is for the special case where
1113 * the daddr in the skb during the neighbor look-up is different
1114 * from the fl6->daddr used to look-up route here.
1115 */
1116
1117 struct rt6_info *uncached_rt;
1118
d52d3997
MKL
1119 dst_use(&rt->dst, jiffies);
1120 read_unlock_bh(&table->tb6_lock);
1121
3da59bd9
MKL
1122 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1123 dst_release(&rt->dst);
c71099ac 1124
3da59bd9 1125 if (uncached_rt)
8d0b94af 1126 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1127 else
1128 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1129
3da59bd9 1130 dst_hold(&uncached_rt->dst);
b811580d
DA
1131
1132 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1133 return uncached_rt;
3da59bd9 1134
d52d3997
MKL
1135 } else {
1136 /* Get a percpu copy */
1137
1138 struct rt6_info *pcpu_rt;
1139
1140 rt->dst.lastuse = jiffies;
1141 rt->dst.__use++;
1142 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1143
9c7370a1
MKL
1144 if (pcpu_rt) {
1145 read_unlock_bh(&table->tb6_lock);
1146 } else {
1147 /* We have to do the read_unlock first
1148 * because rt6_make_pcpu_route() may trigger
1149 * ip6_dst_gc() which will take the write_lock.
1150 */
1151 dst_hold(&rt->dst);
1152 read_unlock_bh(&table->tb6_lock);
a73e4195 1153 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1154 dst_release(&rt->dst);
1155 }
d52d3997 1156
b811580d 1157 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1158 return pcpu_rt;
9c7370a1 1159
d52d3997 1160 }
1da177e4 1161}
9ff74384 1162EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1163
8ed67789 1164static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1165 struct flowi6 *fl6, int flags)
4acad72d 1166{
4c9483b2 1167 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1168}
1169
d409b847
MB
1170struct dst_entry *ip6_route_input_lookup(struct net *net,
1171 struct net_device *dev,
1172 struct flowi6 *fl6, int flags)
72331bc0
SL
1173{
1174 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1175 flags |= RT6_LOOKUP_F_IFACE;
1176
1177 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1178}
d409b847 1179EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1180
c71099ac
TG
1181void ip6_route_input(struct sk_buff *skb)
1182{
b71d1d42 1183 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1184 struct net *net = dev_net(skb->dev);
adaa70bb 1185 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1186 struct ip_tunnel_info *tun_info;
4c9483b2 1187 struct flowi6 fl6 = {
e0d56fdd 1188 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1189 .daddr = iph->daddr,
1190 .saddr = iph->saddr,
6502ca52 1191 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1192 .flowi6_mark = skb->mark,
1193 .flowi6_proto = iph->nexthdr,
c71099ac 1194 };
adaa70bb 1195
904af04d 1196 tun_info = skb_tunnel_info(skb);
46fa062a 1197 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1198 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1199 skb_dst_drop(skb);
72331bc0 1200 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1201}
1202
8ed67789 1203static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1204 struct flowi6 *fl6, int flags)
1da177e4 1205{
4c9483b2 1206 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1207}
1208
6f21c96a
PA
1209struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1210 struct flowi6 *fl6, int flags)
c71099ac 1211{
d46a9d67 1212 bool any_src;
c71099ac 1213
4c1feac5
DA
1214 if (rt6_need_strict(&fl6->daddr)) {
1215 struct dst_entry *dst;
1216
1217 dst = l3mdev_link_scope_lookup(net, fl6);
1218 if (dst)
1219 return dst;
1220 }
ca254490 1221
1fb9489b 1222 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1223
d46a9d67 1224 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1225 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1226 (fl6->flowi6_oif && any_src))
77d16f45 1227 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1228
d46a9d67 1229 if (!any_src)
adaa70bb 1230 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1231 else if (sk)
1232 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1233
4c9483b2 1234 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1235}
6f21c96a 1236EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1237
2774c131 1238struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1239{
5c1e6aa3 1240 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1241 struct dst_entry *new = NULL;
1242
f5b0a874 1243 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1244 if (rt) {
0a1f5962 1245 rt6_info_init(rt);
8104891b 1246
0a1f5962 1247 new = &rt->dst;
14e50e57 1248 new->__use = 1;
352e512c 1249 new->input = dst_discard;
ede2059d 1250 new->output = dst_discard_out;
14e50e57 1251
0a1f5962 1252 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1253 rt->rt6i_idev = ort->rt6i_idev;
1254 if (rt->rt6i_idev)
1255 in6_dev_hold(rt->rt6i_idev);
14e50e57 1256
4e3fd7a0 1257 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1258 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1259 rt->rt6i_metric = 0;
1260
1261 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1262#ifdef CONFIG_IPV6_SUBTREES
1263 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1264#endif
1265
1266 dst_free(new);
1267 }
1268
69ead7af
DM
1269 dst_release(dst_orig);
1270 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1271}
14e50e57 1272
1da177e4
LT
1273/*
1274 * Destination cache support functions
1275 */
1276
4b32b5ad
MKL
1277static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1278{
1279 if (rt->dst.from &&
1280 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1281 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1282}
1283
3da59bd9
MKL
1284static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1285{
1286 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1287 return NULL;
1288
1289 if (rt6_check_expired(rt))
1290 return NULL;
1291
1292 return &rt->dst;
1293}
1294
1295static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1296{
5973fb1e
MKL
1297 if (!__rt6_check_expired(rt) &&
1298 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1299 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1300 return &rt->dst;
1301 else
1302 return NULL;
1303}
1304
1da177e4
LT
1305static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1306{
1307 struct rt6_info *rt;
1308
1309 rt = (struct rt6_info *) dst;
1310
6f3118b5
ND
1311 /* All IPV6 dsts are created with ->obsolete set to the value
1312 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1313 * into this function always.
1314 */
e3bc10bd 1315
4b32b5ad
MKL
1316 rt6_dst_from_metrics_check(rt);
1317
02bcf4e0
MKL
1318 if (rt->rt6i_flags & RTF_PCPU ||
1319 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
3da59bd9
MKL
1320 return rt6_dst_from_check(rt, cookie);
1321 else
1322 return rt6_check(rt, cookie);
1da177e4
LT
1323}
1324
1325static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1326{
1327 struct rt6_info *rt = (struct rt6_info *) dst;
1328
1329 if (rt) {
54c1a859
YH
1330 if (rt->rt6i_flags & RTF_CACHE) {
1331 if (rt6_check_expired(rt)) {
1332 ip6_del_rt(rt);
1333 dst = NULL;
1334 }
1335 } else {
1da177e4 1336 dst_release(dst);
54c1a859
YH
1337 dst = NULL;
1338 }
1da177e4 1339 }
54c1a859 1340 return dst;
1da177e4
LT
1341}
1342
1343static void ip6_link_failure(struct sk_buff *skb)
1344{
1345 struct rt6_info *rt;
1346
3ffe533c 1347 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1348
adf30907 1349 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1350 if (rt) {
1eb4f758
HFS
1351 if (rt->rt6i_flags & RTF_CACHE) {
1352 dst_hold(&rt->dst);
8e3d5be7 1353 ip6_del_rt(rt);
1eb4f758 1354 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1355 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1356 }
1da177e4
LT
1357 }
1358}
1359
45e4fd26
MKL
1360static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1361{
1362 struct net *net = dev_net(rt->dst.dev);
1363
1364 rt->rt6i_flags |= RTF_MODIFIED;
1365 rt->rt6i_pmtu = mtu;
1366 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1367}
1368
0d3f6d29
MKL
1369static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1370{
1371 return !(rt->rt6i_flags & RTF_CACHE) &&
1372 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1373}
1374
45e4fd26
MKL
1375static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1376 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1377{
78556423 1378 const struct in6_addr *daddr, *saddr;
67ba4152 1379 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1380
45e4fd26
MKL
1381 if (rt6->rt6i_flags & RTF_LOCAL)
1382 return;
81aded24 1383
19bda36c
XL
1384 if (dst_metric_locked(dst, RTAX_MTU))
1385 return;
1386
78556423
JA
1387 if (iph) {
1388 daddr = &iph->daddr;
1389 saddr = &iph->saddr;
1390 } else if (sk) {
1391 daddr = &sk->sk_v6_daddr;
1392 saddr = &inet6_sk(sk)->saddr;
1393 } else {
1394 daddr = NULL;
1395 saddr = NULL;
1396 }
1397 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
1398 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1399 if (mtu >= dst_mtu(dst))
1400 return;
9d289715 1401
0d3f6d29 1402 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 1403 rt6_do_update_pmtu(rt6, mtu);
78556423 1404 } else if (daddr) {
45e4fd26
MKL
1405 struct rt6_info *nrt6;
1406
45e4fd26
MKL
1407 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1408 if (nrt6) {
1409 rt6_do_update_pmtu(nrt6, mtu);
1410
1411 /* ip6_ins_rt(nrt6) will bump the
1412 * rt6->rt6i_node->fn_sernum
1413 * which will fail the next rt6_check() and
1414 * invalidate the sk->sk_dst_cache.
1415 */
1416 ip6_ins_rt(nrt6);
1417 }
1da177e4
LT
1418 }
1419}
1420
45e4fd26
MKL
1421static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1422 struct sk_buff *skb, u32 mtu)
1423{
1424 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1425}
1426
42ae66c8 1427void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 1428 int oif, u32 mark, kuid_t uid)
81aded24
DM
1429{
1430 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1431 struct dst_entry *dst;
1432 struct flowi6 fl6;
1433
1434 memset(&fl6, 0, sizeof(fl6));
1435 fl6.flowi6_oif = oif;
1b3c61dc 1436 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1437 fl6.daddr = iph->daddr;
1438 fl6.saddr = iph->saddr;
6502ca52 1439 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1440 fl6.flowi6_uid = uid;
81aded24
DM
1441
1442 dst = ip6_route_output(net, NULL, &fl6);
1443 if (!dst->error)
45e4fd26 1444 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1445 dst_release(dst);
1446}
1447EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1448
1449void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1450{
33c162a9
MKL
1451 struct dst_entry *dst;
1452
81aded24 1453 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 1454 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
1455
1456 dst = __sk_dst_get(sk);
1457 if (!dst || !dst->obsolete ||
1458 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1459 return;
1460
1461 bh_lock_sock(sk);
1462 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1463 ip6_datagram_dst_update(sk, false);
1464 bh_unlock_sock(sk);
81aded24
DM
1465}
1466EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1467
b55b76b2
DJ
/* Handle redirects */

/* Flow key used for redirect lookups: a flowi6 followed by the address of
 * the router the redirect came from.  @fl6 must stay the first member:
 * __ip6_route_redirect() receives a pointer to it and casts that pointer
 * back to struct ip6rd_flowi to reach @gateway.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1473
1474static struct rt6_info *__ip6_route_redirect(struct net *net,
1475 struct fib6_table *table,
1476 struct flowi6 *fl6,
1477 int flags)
1478{
1479 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1480 struct rt6_info *rt;
1481 struct fib6_node *fn;
1482
1483 /* Get the "current" route for this destination and
67c408cf 1484 * check if the redirect has come from appropriate router.
b55b76b2
DJ
1485 *
1486 * RFC 4861 specifies that redirects should only be
1487 * accepted if they come from the nexthop to the target.
1488 * Due to the way the routes are chosen, this notion
1489 * is a bit fuzzy and one might need to check all possible
1490 * routes.
1491 */
1492
1493 read_lock_bh(&table->tb6_lock);
1494 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1495restart:
1496 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1497 if (rt6_check_expired(rt))
1498 continue;
1499 if (rt->dst.error)
1500 break;
1501 if (!(rt->rt6i_flags & RTF_GATEWAY))
1502 continue;
1503 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1504 continue;
1505 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1506 continue;
1507 break;
1508 }
1509
1510 if (!rt)
1511 rt = net->ipv6.ip6_null_entry;
1512 else if (rt->dst.error) {
1513 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1514 goto out;
1515 }
1516
1517 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1518 fn = fib6_backtrack(fn, &fl6->saddr);
1519 if (fn)
1520 goto restart;
b55b76b2 1521 }
a3c00e46 1522
b0a1ba59 1523out:
b55b76b2
DJ
1524 dst_hold(&rt->dst);
1525
1526 read_unlock_bh(&table->tb6_lock);
1527
b811580d 1528 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1529 return rt;
1530};
1531
1532static struct dst_entry *ip6_route_redirect(struct net *net,
1533 const struct flowi6 *fl6,
1534 const struct in6_addr *gateway)
1535{
1536 int flags = RT6_LOOKUP_F_HAS_SADDR;
1537 struct ip6rd_flowi rdfl;
1538
1539 rdfl.fl6 = *fl6;
1540 rdfl.gateway = *gateway;
1541
1542 return fib6_rule_lookup(net, &rdfl.fl6,
1543 flags, __ip6_route_redirect);
1544}
1545
e2d118a1
LC
1546void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1547 kuid_t uid)
3a5ad2ee
DM
1548{
1549 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1550 struct dst_entry *dst;
1551 struct flowi6 fl6;
1552
1553 memset(&fl6, 0, sizeof(fl6));
e374c618 1554 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1555 fl6.flowi6_oif = oif;
1556 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1557 fl6.daddr = iph->daddr;
1558 fl6.saddr = iph->saddr;
6502ca52 1559 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1560 fl6.flowi6_uid = uid;
3a5ad2ee 1561
b55b76b2
DJ
1562 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1563 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1564 dst_release(dst);
1565}
1566EXPORT_SYMBOL_GPL(ip6_redirect);
1567
c92a59ec
DJ
1568void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1569 u32 mark)
1570{
1571 const struct ipv6hdr *iph = ipv6_hdr(skb);
1572 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1573 struct dst_entry *dst;
1574 struct flowi6 fl6;
1575
1576 memset(&fl6, 0, sizeof(fl6));
e374c618 1577 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1578 fl6.flowi6_oif = oif;
1579 fl6.flowi6_mark = mark;
c92a59ec
DJ
1580 fl6.daddr = msg->dest;
1581 fl6.saddr = iph->daddr;
e2d118a1 1582 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 1583
b55b76b2
DJ
1584 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1585 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1586 dst_release(dst);
1587}
1588
3a5ad2ee
DM
1589void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1590{
e2d118a1
LC
1591 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1592 sk->sk_uid);
3a5ad2ee
DM
1593}
1594EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1595
0dbaee3b 1596static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1597{
0dbaee3b
DM
1598 struct net_device *dev = dst->dev;
1599 unsigned int mtu = dst_mtu(dst);
1600 struct net *net = dev_net(dev);
1601
1da177e4
LT
1602 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1603
5578689a
DL
1604 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1605 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1606
1607 /*
1ab1457c
YH
1608 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1609 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1610 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1611 * rely only on pmtu discovery"
1612 */
1613 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1614 mtu = IPV6_MAXPLEN;
1615 return mtu;
1616}
1617
ebb762f2 1618static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1619{
4b32b5ad
MKL
1620 const struct rt6_info *rt = (const struct rt6_info *)dst;
1621 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1622 struct inet6_dev *idev;
618f9bc7 1623
4b32b5ad
MKL
1624 if (mtu)
1625 goto out;
1626
1627 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1628 if (mtu)
30f78d8e 1629 goto out;
618f9bc7
SK
1630
1631 mtu = IPV6_MIN_MTU;
d33e4553
DM
1632
1633 rcu_read_lock();
1634 idev = __in6_dev_get(dst->dev);
1635 if (idev)
1636 mtu = idev->cnf.mtu6;
1637 rcu_read_unlock();
1638
30f78d8e 1639out:
14972cbd
RP
1640 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1641
1642 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
1643}
1644
3b00944c
YH
/* Singly linked list (through dst.next) of the dsts created by
 * icmp6_dst_alloc().  The alloc, gc and clean_all helpers take
 * icmp6_dst_lock with spin_lock_bh() around every access.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1647
3b00944c 1648struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1649 struct flowi6 *fl6)
1da177e4 1650{
87a11578 1651 struct dst_entry *dst;
1da177e4
LT
1652 struct rt6_info *rt;
1653 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1654 struct net *net = dev_net(dev);
1da177e4 1655
38308473 1656 if (unlikely(!idev))
122bdf67 1657 return ERR_PTR(-ENODEV);
1da177e4 1658
ad706862 1659 rt = ip6_dst_alloc(net, dev, 0);
38308473 1660 if (unlikely(!rt)) {
1da177e4 1661 in6_dev_put(idev);
87a11578 1662 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1663 goto out;
1664 }
1665
8e2ec639
YZ
1666 rt->dst.flags |= DST_HOST;
1667 rt->dst.output = ip6_output;
d8d1f30b 1668 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1669 rt->rt6i_gateway = fl6->daddr;
87a11578 1670 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1671 rt->rt6i_dst.plen = 128;
1672 rt->rt6i_idev = idev;
14edd87d 1673 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1674
3b00944c 1675 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1676 rt->dst.next = icmp6_dst_gc_list;
1677 icmp6_dst_gc_list = &rt->dst;
3b00944c 1678 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1679
5578689a 1680 fib6_force_start_gc(net);
1da177e4 1681
87a11578
DM
1682 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1683
1da177e4 1684out:
87a11578 1685 return dst;
1da177e4
LT
1686}
1687
3d0f24a7 1688int icmp6_dst_gc(void)
1da177e4 1689{
e9476e95 1690 struct dst_entry *dst, **pprev;
3d0f24a7 1691 int more = 0;
1da177e4 1692
3b00944c
YH
1693 spin_lock_bh(&icmp6_dst_lock);
1694 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1695
1da177e4
LT
1696 while ((dst = *pprev) != NULL) {
1697 if (!atomic_read(&dst->__refcnt)) {
1698 *pprev = dst->next;
1699 dst_free(dst);
1da177e4
LT
1700 } else {
1701 pprev = &dst->next;
3d0f24a7 1702 ++more;
1da177e4
LT
1703 }
1704 }
1705
3b00944c 1706 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1707
3d0f24a7 1708 return more;
1da177e4
LT
1709}
1710
1e493d19
DM
1711static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1712 void *arg)
1713{
1714 struct dst_entry *dst, **pprev;
1715
1716 spin_lock_bh(&icmp6_dst_lock);
1717 pprev = &icmp6_dst_gc_list;
1718 while ((dst = *pprev) != NULL) {
1719 struct rt6_info *rt = (struct rt6_info *) dst;
1720 if (func(rt, arg)) {
1721 *pprev = dst->next;
1722 dst_free(dst);
1723 } else {
1724 pprev = &dst->next;
1725 }
1726 }
1727 spin_unlock_bh(&icmp6_dst_lock);
1728}
1729
569d3645 1730static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1731{
86393e52 1732 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1733 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1734 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1735 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1736 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1737 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1738 int entries;
7019b78e 1739
fc66f95c 1740 entries = dst_entries_get_fast(ops);
49a18d86 1741 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1742 entries <= rt_max_size)
1da177e4
LT
1743 goto out;
1744
6891a346 1745 net->ipv6.ip6_rt_gc_expire++;
14956643 1746 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1747 entries = dst_entries_get_slow(ops);
1748 if (entries < ops->gc_thresh)
7019b78e 1749 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1750out:
7019b78e 1751 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1752 return entries > rt_max_size;
1da177e4
LT
1753}
1754
e715b6d3
FW
1755static int ip6_convert_metrics(struct mx6_config *mxc,
1756 const struct fib6_config *cfg)
1757{
c3a8d947 1758 bool ecn_ca = false;
e715b6d3
FW
1759 struct nlattr *nla;
1760 int remaining;
1761 u32 *mp;
1762
63159f29 1763 if (!cfg->fc_mx)
e715b6d3
FW
1764 return 0;
1765
1766 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1767 if (unlikely(!mp))
1768 return -ENOMEM;
1769
1770 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1771 int type = nla_type(nla);
1bb14807 1772 u32 val;
e715b6d3 1773
1bb14807
DB
1774 if (!type)
1775 continue;
1776 if (unlikely(type > RTAX_MAX))
1777 goto err;
ea697639 1778
1bb14807
DB
1779 if (type == RTAX_CC_ALGO) {
1780 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1781
1bb14807 1782 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1783 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1784 if (val == TCP_CA_UNSPEC)
1785 goto err;
1786 } else {
1787 val = nla_get_u32(nla);
e715b6d3 1788 }
626abd59
PA
1789 if (type == RTAX_HOPLIMIT && val > 255)
1790 val = 255;
b8d3e416
DB
1791 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1792 goto err;
1bb14807
DB
1793
1794 mp[type - 1] = val;
1795 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1796 }
1797
c3a8d947
DB
1798 if (ecn_ca) {
1799 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1800 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1801 }
e715b6d3 1802
c3a8d947 1803 mxc->mx = mp;
e715b6d3
FW
1804 return 0;
1805 err:
1806 kfree(mp);
1807 return -EINVAL;
1808}
1da177e4 1809
8c14586f
DA
1810static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1811 struct fib6_config *cfg,
1812 const struct in6_addr *gw_addr)
1813{
1814 struct flowi6 fl6 = {
1815 .flowi6_oif = cfg->fc_ifindex,
1816 .daddr = *gw_addr,
1817 .saddr = cfg->fc_prefsrc,
1818 };
1819 struct fib6_table *table;
1820 struct rt6_info *rt;
d5d32e4b 1821 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
1822
1823 table = fib6_get_table(net, cfg->fc_table);
1824 if (!table)
1825 return NULL;
1826
1827 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1828 flags |= RT6_LOOKUP_F_HAS_SADDR;
1829
1830 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1831
1832 /* if table lookup failed, fall back to full lookup */
1833 if (rt == net->ipv6.ip6_null_entry) {
1834 ip6_rt_put(rt);
1835 rt = NULL;
1836 }
1837
1838 return rt;
1839}
1840
/* Build (but do not insert) an rt6_info from the route request @cfg.
 *
 * Side effects on @cfg: a zero fc_metric is replaced by IP6_RT_PRIO_USER,
 * an RTPROT_UNSPEC fc_protocol by RTPROT_BOOT, and fc_nlinfo.nl_net is set
 * to the namespace of the device finally chosen.
 *
 * On success the returned route owns one reference each on its device and
 * inet6_dev.  On failure every reference taken here is dropped, a
 * partially built route is freed and ERR_PTR(err) is returned.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU)
		goto out;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		goto out;
#ifndef CONFIG_IPV6_SUBTREES
	/* Source-specific routes need subtree support. */
	if (cfg->fc_src_len)
		goto out;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	/* A netlink request without NLM_F_CREATE first tries the existing
	 * table; a missing table is still created, but with a warning.
	 */
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Pick the input handler from the destination type / route flags. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		/* Where the tunnel redirects a direction, remember the
		 * original handler in the lwtstate and hook in the
		 * lwtunnel one.
		 */
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here: they would result
	 * in kernel looping; promote them to reject routes.
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		/* Error code and handlers depend on the reject flavour. */
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			/* THROW -> -EAGAIN, UNREACHABLE -> -EHOSTUNREACH,
			 * anything else -> -ENETUNREACH.
			 */
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly forbids using non-link-local
			 * addresses as the nexthop address; otherwise the
			 * router will not be able to send redirects.
			 * That is very good, but in some (rare!)
			 * circumstances (SIT, PtP, NBMA NOARP links) it is
			 * handy to allow some exceptions. --ANK
			 * We allow IPv4-mapped nexthops to support
			 * RFC4798-type addressing.
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED)))
				goto out;

			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				/* Discard a table hit that is itself a
				 * gateway route or sits on another device.
				 */
				if (grt) {
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* No device given: adopt the one the gateway
				 * is reached through, taking our own
				 * references on it.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			/* The gateway is accepted only if the route to it
			 * is not itself a gateway route.
			 */
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* ipv6_chk_addr() must accept the preferred source on dev. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	/* The dev/idev references held above are handed to the route. */
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	return ERR_PTR(err);
}
2107
2108int ip6_route_add(struct fib6_config *cfg)
2109{
2110 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2111 struct rt6_info *rt;
6b9ea5a6
RP
2112 int err;
2113
8c5b83f0
RP
2114 rt = ip6_route_info_create(cfg);
2115 if (IS_ERR(rt)) {
2116 err = PTR_ERR(rt);
2117 rt = NULL;
6b9ea5a6 2118 goto out;
8c5b83f0 2119 }
6b9ea5a6 2120
e715b6d3
FW
2121 err = ip6_convert_metrics(&mxc, cfg);
2122 if (err)
2123 goto out;
1da177e4 2124
e715b6d3
FW
2125 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2126
2127 kfree(mxc.mx);
6b9ea5a6 2128
e715b6d3 2129 return err;
1da177e4 2130out:
1da177e4 2131 if (rt)
d8d1f30b 2132 dst_free(&rt->dst);
6b9ea5a6 2133
1da177e4
LT
2134 return err;
2135}
2136
86872cb5 2137static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2138{
2139 int err;
c71099ac 2140 struct fib6_table *table;
d1918542 2141 struct net *net = dev_net(rt->dst.dev);
1da177e4 2142
8e3d5be7
MKL
2143 if (rt == net->ipv6.ip6_null_entry ||
2144 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2145 err = -ENOENT;
2146 goto out;
2147 }
6c813a72 2148
c71099ac
TG
2149 table = rt->rt6i_table;
2150 write_lock_bh(&table->tb6_lock);
86872cb5 2151 err = fib6_del(rt, info);
c71099ac 2152 write_unlock_bh(&table->tb6_lock);
1da177e4 2153
6825a26c 2154out:
94e187c0 2155 ip6_rt_put(rt);
1da177e4
LT
2156 return err;
2157}
2158
e0a1ad73
TG
2159int ip6_del_rt(struct rt6_info *rt)
2160{
4d1169c1 2161 struct nl_info info = {
d1918542 2162 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2163 };
528c4ceb 2164 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2165}
2166
86872cb5 2167static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2168{
c71099ac 2169 struct fib6_table *table;
1da177e4
LT
2170 struct fib6_node *fn;
2171 struct rt6_info *rt;
2172 int err = -ESRCH;
2173
5578689a 2174 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2175 if (!table)
c71099ac
TG
2176 return err;
2177
2178 read_lock_bh(&table->tb6_lock);
1da177e4 2179
c71099ac 2180 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2181 &cfg->fc_dst, cfg->fc_dst_len,
2182 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2183
1da177e4 2184 if (fn) {
d8d1f30b 2185 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2186 if ((rt->rt6i_flags & RTF_CACHE) &&
2187 !(cfg->fc_flags & RTF_CACHE))
2188 continue;
86872cb5 2189 if (cfg->fc_ifindex &&
d1918542
DM
2190 (!rt->dst.dev ||
2191 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2192 continue;
86872cb5
TG
2193 if (cfg->fc_flags & RTF_GATEWAY &&
2194 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2195 continue;
86872cb5 2196 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2197 continue;
c2ed1880
M
2198 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2199 continue;
d8d1f30b 2200 dst_hold(&rt->dst);
c71099ac 2201 read_unlock_bh(&table->tb6_lock);
1da177e4 2202
86872cb5 2203 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2204 }
2205 }
c71099ac 2206 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2207
2208 return err;
2209}
2210
6700c270 2211static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2212{
a6279458 2213 struct netevent_redirect netevent;
e8599ff4 2214 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2215 struct ndisc_options ndopts;
2216 struct inet6_dev *in6_dev;
2217 struct neighbour *neigh;
71bcdba0 2218 struct rd_msg *msg;
6e157b6a
DM
2219 int optlen, on_link;
2220 u8 *lladdr;
e8599ff4 2221
29a3cad5 2222 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2223 optlen -= sizeof(*msg);
e8599ff4
DM
2224
2225 if (optlen < 0) {
6e157b6a 2226 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2227 return;
2228 }
2229
71bcdba0 2230 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2231
71bcdba0 2232 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2233 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2234 return;
2235 }
2236
6e157b6a 2237 on_link = 0;
71bcdba0 2238 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2239 on_link = 1;
71bcdba0 2240 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2241 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2242 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2243 return;
2244 }
2245
2246 in6_dev = __in6_dev_get(skb->dev);
2247 if (!in6_dev)
2248 return;
2249 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2250 return;
2251
2252 /* RFC2461 8.1:
2253 * The IP source address of the Redirect MUST be the same as the current
2254 * first-hop router for the specified ICMP Destination Address.
2255 */
2256
f997c55c 2257 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2258 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2259 return;
2260 }
6e157b6a
DM
2261
2262 lladdr = NULL;
e8599ff4
DM
2263 if (ndopts.nd_opts_tgt_lladdr) {
2264 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2265 skb->dev);
2266 if (!lladdr) {
2267 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2268 return;
2269 }
2270 }
2271
6e157b6a 2272 rt = (struct rt6_info *) dst;
ec13ad1d 2273 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2274 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2275 return;
6e157b6a 2276 }
e8599ff4 2277
6e157b6a
DM
2278 /* Redirect received -> path was valid.
2279 * Look, redirects are sent only in response to data packets,
2280 * so that this nexthop apparently is reachable. --ANK
2281 */
78556423 2282 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 2283
71bcdba0 2284 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2285 if (!neigh)
2286 return;
a6279458 2287
1da177e4
LT
2288 /*
2289 * We have finally decided to accept it.
2290 */
2291
f997c55c 2292 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
2293 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2294 NEIGH_UPDATE_F_OVERRIDE|
2295 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
2296 NEIGH_UPDATE_F_ISROUTER)),
2297 NDISC_REDIRECT, &ndopts);
1da177e4 2298
83a09abd 2299 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2300 if (!nrt)
1da177e4
LT
2301 goto out;
2302
2303 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2304 if (on_link)
2305 nrt->rt6i_flags &= ~RTF_GATEWAY;
2306
4e3fd7a0 2307 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2308
40e22e8f 2309 if (ip6_ins_rt(nrt))
1da177e4
LT
2310 goto out;
2311
d8d1f30b
CG
2312 netevent.old = &rt->dst;
2313 netevent.new = &nrt->dst;
71bcdba0 2314 netevent.daddr = &msg->dest;
60592833 2315 netevent.neigh = neigh;
8d71740c
TT
2316 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2317
38308473 2318 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2319 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2320 ip6_del_rt(rt);
1da177e4
LT
2321 }
2322
2323out:
e8599ff4 2324 neigh_release(neigh);
6e157b6a
DM
2325}
2326
1da177e4
LT
2327/*
2328 * Misc support functions
2329 */
2330
4b32b5ad
MKL
2331static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2332{
2333 BUG_ON(from->dst.from);
2334
2335 rt->rt6i_flags &= ~RTF_EXPIRES;
2336 dst_hold(&from->dst);
2337 rt->dst.from = &from->dst;
2338 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2339}
2340
83a09abd
MKL
2341static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2342{
2343 rt->dst.input = ort->dst.input;
2344 rt->dst.output = ort->dst.output;
2345 rt->rt6i_dst = ort->rt6i_dst;
2346 rt->dst.error = ort->dst.error;
2347 rt->rt6i_idev = ort->rt6i_idev;
2348 if (rt->rt6i_idev)
2349 in6_dev_hold(rt->rt6i_idev);
2350 rt->dst.lastuse = jiffies;
2351 rt->rt6i_gateway = ort->rt6i_gateway;
2352 rt->rt6i_flags = ort->rt6i_flags;
2353 rt6_set_from(rt, ort);
2354 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2355#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2356 rt->rt6i_src = ort->rt6i_src;
1da177e4 2357#endif
83a09abd
MKL
2358 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2359 rt->rt6i_table = ort->rt6i_table;
61adedf3 2360 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2361}
2362
70ceb4f5 2363#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2364static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 2365 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2366 const struct in6_addr *gwaddr,
2367 struct net_device *dev)
70ceb4f5 2368{
830218c1
DA
2369 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2370 int ifindex = dev->ifindex;
70ceb4f5
YH
2371 struct fib6_node *fn;
2372 struct rt6_info *rt = NULL;
c71099ac
TG
2373 struct fib6_table *table;
2374
830218c1 2375 table = fib6_get_table(net, tb_id);
38308473 2376 if (!table)
c71099ac 2377 return NULL;
70ceb4f5 2378
5744dd9b 2379 read_lock_bh(&table->tb6_lock);
67ba4152 2380 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2381 if (!fn)
2382 goto out;
2383
d8d1f30b 2384 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2385 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2386 continue;
2387 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2388 continue;
2389 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2390 continue;
d8d1f30b 2391 dst_hold(&rt->dst);
70ceb4f5
YH
2392 break;
2393 }
2394out:
5744dd9b 2395 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2396 return rt;
2397}
2398
efa2cea0 2399static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 2400 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2401 const struct in6_addr *gwaddr,
2402 struct net_device *dev,
95c96174 2403 unsigned int pref)
70ceb4f5 2404{
86872cb5 2405 struct fib6_config cfg = {
238fc7ea 2406 .fc_metric = IP6_RT_PRIO_USER,
830218c1 2407 .fc_ifindex = dev->ifindex,
86872cb5
TG
2408 .fc_dst_len = prefixlen,
2409 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2410 RTF_UP | RTF_PREF(pref),
15e47304 2411 .fc_nlinfo.portid = 0,
efa2cea0
DL
2412 .fc_nlinfo.nlh = NULL,
2413 .fc_nlinfo.nl_net = net,
86872cb5
TG
2414 };
2415
830218c1 2416 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
2417 cfg.fc_dst = *prefix;
2418 cfg.fc_gateway = *gwaddr;
70ceb4f5 2419
e317da96
YH
2420 /* We should treat it as a default route if prefix length is 0. */
2421 if (!prefixlen)
86872cb5 2422 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2423
86872cb5 2424 ip6_route_add(&cfg);
70ceb4f5 2425
830218c1 2426 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
2427}
2428#endif
2429
b71d1d42 2430struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2431{
830218c1 2432 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 2433 struct rt6_info *rt;
c71099ac 2434 struct fib6_table *table;
1da177e4 2435
830218c1 2436 table = fib6_get_table(dev_net(dev), tb_id);
38308473 2437 if (!table)
c71099ac 2438 return NULL;
1da177e4 2439
5744dd9b 2440 read_lock_bh(&table->tb6_lock);
67ba4152 2441 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2442 if (dev == rt->dst.dev &&
045927ff 2443 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2444 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2445 break;
2446 }
2447 if (rt)
d8d1f30b 2448 dst_hold(&rt->dst);
5744dd9b 2449 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2450 return rt;
2451}
2452
b71d1d42 2453struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2454 struct net_device *dev,
2455 unsigned int pref)
1da177e4 2456{
86872cb5 2457 struct fib6_config cfg = {
ca254490 2458 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2459 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2460 .fc_ifindex = dev->ifindex,
2461 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2462 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2463 .fc_nlinfo.portid = 0,
5578689a 2464 .fc_nlinfo.nlh = NULL,
c346dca1 2465 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2466 };
1da177e4 2467
4e3fd7a0 2468 cfg.fc_gateway = *gwaddr;
1da177e4 2469
830218c1
DA
2470 if (!ip6_route_add(&cfg)) {
2471 struct fib6_table *table;
2472
2473 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2474 if (table)
2475 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2476 }
1da177e4 2477
1da177e4
LT
2478 return rt6_get_dflt_router(gwaddr, dev);
2479}
2480
830218c1 2481static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
2482{
2483 struct rt6_info *rt;
2484
2485restart:
c71099ac 2486 read_lock_bh(&table->tb6_lock);
d8d1f30b 2487 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2488 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2489 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2490 dst_hold(&rt->dst);
c71099ac 2491 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2492 ip6_del_rt(rt);
1da177e4
LT
2493 goto restart;
2494 }
2495 }
c71099ac 2496 read_unlock_bh(&table->tb6_lock);
830218c1
DA
2497
2498 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2499}
2500
2501void rt6_purge_dflt_routers(struct net *net)
2502{
2503 struct fib6_table *table;
2504 struct hlist_head *head;
2505 unsigned int h;
2506
2507 rcu_read_lock();
2508
2509 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2510 head = &net->ipv6.fib_table_hash[h];
2511 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2512 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2513 __rt6_purge_dflt_routers(table);
2514 }
2515 }
2516
2517 rcu_read_unlock();
1da177e4
LT
2518}
2519
5578689a
DL
2520static void rtmsg_to_fib6_config(struct net *net,
2521 struct in6_rtmsg *rtmsg,
86872cb5
TG
2522 struct fib6_config *cfg)
2523{
2524 memset(cfg, 0, sizeof(*cfg));
2525
ca254490
DA
2526 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2527 : RT6_TABLE_MAIN;
86872cb5
TG
2528 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2529 cfg->fc_metric = rtmsg->rtmsg_metric;
2530 cfg->fc_expires = rtmsg->rtmsg_info;
2531 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2532 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2533 cfg->fc_flags = rtmsg->rtmsg_flags;
2534
5578689a 2535 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2536
4e3fd7a0
AD
2537 cfg->fc_dst = rtmsg->rtmsg_dst;
2538 cfg->fc_src = rtmsg->rtmsg_src;
2539 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2540}
2541
5578689a 2542int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2543{
86872cb5 2544 struct fib6_config cfg;
1da177e4
LT
2545 struct in6_rtmsg rtmsg;
2546 int err;
2547
67ba4152 2548 switch (cmd) {
1da177e4
LT
2549 case SIOCADDRT: /* Add a route */
2550 case SIOCDELRT: /* Delete a route */
af31f412 2551 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2552 return -EPERM;
2553 err = copy_from_user(&rtmsg, arg,
2554 sizeof(struct in6_rtmsg));
2555 if (err)
2556 return -EFAULT;
86872cb5 2557
5578689a 2558 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2559
1da177e4
LT
2560 rtnl_lock();
2561 switch (cmd) {
2562 case SIOCADDRT:
86872cb5 2563 err = ip6_route_add(&cfg);
1da177e4
LT
2564 break;
2565 case SIOCDELRT:
86872cb5 2566 err = ip6_route_del(&cfg);
1da177e4
LT
2567 break;
2568 default:
2569 err = -EINVAL;
2570 }
2571 rtnl_unlock();
2572
2573 return err;
3ff50b79 2574 }
1da177e4
LT
2575
2576 return -EINVAL;
2577}
2578
2579/*
2580 * Drop the packet on the floor
2581 */
2582
d5fdd6ba 2583static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2584{
612f09e8 2585 int type;
adf30907 2586 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2587 switch (ipstats_mib_noroutes) {
2588 case IPSTATS_MIB_INNOROUTES:
0660e03f 2589 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2590 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2591 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2592 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2593 break;
2594 }
2595 /* FALLTHROUGH */
2596 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2597 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2598 ipstats_mib_noroutes);
612f09e8
YH
2599 break;
2600 }
3ffe533c 2601 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2602 kfree_skb(skb);
2603 return 0;
2604}
2605
9ce8ade0
TG
/* Input handler: drop @skb via ip6_pkt_drop() with ICMPv6 code ICMPV6_NOROUTE, counted as IPSTATS_MIB_INNOROUTES. */
2606static int ip6_pkt_discard(struct sk_buff *skb)
2607{
612f09e8 2608 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2609}
2610
ede2059d 2611static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2612{
adf30907 2613 skb->dev = skb_dst(skb)->dev;
612f09e8 2614 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2615}
2616
9ce8ade0
TG
/* Input handler: drop @skb via ip6_pkt_drop() with ICMPv6 code ICMPV6_ADM_PROHIBITED, counted as IPSTATS_MIB_INNOROUTES. */
2617static int ip6_pkt_prohibit(struct sk_buff *skb)
2618{
612f09e8 2619 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2620}
2621
ede2059d 2622static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2623{
adf30907 2624 skb->dev = skb_dst(skb)->dev;
612f09e8 2625 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2626}
2627
1da177e4
LT
2628/*
2629 * Allocate a dst for local (unicast / anycast) address.
2630 */
2631
2632struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2633 const struct in6_addr *addr,
8f031519 2634 bool anycast)
1da177e4 2635{
ca254490 2636 u32 tb_id;
c346dca1 2637 struct net *net = dev_net(idev->dev);
5f02ce24
DA
2638 struct net_device *dev = net->loopback_dev;
2639 struct rt6_info *rt;
2640
2641 /* use L3 Master device as loopback for host routes if device
2642 * is enslaved and address is not link local or multicast
2643 */
2644 if (!rt6_need_strict(addr))
2645 dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2646
2647 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 2648 if (!rt)
1da177e4
LT
2649 return ERR_PTR(-ENOMEM);
2650
1da177e4
LT
2651 in6_dev_hold(idev);
2652
11d53b49 2653 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2654 rt->dst.input = ip6_input;
2655 rt->dst.output = ip6_output;
1da177e4 2656 rt->rt6i_idev = idev;
1da177e4
LT
2657
2658 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2659 if (anycast)
2660 rt->rt6i_flags |= RTF_ANYCAST;
2661 else
1da177e4 2662 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2663
550bab42 2664 rt->rt6i_gateway = *addr;
4e3fd7a0 2665 rt->rt6i_dst.addr = *addr;
1da177e4 2666 rt->rt6i_dst.plen = 128;
ca254490
DA
2667 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2668 rt->rt6i_table = fib6_get_table(net, tb_id);
8e3d5be7 2669 rt->dst.flags |= DST_NOCACHE;
1da177e4 2670
d8d1f30b 2671 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2672
2673 return rt;
2674}
2675
c3968a85
DW
2676/* remove deleted ip from prefsrc entries */
/* Argument bundle that rt6_remove_prefsrc() hands to the fib6_remove_prefsrc() callback: */
/* dev = device to match (NULL matches any), net = namespace whose null entry is skipped, addr = address being removed. */
2677struct arg_dev_net_ip {
2678 struct net_device *dev;
2679 struct net *net;
2680 struct in6_addr *addr;
2681};
2682
2683static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2684{
2685 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2686 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2687 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2688
d1918542 2689 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2690 rt != net->ipv6.ip6_null_entry &&
2691 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2692 /* remove prefsrc entry */
2693 rt->rt6i_prefsrc.plen = 0;
2694 }
2695 return 0;
2696}
2697
2698void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2699{
2700 struct net *net = dev_net(ifp->idev->dev);
2701 struct arg_dev_net_ip adni = {
2702 .dev = ifp->idev->dev,
2703 .net = net,
2704 .addr = &ifp->addr,
2705 };
0c3584d5 2706 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2707}
2708
be7a010d
DJ
2709#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2710#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2711
2712/* Remove routers and update dst entries when gateway turn into host. */
2713static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2714{
2715 struct in6_addr *gateway = (struct in6_addr *)arg;
2716
2717 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2718 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2719 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2720 return -1;
2721 }
2722 return 0;
2723}
2724
/* Run fib6_clean_tohost() over every FIB entry of @net, passing @gateway as the callback argument. */
2725void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2726{
2727 fib6_clean_all(net, fib6_clean_tohost, gateway);
2728}
2729
8ed67789
DL
/* Argument bundle that rt6_ifdown() hands to the fib6_ifdown() callback: */
/* dev = device going down (NULL matches every device), net = namespace whose null entry is kept. */
2730struct arg_dev_net {
2731 struct net_device *dev;
2732 struct net *net;
2733};
2734
1da177e4
LT
2735static int fib6_ifdown(struct rt6_info *rt, void *arg)
2736{
bc3ef660 2737 const struct arg_dev_net *adn = arg;
2738 const struct net_device *dev = adn->dev;
8ed67789 2739
d1918542 2740 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2741 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2742 return -1;
c159d30c 2743
1da177e4
LT
2744 return 0;
2745}
2746
f3db4851 2747void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2748{
8ed67789
DL
2749 struct arg_dev_net adn = {
2750 .dev = dev,
2751 .net = net,
2752 };
2753
0c3584d5 2754 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2755 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2756 if (dev)
2757 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2758}
2759
/* Argument bundle that rt6_mtu_change() hands to rt6_mtu_change_route(): */
/* dev = device whose MTU changed, mtu = the new MTU value. */
95c96174 2760struct rt6_mtu_change_arg {
1da177e4 2761 struct net_device *dev;
95c96174 2762 unsigned int mtu;
1da177e4
LT
2763};
2764
2765static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2766{
2767 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2768 struct inet6_dev *idev;
2769
2770 /* In IPv6 pmtu discovery is not optional,
2771 so that RTAX_MTU lock cannot disable it.
2772 We still use this lock to block changes
2773 caused by addrconf/ndisc.
2774 */
2775
2776 idev = __in6_dev_get(arg->dev);
38308473 2777 if (!idev)
1da177e4
LT
2778 return 0;
2779
2780 /* For administrative MTU increase, there is no way to discover
2781 IPv6 PMTU increase, so PMTU increase should be updated here.
2782 Since RFC 1981 doesn't include administrative MTU increase
2783 update PMTU increase is a MUST. (i.e. jumbo frame)
2784 */
2785 /*
2786 If new MTU is less than route PMTU, this new MTU will be the
2787 lowest MTU in the path, update the route PMTU to reflect PMTU
2788 decreases; if new MTU is greater than route PMTU, and the
2789 old MTU is the lowest MTU in the path, update the route PMTU
2790 to reflect the increase. In this case if the other nodes' MTU
2791 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 2792 PMTU discovery.
1da177e4 2793 */
d1918542 2794 if (rt->dst.dev == arg->dev &&
fb56be83 2795 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad
MKL
2796 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2797 if (rt->rt6i_flags & RTF_CACHE) {
2798 /* For RTF_CACHE with rt6i_pmtu == 0
2799 * (i.e. a redirected route),
2800 * the metrics of its rt->dst.from has already
2801 * been updated.
2802 */
2803 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2804 rt->rt6i_pmtu = arg->mtu;
2805 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2806 (dst_mtu(&rt->dst) < arg->mtu &&
2807 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2808 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2809 }
566cfd8f 2810 }
1da177e4
LT
2811 return 0;
2812}
2813
95c96174 2814void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2815{
c71099ac
TG
2816 struct rt6_mtu_change_arg arg = {
2817 .dev = dev,
2818 .mtu = mtu,
2819 };
1da177e4 2820
0c3584d5 2821 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2822}
2823
/* Netlink attribute policy passed to nlmsg_parse() by rtm_to_fib6_config(). */
/* Only the attributes listed here carry a type or length constraint in this table. */
ef7c79ed 2824static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2825 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2826 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2827 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2828 [RTA_PRIORITY] = { .type = NLA_U32 },
2829 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2830 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2831 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2832 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2833 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 2834 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 2835 [RTA_UID] = { .type = NLA_U32 },
86872cb5
TG
2836};
2837
2838static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2839 struct fib6_config *cfg)
1da177e4 2840{
86872cb5
TG
2841 struct rtmsg *rtm;
2842 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2843 unsigned int pref;
86872cb5 2844 int err;
1da177e4 2845
86872cb5
TG
2846 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2847 if (err < 0)
2848 goto errout;
1da177e4 2849
86872cb5
TG
2850 err = -EINVAL;
2851 rtm = nlmsg_data(nlh);
2852 memset(cfg, 0, sizeof(*cfg));
2853
2854 cfg->fc_table = rtm->rtm_table;
2855 cfg->fc_dst_len = rtm->rtm_dst_len;
2856 cfg->fc_src_len = rtm->rtm_src_len;
2857 cfg->fc_flags = RTF_UP;
2858 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2859 cfg->fc_type = rtm->rtm_type;
86872cb5 2860
ef2c7d7b
ND
2861 if (rtm->rtm_type == RTN_UNREACHABLE ||
2862 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2863 rtm->rtm_type == RTN_PROHIBIT ||
2864 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2865 cfg->fc_flags |= RTF_REJECT;
2866
ab79ad14
2867 if (rtm->rtm_type == RTN_LOCAL)
2868 cfg->fc_flags |= RTF_LOCAL;
2869
1f56a01f
MKL
2870 if (rtm->rtm_flags & RTM_F_CLONED)
2871 cfg->fc_flags |= RTF_CACHE;
2872
15e47304 2873 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2874 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2875 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2876
2877 if (tb[RTA_GATEWAY]) {
67b61f6c 2878 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2879 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2880 }
86872cb5
TG
2881
2882 if (tb[RTA_DST]) {
2883 int plen = (rtm->rtm_dst_len + 7) >> 3;
2884
2885 if (nla_len(tb[RTA_DST]) < plen)
2886 goto errout;
2887
2888 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2889 }
86872cb5
TG
2890
2891 if (tb[RTA_SRC]) {
2892 int plen = (rtm->rtm_src_len + 7) >> 3;
2893
2894 if (nla_len(tb[RTA_SRC]) < plen)
2895 goto errout;
2896
2897 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2898 }
86872cb5 2899
c3968a85 2900 if (tb[RTA_PREFSRC])
67b61f6c 2901 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2902
86872cb5
TG
2903 if (tb[RTA_OIF])
2904 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2905
2906 if (tb[RTA_PRIORITY])
2907 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2908
2909 if (tb[RTA_METRICS]) {
2910 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2911 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2912 }
86872cb5
TG
2913
2914 if (tb[RTA_TABLE])
2915 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2916
51ebd318
ND
2917 if (tb[RTA_MULTIPATH]) {
2918 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2919 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
2920
2921 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
2922 cfg->fc_mp_len);
2923 if (err < 0)
2924 goto errout;
51ebd318
ND
2925 }
2926
c78ba6d6
LR
2927 if (tb[RTA_PREF]) {
2928 pref = nla_get_u8(tb[RTA_PREF]);
2929 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2930 pref != ICMPV6_ROUTER_PREF_HIGH)
2931 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2932 cfg->fc_flags |= RTF_PREF(pref);
2933 }
2934
19e42e45
RP
2935 if (tb[RTA_ENCAP])
2936 cfg->fc_encap = tb[RTA_ENCAP];
2937
9ed59592 2938 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
2939 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2940
9ed59592
DA
2941 err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
2942 if (err < 0)
2943 goto errout;
2944 }
2945
32bc201e
XL
2946 if (tb[RTA_EXPIRES]) {
2947 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2948
2949 if (addrconf_finite_timeout(timeout)) {
2950 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2951 cfg->fc_flags |= RTF_EXPIRES;
2952 }
2953 }
2954
86872cb5
TG
2955 err = 0;
2956errout:
2957 return err;
1da177e4
LT
2958}
2959
6b9ea5a6
RP
/* One nexthop entry on the list built by ip6_route_info_append(): */
/* rt6_info = route for this nexthop, r_cfg = copy of its config, mxc = its converted metrics, next = list linkage. */
2960struct rt6_nh {
2961 struct rt6_info *rt6_info;
2962 struct fib6_config r_cfg;
2963 struct mx6_config mxc;
2964 struct list_head next;
2965};
2966
2967static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2968{
2969 struct rt6_nh *nh;
2970
2971 list_for_each_entry(nh, rt6_nh_list, next) {
2972 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2973 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2974 nh->r_cfg.fc_ifindex);
2975 }
2976}
2977
2978static int ip6_route_info_append(struct list_head *rt6_nh_list,
2979 struct rt6_info *rt, struct fib6_config *r_cfg)
2980{
2981 struct rt6_nh *nh;
2982 struct rt6_info *rtnh;
2983 int err = -EEXIST;
2984
2985 list_for_each_entry(nh, rt6_nh_list, next) {
2986 /* check if rt6_info already exists */
2987 rtnh = nh->rt6_info;
2988
2989 if (rtnh->dst.dev == rt->dst.dev &&
2990 rtnh->rt6i_idev == rt->rt6i_idev &&
2991 ipv6_addr_equal(&rtnh->rt6i_gateway,
2992 &rt->rt6i_gateway))
2993 return err;
2994 }
2995
2996 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2997 if (!nh)
2998 return -ENOMEM;
2999 nh->rt6_info = rt;
3000 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3001 if (err) {
3002 kfree(nh);
3003 return err;
3004 }
3005 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3006 list_add_tail(&nh->next, rt6_nh_list);
3007
3008 return 0;
3009}
3010
3011static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318
ND
3012{
3013 struct fib6_config r_cfg;
3014 struct rtnexthop *rtnh;
6b9ea5a6
RP
3015 struct rt6_info *rt;
3016 struct rt6_nh *err_nh;
3017 struct rt6_nh *nh, *nh_safe;
51ebd318
ND
3018 int remaining;
3019 int attrlen;
6b9ea5a6
RP
3020 int err = 1;
3021 int nhn = 0;
3022 int replace = (cfg->fc_nlinfo.nlh &&
3023 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3024 LIST_HEAD(rt6_nh_list);
51ebd318 3025
35f1b4e9 3026 remaining = cfg->fc_mp_len;
51ebd318 3027 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3028
6b9ea5a6
RP
3029 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3030 * rt6_info structs per nexthop
3031 */
51ebd318
ND
3032 while (rtnh_ok(rtnh, remaining)) {
3033 memcpy(&r_cfg, cfg, sizeof(*cfg));
3034 if (rtnh->rtnh_ifindex)
3035 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3036
3037 attrlen = rtnh_attrlen(rtnh);
3038 if (attrlen > 0) {
3039 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3040
3041 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3042 if (nla) {
67b61f6c 3043 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3044 r_cfg.fc_flags |= RTF_GATEWAY;
3045 }
19e42e45
RP
3046 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3047 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3048 if (nla)
3049 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3050 }
6b9ea5a6 3051
8c5b83f0
RP
3052 rt = ip6_route_info_create(&r_cfg);
3053 if (IS_ERR(rt)) {
3054 err = PTR_ERR(rt);
3055 rt = NULL;
6b9ea5a6 3056 goto cleanup;
8c5b83f0 3057 }
6b9ea5a6
RP
3058
3059 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3060 if (err) {
6b9ea5a6
RP
3061 dst_free(&rt->dst);
3062 goto cleanup;
3063 }
3064
3065 rtnh = rtnh_next(rtnh, &remaining);
3066 }
3067
3068 err_nh = NULL;
3069 list_for_each_entry(nh, &rt6_nh_list, next) {
3070 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
3071 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3072 nh->rt6_info = NULL;
3073 if (err) {
3074 if (replace && nhn)
3075 ip6_print_replace_route_err(&rt6_nh_list);
3076 err_nh = nh;
3077 goto add_errout;
51ebd318 3078 }
6b9ea5a6 3079
1a72418b 3080 /* Because each route is added like a single route we remove
27596472
MK
3081 * these flags after the first nexthop: if there is a collision,
3082 * we have already failed to add the first nexthop:
3083 * fib6_add_rt2node() has rejected it; when replacing, old
3084 * nexthops have been replaced by first new, the rest should
3085 * be added to it.
1a72418b 3086 */
27596472
MK
3087 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3088 NLM_F_REPLACE);
6b9ea5a6
RP
3089 nhn++;
3090 }
3091
3092 goto cleanup;
3093
3094add_errout:
3095 /* Delete routes that were already added */
3096 list_for_each_entry(nh, &rt6_nh_list, next) {
3097 if (err_nh == nh)
3098 break;
3099 ip6_route_del(&nh->r_cfg);
3100 }
3101
3102cleanup:
3103 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3104 if (nh->rt6_info)
3105 dst_free(&nh->rt6_info->dst);
52fe51f8 3106 kfree(nh->mxc.mx);
6b9ea5a6
RP
3107 list_del(&nh->next);
3108 kfree(nh);
3109 }
3110
3111 return err;
3112}
3113
3114static int ip6_route_multipath_del(struct fib6_config *cfg)
3115{
3116 struct fib6_config r_cfg;
3117 struct rtnexthop *rtnh;
3118 int remaining;
3119 int attrlen;
3120 int err = 1, last_err = 0;
3121
3122 remaining = cfg->fc_mp_len;
3123 rtnh = (struct rtnexthop *)cfg->fc_mp;
3124
3125 /* Parse a Multipath Entry */
3126 while (rtnh_ok(rtnh, remaining)) {
3127 memcpy(&r_cfg, cfg, sizeof(*cfg));
3128 if (rtnh->rtnh_ifindex)
3129 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3130
3131 attrlen = rtnh_attrlen(rtnh);
3132 if (attrlen > 0) {
3133 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3134
3135 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3136 if (nla) {
3137 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3138 r_cfg.fc_flags |= RTF_GATEWAY;
3139 }
3140 }
3141 err = ip6_route_del(&r_cfg);
3142 if (err)
3143 last_err = err;
3144
51ebd318
ND
3145 rtnh = rtnh_next(rtnh, &remaining);
3146 }
3147
3148 return last_err;
3149}
3150
67ba4152 3151static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3152{
86872cb5
TG
3153 struct fib6_config cfg;
3154 int err;
1da177e4 3155
86872cb5
TG
3156 err = rtm_to_fib6_config(skb, nlh, &cfg);
3157 if (err < 0)
3158 return err;
3159
51ebd318 3160 if (cfg.fc_mp)
6b9ea5a6 3161 return ip6_route_multipath_del(&cfg);
51ebd318
ND
3162 else
3163 return ip6_route_del(&cfg);
1da177e4
LT
3164}
3165
67ba4152 3166static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3167{
86872cb5
TG
3168 struct fib6_config cfg;
3169 int err;
1da177e4 3170
86872cb5
TG
3171 err = rtm_to_fib6_config(skb, nlh, &cfg);
3172 if (err < 0)
3173 return err;
3174
51ebd318 3175 if (cfg.fc_mp)
6b9ea5a6 3176 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3177 else
3178 return ip6_route_add(&cfg);
1da177e4
LT
3179}
3180
19e42e45 3181static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
3182{
3183 return NLMSG_ALIGN(sizeof(struct rtmsg))
3184 + nla_total_size(16) /* RTA_SRC */
3185 + nla_total_size(16) /* RTA_DST */
3186 + nla_total_size(16) /* RTA_GATEWAY */
3187 + nla_total_size(16) /* RTA_PREFSRC */
3188 + nla_total_size(4) /* RTA_TABLE */
3189 + nla_total_size(4) /* RTA_IIF */
3190 + nla_total_size(4) /* RTA_OIF */
3191 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3192 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3193 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3194 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3195 + nla_total_size(1) /* RTA_PREF */
61adedf3 3196 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
3197}
3198
191cd582
BH
3199static int rt6_fill_node(struct net *net,
3200 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3201 struct in6_addr *dst, struct in6_addr *src,
15e47304 3202 int iif, int type, u32 portid, u32 seq,
7bc570c8 3203 int prefix, int nowait, unsigned int flags)
1da177e4 3204{
4b32b5ad 3205 u32 metrics[RTAX_MAX];
1da177e4 3206 struct rtmsg *rtm;
2d7202bf 3207 struct nlmsghdr *nlh;
e3703b3d 3208 long expires;
9e762a4a 3209 u32 table;
1da177e4
LT
3210
3211 if (prefix) { /* user wants prefix routes only */
3212 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3213 /* success since this is not a prefix route */
3214 return 1;
3215 }
3216 }
3217
15e47304 3218 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3219 if (!nlh)
26932566 3220 return -EMSGSIZE;
2d7202bf
TG
3221
3222 rtm = nlmsg_data(nlh);
1da177e4
LT
3223 rtm->rtm_family = AF_INET6;
3224 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3225 rtm->rtm_src_len = rt->rt6i_src.plen;
3226 rtm->rtm_tos = 0;
c71099ac 3227 if (rt->rt6i_table)
9e762a4a 3228 table = rt->rt6i_table->tb6_id;
c71099ac 3229 else
9e762a4a
PM
3230 table = RT6_TABLE_UNSPEC;
3231 rtm->rtm_table = table;
c78679e8
DM
3232 if (nla_put_u32(skb, RTA_TABLE, table))
3233 goto nla_put_failure;
ef2c7d7b
ND
3234 if (rt->rt6i_flags & RTF_REJECT) {
3235 switch (rt->dst.error) {
3236 case -EINVAL:
3237 rtm->rtm_type = RTN_BLACKHOLE;
3238 break;
3239 case -EACCES:
3240 rtm->rtm_type = RTN_PROHIBIT;
3241 break;
b4949ab2
ND
3242 case -EAGAIN:
3243 rtm->rtm_type = RTN_THROW;
3244 break;
ef2c7d7b
ND
3245 default:
3246 rtm->rtm_type = RTN_UNREACHABLE;
3247 break;
3248 }
3249 }
38308473 3250 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3251 rtm->rtm_type = RTN_LOCAL;
d1918542 3252 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3253 rtm->rtm_type = RTN_LOCAL;
3254 else
3255 rtm->rtm_type = RTN_UNICAST;
3256 rtm->rtm_flags = 0;
35103d11 3257 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 3258 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
3259 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3260 rtm->rtm_flags |= RTNH_F_DEAD;
3261 }
1da177e4
LT
3262 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3263 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3264 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3265 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3266 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3267 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3268 rtm->rtm_protocol = RTPROT_RA;
3269 else
3270 rtm->rtm_protocol = RTPROT_KERNEL;
3271 }
1da177e4 3272
38308473 3273 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3274 rtm->rtm_flags |= RTM_F_CLONED;
3275
3276 if (dst) {
930345ea 3277 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3278 goto nla_put_failure;
1ab1457c 3279 rtm->rtm_dst_len = 128;
1da177e4 3280 } else if (rtm->rtm_dst_len)
930345ea 3281 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3282 goto nla_put_failure;
1da177e4
LT
3283#ifdef CONFIG_IPV6_SUBTREES
3284 if (src) {
930345ea 3285 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3286 goto nla_put_failure;
1ab1457c 3287 rtm->rtm_src_len = 128;
c78679e8 3288 } else if (rtm->rtm_src_len &&
930345ea 3289 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3290 goto nla_put_failure;
1da177e4 3291#endif
7bc570c8
YH
3292 if (iif) {
3293#ifdef CONFIG_IPV6_MROUTE
3294 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2cf75070
NA
3295 int err = ip6mr_get_route(net, skb, rtm, nowait,
3296 portid);
3297
7bc570c8
YH
3298 if (err <= 0) {
3299 if (!nowait) {
3300 if (err == 0)
3301 return 0;
3302 goto nla_put_failure;
3303 } else {
3304 if (err == -EMSGSIZE)
3305 goto nla_put_failure;
3306 }
3307 }
3308 } else
3309#endif
c78679e8
DM
3310 if (nla_put_u32(skb, RTA_IIF, iif))
3311 goto nla_put_failure;
7bc570c8 3312 } else if (dst) {
1da177e4 3313 struct in6_addr saddr_buf;
c78679e8 3314 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3315 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3316 goto nla_put_failure;
1da177e4 3317 }
2d7202bf 3318
c3968a85
DW
3319 if (rt->rt6i_prefsrc.plen) {
3320 struct in6_addr saddr_buf;
4e3fd7a0 3321 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3322 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3323 goto nla_put_failure;
c3968a85
DW
3324 }
3325
4b32b5ad
MKL
3326 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3327 if (rt->rt6i_pmtu)
3328 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3329 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3330 goto nla_put_failure;
3331
dd0cbf29 3332 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3333 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3334 goto nla_put_failure;
94f826b8 3335 }
2d7202bf 3336
c78679e8
DM
3337 if (rt->dst.dev &&
3338 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3339 goto nla_put_failure;
3340 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3341 goto nla_put_failure;
8253947e
LW
3342
3343 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3344
87a50699 3345 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3346 goto nla_put_failure;
2d7202bf 3347
c78ba6d6
LR
3348 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3349 goto nla_put_failure;
3350
ea7a8085
DA
3351 if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3352 goto nla_put_failure;
19e42e45 3353
053c095a
JB
3354 nlmsg_end(skb, nlh);
3355 return 0;
2d7202bf
TG
3356
3357nla_put_failure:
26932566
PM
3358 nlmsg_cancel(skb, nlh);
3359 return -EMSGSIZE;
1da177e4
LT
3360}
3361
1b43af54 3362int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3363{
3364 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3365 int prefix;
3366
2d7202bf
TG
3367 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3368 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3369 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3370 } else
3371 prefix = 0;
3372
191cd582
BH
3373 return rt6_fill_node(arg->net,
3374 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3375 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3376 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3377}
3378
67ba4152 3379static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3380{
3b1e0a65 3381 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3382 struct nlattr *tb[RTA_MAX+1];
3383 struct rt6_info *rt;
1da177e4 3384 struct sk_buff *skb;
ab364a6f 3385 struct rtmsg *rtm;
4c9483b2 3386 struct flowi6 fl6;
72331bc0 3387 int err, iif = 0, oif = 0;
1da177e4 3388
ab364a6f
TG
3389 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3390 if (err < 0)
3391 goto errout;
1da177e4 3392
ab364a6f 3393 err = -EINVAL;
4c9483b2 3394 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
3395 rtm = nlmsg_data(nlh);
3396 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
1da177e4 3397
ab364a6f
TG
3398 if (tb[RTA_SRC]) {
3399 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3400 goto errout;
3401
4e3fd7a0 3402 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3403 }
3404
3405 if (tb[RTA_DST]) {
3406 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3407 goto errout;
3408
4e3fd7a0 3409 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3410 }
3411
3412 if (tb[RTA_IIF])
3413 iif = nla_get_u32(tb[RTA_IIF]);
3414
3415 if (tb[RTA_OIF])
72331bc0 3416 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3417
2e47b291
LC
3418 if (tb[RTA_MARK])
3419 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3420
622ec2c9
LC
3421 if (tb[RTA_UID])
3422 fl6.flowi6_uid = make_kuid(current_user_ns(),
3423 nla_get_u32(tb[RTA_UID]));
3424 else
3425 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3426
1da177e4
LT
3427 if (iif) {
3428 struct net_device *dev;
72331bc0
SL
3429 int flags = 0;
3430
5578689a 3431 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3432 if (!dev) {
3433 err = -ENODEV;
ab364a6f 3434 goto errout;
1da177e4 3435 }
72331bc0
SL
3436
3437 fl6.flowi6_iif = iif;
3438
3439 if (!ipv6_addr_any(&fl6.saddr))
3440 flags |= RT6_LOOKUP_F_HAS_SADDR;
3441
3442 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3443 flags);
3444 } else {
3445 fl6.flowi6_oif = oif;
3446
3447 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3448 }
3449
ab364a6f 3450 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3451 if (!skb) {
94e187c0 3452 ip6_rt_put(rt);
ab364a6f
TG
3453 err = -ENOBUFS;
3454 goto errout;
3455 }
1da177e4 3456
ab364a6f
TG
3457 /* Reserve room for dummy headers, this skb can pass
3458 through good chunk of routing engine.
3459 */
459a98ed 3460 skb_reset_mac_header(skb);
ab364a6f 3461 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3462
d8d1f30b 3463 skb_dst_set(skb, &rt->dst);
1da177e4 3464
4c9483b2 3465 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3466 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3467 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3468 if (err < 0) {
ab364a6f
TG
3469 kfree_skb(skb);
3470 goto errout;
1da177e4
LT
3471 }
3472
15e47304 3473 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3474errout:
1da177e4 3475 return err;
1da177e4
LT
3476}
3477
37a1d361
RP
3478void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3479 unsigned int nlm_flags)
1da177e4
LT
3480{
3481 struct sk_buff *skb;
5578689a 3482 struct net *net = info->nl_net;
528c4ceb
DL
3483 u32 seq;
3484 int err;
3485
3486 err = -ENOBUFS;
38308473 3487 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3488
19e42e45 3489 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3490 if (!skb)
21713ebc
TG
3491 goto errout;
3492
191cd582 3493 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
37a1d361 3494 event, info->portid, seq, 0, 0, nlm_flags);
26932566
PM
3495 if (err < 0) {
3496 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3497 WARN_ON(err == -EMSGSIZE);
3498 kfree_skb(skb);
3499 goto errout;
3500 }
15e47304 3501 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3502 info->nlh, gfp_any());
3503 return;
21713ebc
TG
3504errout:
3505 if (err < 0)
5578689a 3506 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3507}
3508
8ed67789 3509static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3510 unsigned long event, void *ptr)
8ed67789 3511{
351638e7 3512 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3513 struct net *net = dev_net(dev);
8ed67789 3514
e12b8e2f
WC
3515 if (!(dev->flags & IFF_LOOPBACK))
3516 return NOTIFY_OK;
3517
3518 if (event == NETDEV_REGISTER) {
d8d1f30b 3519 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3520 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3521#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3522 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3523 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3524 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 3525 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
e12b8e2f
WC
3526#endif
3527 } else if (event == NETDEV_UNREGISTER) {
3528 in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
3529#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3530 in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
3531 in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
3532#endif
3533 }
3534
3535 return NOTIFY_OK;
3536}
3537
1da177e4
LT
3538/*
3539 * /proc
3540 */
3541
3542#ifdef CONFIG_PROC_FS
3543
33120b30
AD
/*
 * File operations for the "ipv6_route" proc entry that
 * ip6_route_net_init_late() creates in each namespace.
 */
3544static const struct file_operations ipv6_route_proc_fops = {
3545 .owner = THIS_MODULE,
3546 .open = ipv6_route_open,
3547 .read = seq_read,
3548 .llseek = seq_lseek,
8d2ca1d7 3549 .release = seq_release_net,
33120b30
AD
3550};
3551
1da177e4
LT
3552static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3553{
69ddb805 3554 struct net *net = (struct net *)seq->private;
1da177e4 3555 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3556 net->ipv6.rt6_stats->fib_nodes,
3557 net->ipv6.rt6_stats->fib_route_nodes,
3558 net->ipv6.rt6_stats->fib_rt_alloc,
3559 net->ipv6.rt6_stats->fib_rt_entries,
3560 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3561 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3562 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3563
3564 return 0;
3565}
3566
/* open() for the "rt6_stats" proc entry: single-record seq_file bound to rt6_stats_seq_show(). */
3567static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3568{
de05c557 3569 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3570}
3571
/*
 * File operations for the "rt6_stats" proc entry that
 * ip6_route_net_init_late() creates in each namespace.
 */
9a32144e 3572static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3573 .owner = THIS_MODULE,
3574 .open = rt6_stats_seq_open,
3575 .read = seq_read,
3576 .llseek = seq_lseek,
b6fcbdb4 3577 .release = single_release_net,
1da177e4
LT
3578};
3579#endif /* CONFIG_PROC_FS */
3580
3581#ifdef CONFIG_SYSCTL
3582
1da177e4 3583static
fe2c6338 3584int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3585 void __user *buffer, size_t *lenp, loff_t *ppos)
3586{
c486da34
LAG
3587 struct net *net;
3588 int delay;
3589 if (!write)
1da177e4 3590 return -EINVAL;
c486da34
LAG
3591
3592 net = (struct net *)ctl->extra1;
3593 delay = net->ipv6.sysctl.flush_delay;
3594 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3595 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3596 return 0;
1da177e4
LT
3597}
3598
/*
 * Template for the per-namespace IPv6 route sysctl table.
 *
 * ipv6_route_sysctl_init() duplicates this array and rebinds every entry's
 * .data to the namespace's own storage BY ARRAY INDEX, so the order of the
 * entries below must stay in sync with that function.  The .data pointers
 * given here (init_net / ip6_dst_ops_template) are overwritten in each copy.
 *
 * "gc_min_interval" and "gc_min_interval_ms" are two views of the same
 * variable; they differ only in the proc handler used.
 */
fe2c6338 3599struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3600 {
1da177e4 3601 .procname = "flush",
4990509f 3602 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3603 .maxlen = sizeof(int),
89c8b3a1 3604 .mode = 0200,
6d9f239a 3605 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3606 },
3607 {
1da177e4 3608 .procname = "gc_thresh",
9a7ec3a9 3609 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3610 .maxlen = sizeof(int),
3611 .mode = 0644,
6d9f239a 3612 .proc_handler = proc_dointvec,
1da177e4
LT
3613 },
3614 {
1da177e4 3615 .procname = "max_size",
4990509f 3616 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3617 .maxlen = sizeof(int),
3618 .mode = 0644,
6d9f239a 3619 .proc_handler = proc_dointvec,
1da177e4
LT
3620 },
3621 {
1da177e4 3622 .procname = "gc_min_interval",
4990509f 3623 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3624 .maxlen = sizeof(int),
3625 .mode = 0644,
6d9f239a 3626 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3627 },
3628 {
1da177e4 3629 .procname = "gc_timeout",
4990509f 3630 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3631 .maxlen = sizeof(int),
3632 .mode = 0644,
6d9f239a 3633 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3634 },
3635 {
1da177e4 3636 .procname = "gc_interval",
4990509f 3637 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3638 .maxlen = sizeof(int),
3639 .mode = 0644,
6d9f239a 3640 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3641 },
3642 {
1da177e4 3643 .procname = "gc_elasticity",
4990509f 3644 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3645 .maxlen = sizeof(int),
3646 .mode = 0644,
f3d3f616 3647 .proc_handler = proc_dointvec,
1da177e4
LT
3648 },
3649 {
1da177e4 3650 .procname = "mtu_expires",
4990509f 3651 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3652 .maxlen = sizeof(int),
3653 .mode = 0644,
6d9f239a 3654 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3655 },
3656 {
1da177e4 3657 .procname = "min_adv_mss",
4990509f 3658 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3659 .maxlen = sizeof(int),
3660 .mode = 0644,
f3d3f616 3661 .proc_handler = proc_dointvec,
1da177e4
LT
3662 },
3663 {
1da177e4 3664 .procname = "gc_min_interval_ms",
4990509f 3665 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3666 .maxlen = sizeof(int),
3667 .mode = 0644,
6d9f239a 3668 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3669 },
/* empty terminator entry */
f8572d8f 3670 { }
1da177e4
LT
3671};
3672
2c8c1e72 3673struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3674{
3675 struct ctl_table *table;
3676
3677 table = kmemdup(ipv6_route_table_template,
3678 sizeof(ipv6_route_table_template),
3679 GFP_KERNEL);
5ee09105
YH
3680
3681 if (table) {
3682 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3683 table[0].extra1 = net;
86393e52 3684 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3685 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3686 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3687 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3688 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3689 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3690 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3691 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3692 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3693
3694 /* Don't export sysctls to unprivileged users */
3695 if (net->user_ns != &init_user_ns)
3696 table[0].procname = NULL;
5ee09105
YH
3697 }
3698
760f2d01
DL
3699 return table;
3700}
1da177e4
LT
3701#endif
3702
2c8c1e72 3703static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3704{
633d424b 3705 int ret = -ENOMEM;
8ed67789 3706
86393e52
AD
3707 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3708 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3709
fc66f95c
ED
3710 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3711 goto out_ip6_dst_ops;
3712
8ed67789
DL
3713 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3714 sizeof(*net->ipv6.ip6_null_entry),
3715 GFP_KERNEL);
3716 if (!net->ipv6.ip6_null_entry)
fc66f95c 3717 goto out_ip6_dst_entries;
d8d1f30b 3718 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3719 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3720 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3721 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3722 ip6_template_metrics, true);
8ed67789
DL
3723
3724#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3725 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3726 sizeof(*net->ipv6.ip6_prohibit_entry),
3727 GFP_KERNEL);
68fffc67
PZ
3728 if (!net->ipv6.ip6_prohibit_entry)
3729 goto out_ip6_null_entry;
d8d1f30b 3730 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3731 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3732 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3733 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3734 ip6_template_metrics, true);
8ed67789
DL
3735
3736 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3737 sizeof(*net->ipv6.ip6_blk_hole_entry),
3738 GFP_KERNEL);
68fffc67
PZ
3739 if (!net->ipv6.ip6_blk_hole_entry)
3740 goto out_ip6_prohibit_entry;
d8d1f30b 3741 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3742 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3743 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3744 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3745 ip6_template_metrics, true);
8ed67789
DL
3746#endif
3747
b339a47c
PZ
3748 net->ipv6.sysctl.flush_delay = 0;
3749 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3750 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3751 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3752 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3753 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3754 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3755 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3756
6891a346
BT
3757 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3758
8ed67789
DL
3759 ret = 0;
3760out:
3761 return ret;
f2fc6a54 3762
68fffc67
PZ
3763#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3764out_ip6_prohibit_entry:
3765 kfree(net->ipv6.ip6_prohibit_entry);
3766out_ip6_null_entry:
3767 kfree(net->ipv6.ip6_null_entry);
3768#endif
fc66f95c
ED
3769out_ip6_dst_entries:
3770 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3771out_ip6_dst_ops:
f2fc6a54 3772 goto out;
cdb18761
DL
3773}
3774
/*
 * Per-namespace teardown matching ip6_route_net_init(): free the special
 * route entries, then destroy the namespace's dst entry accounting.
 */
2c8c1e72 3775static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3776{
8ed67789
DL
3777 kfree(net->ipv6.ip6_null_entry);
3778#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3779 kfree(net->ipv6.ip6_prohibit_entry);
3780 kfree(net->ipv6.ip6_blk_hole_entry);
3781#endif
41bb78b4 3782 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3783}
3784
d189634e
TG
/*
 * Late per-namespace init: create the "ipv6_route" and "rt6_stats" proc
 * entries (only with CONFIG_PROC_FS).  The proc_create() results are not
 * checked; this function always returns 0.
 */
3785static int __net_init ip6_route_net_init_late(struct net *net)
3786{
3787#ifdef CONFIG_PROC_FS
d4beaa66
G
3788 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3789 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3790#endif
3791 return 0;
3792}
3793
/* Late per-namespace exit: remove the proc entries created by ip6_route_net_init_late(). */
3794static void __net_exit ip6_route_net_exit_late(struct net *net)
3795{
3796#ifdef CONFIG_PROC_FS
ece31ffd
G
3797 remove_proc_entry("ipv6_route", net->proc_net);
3798 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3799#endif
3800}
3801
cdb18761
DL
/* Per-namespace hooks for the core route state; registered in ip6_route_init(). */
3802static struct pernet_operations ip6_route_net_ops = {
3803 .init = ip6_route_net_init,
3804 .exit = ip6_route_net_exit,
3805};
3806
c3426b47
DM
3807static int __net_init ipv6_inetpeer_init(struct net *net)
3808{
3809 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3810
3811 if (!bp)
3812 return -ENOMEM;
3813 inet_peer_base_init(bp);
3814 net->ipv6.peers = bp;
3815 return 0;
3816}
3817
/*
 * Tear down the inet_peer base of @net: detach it from the namespace first,
 * then invalidate its tree and free the base itself.
 */
3818static void __net_exit ipv6_inetpeer_exit(struct net *net)
3819{
3820 struct inet_peer_base *bp = net->ipv6.peers;
3821
3822 net->ipv6.peers = NULL;
56a6b248 3823 inetpeer_invalidate_tree(bp);
c3426b47
DM
3824 kfree(bp);
3825}
3826
/* Per-namespace hooks for the inet_peer base; registered in ip6_route_init(). */
2b823f72 3827static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3828 .init = ipv6_inetpeer_init,
3829 .exit = ipv6_inetpeer_exit,
3830};
3831
d189634e
TG
/* Per-namespace hooks for the proc entries; registered in ip6_route_init() after fib6 rules. */
3832static struct pernet_operations ip6_route_net_late_ops = {
3833 .init = ip6_route_net_init_late,
3834 .exit = ip6_route_net_exit_late,
3835};
3836
8ed67789
DL
/*
 * Netdevice notifier block for ip6_route_dev_notify().  Its priority is set
 * 10 below ADDRCONF_NOTIFY_PRIORITY.
 */
3837static struct notifier_block ip6_route_dev_notifier = {
3838 .notifier_call = ip6_route_dev_notify,
e12b8e2f 3839 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
3840};
3841
3f0bbd5f
WC
3842void __init ip6_route_init_special_entries(void)
3843{
3844 /* Registering of the loopback is done before this portion of code,
3845 * the loopback reference in rt6_info will not be taken, do it
3846 * manually for init_net */
3847 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3848 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3849 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3850 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3851 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3852 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3853 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3854 #endif
3855}
3856
433d49c3 3857int __init ip6_route_init(void)
1da177e4 3858{
433d49c3 3859 int ret;
8d0b94af 3860 int cpu;
433d49c3 3861
9a7ec3a9
DL
3862 ret = -ENOMEM;
3863 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3864 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3865 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3866 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3867 goto out;
14e50e57 3868
fc66f95c 3869 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3870 if (ret)
bdb3289f 3871 goto out_kmem_cache;
bdb3289f 3872
c3426b47
DM
3873 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3874 if (ret)
e8803b6c 3875 goto out_dst_entries;
2a0c451a 3876
7e52b33b
DM
3877 ret = register_pernet_subsys(&ip6_route_net_ops);
3878 if (ret)
3879 goto out_register_inetpeer;
c3426b47 3880
5dc121e9
AE
3881 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3882
e8803b6c 3883 ret = fib6_init();
433d49c3 3884 if (ret)
8ed67789 3885 goto out_register_subsys;
433d49c3 3886
433d49c3
DL
3887 ret = xfrm6_init();
3888 if (ret)
e8803b6c 3889 goto out_fib6_init;
c35b7e72 3890
433d49c3
DL
3891 ret = fib6_rules_init();
3892 if (ret)
3893 goto xfrm6_init;
7e5449c2 3894
d189634e
TG
3895 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3896 if (ret)
3897 goto fib6_rules_init;
3898
433d49c3 3899 ret = -ENOBUFS;
c7ac8679
GR
3900 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3901 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3902 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3903 goto out_register_late_subsys;
c127ea2c 3904
8ed67789 3905 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3906 if (ret)
d189634e 3907 goto out_register_late_subsys;
8ed67789 3908
8d0b94af
MKL
3909 for_each_possible_cpu(cpu) {
3910 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3911
3912 INIT_LIST_HEAD(&ul->head);
3913 spin_lock_init(&ul->lock);
3914 }
3915
433d49c3
DL
3916out:
3917 return ret;
3918
d189634e
TG
3919out_register_late_subsys:
3920 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3921fib6_rules_init:
433d49c3
DL
3922 fib6_rules_cleanup();
3923xfrm6_init:
433d49c3 3924 xfrm6_fini();
2a0c451a
TG
3925out_fib6_init:
3926 fib6_gc_cleanup();
8ed67789
DL
3927out_register_subsys:
3928 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3929out_register_inetpeer:
3930 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3931out_dst_entries:
3932 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3933out_kmem_cache:
f2fc6a54 3934 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3935 goto out;
1da177e4
LT
3936}
3937
/*
 * Tear down what ip6_route_init() set up: the netdevice notifier, the
 * per-namespace ops, FIB rules, xfrm6, the FIB GC, blackhole dst accounting
 * and finally the rt6_info slab cache.
 */
3938void ip6_route_cleanup(void)
3939{
8ed67789 3940 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3941 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3942 fib6_rules_cleanup();
1da177e4 3943 xfrm6_fini();
1da177e4 3944 fib6_gc_cleanup();
c3426b47 3945 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3946 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3947 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3948 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3949}