]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - net/ipv6/route.c
net: add confirm_neigh method to dst_ops
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4 66
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/*
 * Result of the next-hop reachability check used while scoring routes.
 * Negative values are failures; rt6_score_route() passes them through
 * to find_match(), which treats each one differently.
 */
enum rt6_nud_state {
	/* find_match() skips the route entirely */
	RT6_NUD_FAIL_HARD	= -3,
	/* returned when the gateway's neighbour entry is NUD_FAILED */
	RT6_NUD_FAIL_PROBE	= -2,
	/* find_match() scores the route 0 and requests round-robin */
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1,
};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
1da177e4 101
70ceb4f5 102#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 103static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 104 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
105 const struct in6_addr *gwaddr,
106 struct net_device *dev,
95c96174 107 unsigned int pref);
efa2cea0 108static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 109 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
110 const struct in6_addr *gwaddr,
111 struct net_device *dev);
70ceb4f5
YH
112#endif
113
8d0b94af
MKL
114struct uncached_list {
115 spinlock_t lock;
116 struct list_head head;
117};
118
119static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
120
121static void rt6_uncached_list_add(struct rt6_info *rt)
122{
123 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
124
125 rt->dst.flags |= DST_NOCACHE;
126 rt->rt6i_uncached_list = ul;
127
128 spin_lock_bh(&ul->lock);
129 list_add_tail(&rt->rt6i_uncached, &ul->head);
130 spin_unlock_bh(&ul->lock);
131}
132
133static void rt6_uncached_list_del(struct rt6_info *rt)
134{
135 if (!list_empty(&rt->rt6i_uncached)) {
136 struct uncached_list *ul = rt->rt6i_uncached_list;
137
138 spin_lock_bh(&ul->lock);
139 list_del(&rt->rt6i_uncached);
140 spin_unlock_bh(&ul->lock);
141 }
142}
143
144static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
145{
146 struct net_device *loopback_dev = net->loopback_dev;
147 int cpu;
148
e332bc67
EB
149 if (dev == loopback_dev)
150 return;
151
8d0b94af
MKL
152 for_each_possible_cpu(cpu) {
153 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
154 struct rt6_info *rt;
155
156 spin_lock_bh(&ul->lock);
157 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
158 struct inet6_dev *rt_idev = rt->rt6i_idev;
159 struct net_device *rt_dev = rt->dst.dev;
160
e332bc67 161 if (rt_idev->dev == dev) {
8d0b94af
MKL
162 rt->rt6i_idev = in6_dev_get(loopback_dev);
163 in6_dev_put(rt_idev);
164 }
165
e332bc67 166 if (rt_dev == dev) {
8d0b94af
MKL
167 rt->dst.dev = loopback_dev;
168 dev_hold(rt->dst.dev);
169 dev_put(rt_dev);
170 }
171 }
172 spin_unlock_bh(&ul->lock);
173 }
174}
175
d52d3997
MKL
176static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
177{
178 return dst_metrics_write_ptr(rt->dst.from);
179}
180
06582540
DM
181static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
182{
4b32b5ad 183 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 184
d52d3997
MKL
185 if (rt->rt6i_flags & RTF_PCPU)
186 return rt6_pcpu_cow_metrics(rt);
187 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
188 return NULL;
189 else
3b471175 190 return dst_cow_metrics_generic(dst, old);
06582540
DM
191}
192
f894cbf8
DM
193static inline const void *choose_neigh_daddr(struct rt6_info *rt,
194 struct sk_buff *skb,
195 const void *daddr)
39232973
DM
196{
197 struct in6_addr *p = &rt->rt6i_gateway;
198
a7563f34 199 if (!ipv6_addr_any(p))
39232973 200 return (const void *) p;
f894cbf8
DM
201 else if (skb)
202 return &ipv6_hdr(skb)->daddr;
39232973
DM
203 return daddr;
204}
205
f894cbf8
DM
206static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
207 struct sk_buff *skb,
208 const void *daddr)
d3aaeb38 209{
39232973
DM
210 struct rt6_info *rt = (struct rt6_info *) dst;
211 struct neighbour *n;
212
f894cbf8 213 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 214 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
215 if (n)
216 return n;
217 return neigh_create(&nd_tbl, daddr, dst->dev);
218}
219
111757e7
JA
220static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
221{
222 struct net_device *dev = dst->dev;
223 struct rt6_info *rt = (struct rt6_info *)dst;
224
225 daddr = choose_neigh_daddr(rt, NULL, daddr);
226 if (!daddr)
227 return;
228 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
229 return;
230 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
231 return;
232 __ipv6_confirm_neigh(dev, daddr);
233}
234
9a7ec3a9 235static struct dst_ops ip6_dst_ops_template = {
1da177e4 236 .family = AF_INET6,
1da177e4
LT
237 .gc = ip6_dst_gc,
238 .gc_thresh = 1024,
239 .check = ip6_dst_check,
0dbaee3b 240 .default_advmss = ip6_default_advmss,
ebb762f2 241 .mtu = ip6_mtu,
06582540 242 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
243 .destroy = ip6_dst_destroy,
244 .ifdown = ip6_dst_ifdown,
245 .negative_advice = ip6_negative_advice,
246 .link_failure = ip6_link_failure,
247 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 248 .redirect = rt6_do_redirect,
9f8955cc 249 .local_out = __ip6_local_out,
d3aaeb38 250 .neigh_lookup = ip6_neigh_lookup,
111757e7 251 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
252};
253
ebb762f2 254static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 255{
618f9bc7
SK
256 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
257
258 return mtu ? : dst->dev->mtu;
ec831ea7
RD
259}
260
6700c270
DM
261static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
262 struct sk_buff *skb, u32 mtu)
14e50e57
DM
263{
264}
265
6700c270
DM
/* Redirects are ignored for blackhole dsts. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
270
14e50e57
DM
271static struct dst_ops ip6_dst_blackhole_ops = {
272 .family = AF_INET6,
14e50e57
DM
273 .destroy = ip6_dst_destroy,
274 .check = ip6_dst_check,
ebb762f2 275 .mtu = ip6_blackhole_mtu,
214f45c9 276 .default_advmss = ip6_default_advmss,
14e50e57 277 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 278 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 279 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 280 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
281};
282
62fa8a84 283static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 284 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
285};
286
fb0af4c7 287static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
288 .dst = {
289 .__refcnt = ATOMIC_INIT(1),
290 .__use = 1,
2c20cbd7 291 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 292 .error = -ENETUNREACH,
d8d1f30b
CG
293 .input = ip6_pkt_discard,
294 .output = ip6_pkt_discard_out,
1da177e4
LT
295 },
296 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 297 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
298 .rt6i_metric = ~(u32) 0,
299 .rt6i_ref = ATOMIC_INIT(1),
300};
301
101367c2
TG
302#ifdef CONFIG_IPV6_MULTIPLE_TABLES
303
fb0af4c7 304static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
305 .dst = {
306 .__refcnt = ATOMIC_INIT(1),
307 .__use = 1,
2c20cbd7 308 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 309 .error = -EACCES,
d8d1f30b
CG
310 .input = ip6_pkt_prohibit,
311 .output = ip6_pkt_prohibit_out,
101367c2
TG
312 },
313 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 314 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
315 .rt6i_metric = ~(u32) 0,
316 .rt6i_ref = ATOMIC_INIT(1),
317};
318
fb0af4c7 319static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
320 .dst = {
321 .__refcnt = ATOMIC_INIT(1),
322 .__use = 1,
2c20cbd7 323 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 324 .error = -EINVAL,
d8d1f30b 325 .input = dst_discard,
ede2059d 326 .output = dst_discard_out,
101367c2
TG
327 },
328 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 329 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
330 .rt6i_metric = ~(u32) 0,
331 .rt6i_ref = ATOMIC_INIT(1),
332};
333
334#endif
335
ebfa45f0
MKL
336static void rt6_info_init(struct rt6_info *rt)
337{
338 struct dst_entry *dst = &rt->dst;
339
340 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
341 INIT_LIST_HEAD(&rt->rt6i_siblings);
342 INIT_LIST_HEAD(&rt->rt6i_uncached);
343}
344
1da177e4 345/* allocate dst with ip6_dst_ops */
d52d3997
MKL
346static struct rt6_info *__ip6_dst_alloc(struct net *net,
347 struct net_device *dev,
ad706862 348 int flags)
1da177e4 349{
97bab73f 350 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 351 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 352
ebfa45f0
MKL
353 if (rt)
354 rt6_info_init(rt);
8104891b 355
cf911662 356 return rt;
1da177e4
LT
357}
358
9ab179d8
DA
359struct rt6_info *ip6_dst_alloc(struct net *net,
360 struct net_device *dev,
361 int flags)
d52d3997 362{
ad706862 363 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
364
365 if (rt) {
366 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
367 if (rt->rt6i_pcpu) {
368 int cpu;
369
370 for_each_possible_cpu(cpu) {
371 struct rt6_info **p;
372
373 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
374 /* no one shares rt */
375 *p = NULL;
376 }
377 } else {
378 dst_destroy((struct dst_entry *)rt);
379 return NULL;
380 }
381 }
382
383 return rt;
384}
9ab179d8 385EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 386
1da177e4
LT
387static void ip6_dst_destroy(struct dst_entry *dst)
388{
389 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 390 struct dst_entry *from = dst->from;
8d0b94af 391 struct inet6_dev *idev;
1da177e4 392
4b32b5ad 393 dst_destroy_metrics_generic(dst);
87775312 394 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
395 rt6_uncached_list_del(rt);
396
397 idev = rt->rt6i_idev;
38308473 398 if (idev) {
1da177e4
LT
399 rt->rt6i_idev = NULL;
400 in6_dev_put(idev);
1ab1457c 401 }
1716a961 402
ecd98837
YH
403 dst->from = NULL;
404 dst_release(from);
b3419363
DM
405}
406
1da177e4
LT
407static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
408 int how)
409{
410 struct rt6_info *rt = (struct rt6_info *)dst;
411 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 412 struct net_device *loopback_dev =
c346dca1 413 dev_net(dev)->loopback_dev;
1da177e4 414
97cac082
DM
415 if (dev != loopback_dev) {
416 if (idev && idev->dev == dev) {
417 struct inet6_dev *loopback_idev =
418 in6_dev_get(loopback_dev);
419 if (loopback_idev) {
420 rt->rt6i_idev = loopback_idev;
421 in6_dev_put(idev);
422 }
423 }
1da177e4
LT
424 }
425}
426
5973fb1e
MKL
427static bool __rt6_check_expired(const struct rt6_info *rt)
428{
429 if (rt->rt6i_flags & RTF_EXPIRES)
430 return time_after(jiffies, rt->dst.expires);
431 else
432 return false;
433}
434
a50feda5 435static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 436{
1716a961
G
437 if (rt->rt6i_flags & RTF_EXPIRES) {
438 if (time_after(jiffies, rt->dst.expires))
a50feda5 439 return true;
1716a961 440 } else if (rt->dst.from) {
3fd91fb3 441 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 442 }
a50feda5 443 return false;
1da177e4
LT
444}
445
51ebd318
ND
446/* Multipath route selection:
447 * Hash based function using packet header and flowlabel.
448 * Adapted from fib_info_hashfn()
449 */
450static int rt6_info_hash_nhsfn(unsigned int candidate_count,
451 const struct flowi6 *fl6)
452{
644d0e65 453 return get_hash_from_flowi6(fl6) % candidate_count;
51ebd318
ND
454}
455
456static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
457 struct flowi6 *fl6, int oif,
458 int strict)
51ebd318
ND
459{
460 struct rt6_info *sibling, *next_sibling;
461 int route_choosen;
462
463 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
464 /* Don't change the route, if route_choosen == 0
465 * (siblings does not include ourself)
466 */
467 if (route_choosen)
468 list_for_each_entry_safe(sibling, next_sibling,
469 &match->rt6i_siblings, rt6i_siblings) {
470 route_choosen--;
471 if (route_choosen == 0) {
52bd4c0c
ND
472 if (rt6_score_route(sibling, oif, strict) < 0)
473 break;
51ebd318
ND
474 match = sibling;
475 break;
476 }
477 }
478 return match;
479}
480
1da177e4 481/*
c71099ac 482 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
483 */
484
8ed67789
DL
485static inline struct rt6_info *rt6_device_match(struct net *net,
486 struct rt6_info *rt,
b71d1d42 487 const struct in6_addr *saddr,
1da177e4 488 int oif,
d420895e 489 int flags)
1da177e4
LT
490{
491 struct rt6_info *local = NULL;
492 struct rt6_info *sprt;
493
dd3abc4e
YH
494 if (!oif && ipv6_addr_any(saddr))
495 goto out;
496
d8d1f30b 497 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 498 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
499
500 if (oif) {
1da177e4
LT
501 if (dev->ifindex == oif)
502 return sprt;
503 if (dev->flags & IFF_LOOPBACK) {
38308473 504 if (!sprt->rt6i_idev ||
1da177e4 505 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 506 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 507 continue;
17fb0b2b
DA
508 if (local &&
509 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
510 continue;
511 }
512 local = sprt;
513 }
dd3abc4e
YH
514 } else {
515 if (ipv6_chk_addr(net, saddr, dev,
516 flags & RT6_LOOKUP_F_IFACE))
517 return sprt;
1da177e4 518 }
dd3abc4e 519 }
1da177e4 520
dd3abc4e 521 if (oif) {
1da177e4
LT
522 if (local)
523 return local;
524
d420895e 525 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 526 return net->ipv6.ip6_null_entry;
1da177e4 527 }
dd3abc4e 528out:
1da177e4
LT
529 return rt;
530}
531
27097255 532#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
533struct __rt6_probe_work {
534 struct work_struct work;
535 struct in6_addr target;
536 struct net_device *dev;
537};
538
539static void rt6_probe_deferred(struct work_struct *w)
540{
541 struct in6_addr mcaddr;
542 struct __rt6_probe_work *work =
543 container_of(w, struct __rt6_probe_work, work);
544
545 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 546 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 547 dev_put(work->dev);
662f5533 548 kfree(work);
c2f17e82
HFS
549}
550
27097255
YH
551static void rt6_probe(struct rt6_info *rt)
552{
990edb42 553 struct __rt6_probe_work *work;
f2c31e32 554 struct neighbour *neigh;
27097255
YH
555 /*
556 * Okay, this does not seem to be appropriate
557 * for now, however, we need to check if it
558 * is really so; aka Router Reachability Probing.
559 *
560 * Router Reachability Probe MUST be rate-limited
561 * to no more than one per minute.
562 */
2152caea 563 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 564 return;
2152caea
YH
565 rcu_read_lock_bh();
566 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
567 if (neigh) {
8d6c31bf
MKL
568 if (neigh->nud_state & NUD_VALID)
569 goto out;
570
990edb42 571 work = NULL;
2152caea 572 write_lock(&neigh->lock);
990edb42
MKL
573 if (!(neigh->nud_state & NUD_VALID) &&
574 time_after(jiffies,
575 neigh->updated +
576 rt->rt6i_idev->cnf.rtr_probe_interval)) {
577 work = kmalloc(sizeof(*work), GFP_ATOMIC);
578 if (work)
579 __neigh_set_probe_once(neigh);
c2f17e82 580 }
2152caea 581 write_unlock(&neigh->lock);
990edb42
MKL
582 } else {
583 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 584 }
990edb42
MKL
585
586 if (work) {
587 INIT_WORK(&work->work, rt6_probe_deferred);
588 work->target = rt->rt6i_gateway;
589 dev_hold(rt->dst.dev);
590 work->dev = rt->dst.dev;
591 schedule_work(&work->work);
592 }
593
8d6c31bf 594out:
2152caea 595 rcu_read_unlock_bh();
27097255
YH
596}
597#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
601#endif
602
1da177e4 603/*
554cfb7e 604 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 605 */
b6f99a21 606static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 607{
d1918542 608 struct net_device *dev = rt->dst.dev;
161980f4 609 if (!oif || dev->ifindex == oif)
554cfb7e 610 return 2;
161980f4
DM
611 if ((dev->flags & IFF_LOOPBACK) &&
612 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
613 return 1;
614 return 0;
554cfb7e 615}
1da177e4 616
afc154e9 617static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 618{
f2c31e32 619 struct neighbour *neigh;
afc154e9 620 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 621
4d0c5911
YH
622 if (rt->rt6i_flags & RTF_NONEXTHOP ||
623 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 624 return RT6_NUD_SUCCEED;
145a3621
YH
625
626 rcu_read_lock_bh();
627 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
628 if (neigh) {
629 read_lock(&neigh->lock);
554cfb7e 630 if (neigh->nud_state & NUD_VALID)
afc154e9 631 ret = RT6_NUD_SUCCEED;
398bcbeb 632#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 633 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 634 ret = RT6_NUD_SUCCEED;
7e980569
JB
635 else
636 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 637#endif
145a3621 638 read_unlock(&neigh->lock);
afc154e9
HFS
639 } else {
640 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 641 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 642 }
145a3621
YH
643 rcu_read_unlock_bh();
644
a5a81f0b 645 return ret;
1da177e4
LT
646}
647
554cfb7e
YH
648static int rt6_score_route(struct rt6_info *rt, int oif,
649 int strict)
1da177e4 650{
a5a81f0b 651 int m;
1ab1457c 652
4d0c5911 653 m = rt6_check_dev(rt, oif);
77d16f45 654 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 655 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
656#ifdef CONFIG_IPV6_ROUTER_PREF
657 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
658#endif
afc154e9
HFS
659 if (strict & RT6_LOOKUP_F_REACHABLE) {
660 int n = rt6_check_neigh(rt);
661 if (n < 0)
662 return n;
663 }
554cfb7e
YH
664 return m;
665}
666
f11e6659 667static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
668 int *mpri, struct rt6_info *match,
669 bool *do_rr)
554cfb7e 670{
f11e6659 671 int m;
afc154e9 672 bool match_do_rr = false;
35103d11
AG
673 struct inet6_dev *idev = rt->rt6i_idev;
674 struct net_device *dev = rt->dst.dev;
675
676 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
677 idev->cnf.ignore_routes_with_linkdown &&
678 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 679 goto out;
f11e6659
DM
680
681 if (rt6_check_expired(rt))
682 goto out;
683
684 m = rt6_score_route(rt, oif, strict);
7e980569 685 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
686 match_do_rr = true;
687 m = 0; /* lowest valid score */
7e980569 688 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 689 goto out;
afc154e9
HFS
690 }
691
692 if (strict & RT6_LOOKUP_F_REACHABLE)
693 rt6_probe(rt);
f11e6659 694
7e980569 695 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 696 if (m > *mpri) {
afc154e9 697 *do_rr = match_do_rr;
f11e6659
DM
698 *mpri = m;
699 match = rt;
f11e6659 700 }
f11e6659
DM
701out:
702 return match;
703}
704
705static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
706 struct rt6_info *rr_head,
afc154e9
HFS
707 u32 metric, int oif, int strict,
708 bool *do_rr)
f11e6659 709{
9fbdcfaf 710 struct rt6_info *rt, *match, *cont;
554cfb7e 711 int mpri = -1;
1da177e4 712
f11e6659 713 match = NULL;
9fbdcfaf
SK
714 cont = NULL;
715 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
716 if (rt->rt6i_metric != metric) {
717 cont = rt;
718 break;
719 }
720
721 match = find_match(rt, oif, strict, &mpri, match, do_rr);
722 }
723
724 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
725 if (rt->rt6i_metric != metric) {
726 cont = rt;
727 break;
728 }
729
afc154e9 730 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
731 }
732
733 if (match || !cont)
734 return match;
735
736 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 737 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 738
f11e6659
DM
739 return match;
740}
1da177e4 741
f11e6659
DM
742static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
743{
744 struct rt6_info *match, *rt0;
8ed67789 745 struct net *net;
afc154e9 746 bool do_rr = false;
1da177e4 747
f11e6659
DM
748 rt0 = fn->rr_ptr;
749 if (!rt0)
750 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 751
afc154e9
HFS
752 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
753 &do_rr);
1da177e4 754
afc154e9 755 if (do_rr) {
d8d1f30b 756 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 757
554cfb7e 758 /* no entries matched; do round-robin */
f11e6659
DM
759 if (!next || next->rt6i_metric != rt0->rt6i_metric)
760 next = fn->leaf;
761
762 if (next != rt0)
763 fn->rr_ptr = next;
1da177e4 764 }
1da177e4 765
d1918542 766 net = dev_net(rt0->dst.dev);
a02cec21 767 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
768}
769
8b9df265
MKL
770static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
771{
772 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
773}
774
70ceb4f5
YH
775#ifdef CONFIG_IPV6_ROUTE_INFO
776int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 777 const struct in6_addr *gwaddr)
70ceb4f5 778{
c346dca1 779 struct net *net = dev_net(dev);
70ceb4f5
YH
780 struct route_info *rinfo = (struct route_info *) opt;
781 struct in6_addr prefix_buf, *prefix;
782 unsigned int pref;
4bed72e4 783 unsigned long lifetime;
70ceb4f5
YH
784 struct rt6_info *rt;
785
786 if (len < sizeof(struct route_info)) {
787 return -EINVAL;
788 }
789
790 /* Sanity check for prefix_len and length */
791 if (rinfo->length > 3) {
792 return -EINVAL;
793 } else if (rinfo->prefix_len > 128) {
794 return -EINVAL;
795 } else if (rinfo->prefix_len > 64) {
796 if (rinfo->length < 2) {
797 return -EINVAL;
798 }
799 } else if (rinfo->prefix_len > 0) {
800 if (rinfo->length < 1) {
801 return -EINVAL;
802 }
803 }
804
805 pref = rinfo->route_pref;
806 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 807 return -EINVAL;
70ceb4f5 808
4bed72e4 809 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
810
811 if (rinfo->length == 3)
812 prefix = (struct in6_addr *)rinfo->prefix;
813 else {
814 /* this function is safe */
815 ipv6_addr_prefix(&prefix_buf,
816 (struct in6_addr *)rinfo->prefix,
817 rinfo->prefix_len);
818 prefix = &prefix_buf;
819 }
820
f104a567
DJ
821 if (rinfo->prefix_len == 0)
822 rt = rt6_get_dflt_router(gwaddr, dev);
823 else
824 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 825 gwaddr, dev);
70ceb4f5
YH
826
827 if (rt && !lifetime) {
e0a1ad73 828 ip6_del_rt(rt);
70ceb4f5
YH
829 rt = NULL;
830 }
831
832 if (!rt && lifetime)
830218c1
DA
833 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
834 dev, pref);
70ceb4f5
YH
835 else if (rt)
836 rt->rt6i_flags = RTF_ROUTEINFO |
837 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
838
839 if (rt) {
1716a961
G
840 if (!addrconf_finite_timeout(lifetime))
841 rt6_clean_expires(rt);
842 else
843 rt6_set_expires(rt, jiffies + HZ * lifetime);
844
94e187c0 845 ip6_rt_put(rt);
70ceb4f5
YH
846 }
847 return 0;
848}
849#endif
850
a3c00e46
MKL
851static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
852 struct in6_addr *saddr)
853{
854 struct fib6_node *pn;
855 while (1) {
856 if (fn->fn_flags & RTN_TL_ROOT)
857 return NULL;
858 pn = fn->parent;
859 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
860 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
861 else
862 fn = pn;
863 if (fn->fn_flags & RTN_RTINFO)
864 return fn;
865 }
866}
c71099ac 867
8ed67789
DL
868static struct rt6_info *ip6_pol_route_lookup(struct net *net,
869 struct fib6_table *table,
4c9483b2 870 struct flowi6 *fl6, int flags)
1da177e4
LT
871{
872 struct fib6_node *fn;
873 struct rt6_info *rt;
874
c71099ac 875 read_lock_bh(&table->tb6_lock);
4c9483b2 876 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
877restart:
878 rt = fn->leaf;
4c9483b2 879 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 880 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 881 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
882 if (rt == net->ipv6.ip6_null_entry) {
883 fn = fib6_backtrack(fn, &fl6->saddr);
884 if (fn)
885 goto restart;
886 }
d8d1f30b 887 dst_use(&rt->dst, jiffies);
c71099ac 888 read_unlock_bh(&table->tb6_lock);
b811580d
DA
889
890 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
891
c71099ac
TG
892 return rt;
893
894}
895
67ba4152 896struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
897 int flags)
898{
899 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
900}
901EXPORT_SYMBOL_GPL(ip6_route_lookup);
902
9acd9f3a
YH
903struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
904 const struct in6_addr *saddr, int oif, int strict)
c71099ac 905{
4c9483b2
DM
906 struct flowi6 fl6 = {
907 .flowi6_oif = oif,
908 .daddr = *daddr,
c71099ac
TG
909 };
910 struct dst_entry *dst;
77d16f45 911 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 912
adaa70bb 913 if (saddr) {
4c9483b2 914 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
915 flags |= RT6_LOOKUP_F_HAS_SADDR;
916 }
917
4c9483b2 918 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
919 if (dst->error == 0)
920 return (struct rt6_info *) dst;
921
922 dst_release(dst);
923
1da177e4
LT
924 return NULL;
925}
7159039a
YH
926EXPORT_SYMBOL(rt6_lookup);
927
/* ip6_ins_rt is called with table->tb6_lock NOT held (free).
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
933
e5fd387a 934static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 935 struct mx6_config *mxc)
1da177e4
LT
936{
937 int err;
c71099ac 938 struct fib6_table *table;
1da177e4 939
c71099ac
TG
940 table = rt->rt6i_table;
941 write_lock_bh(&table->tb6_lock);
e715b6d3 942 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 943 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
944
945 return err;
946}
947
40e22e8f
TG
948int ip6_ins_rt(struct rt6_info *rt)
949{
e715b6d3
FW
950 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
951 struct mx6_config mxc = { .mx = NULL, };
952
953 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
954}
955
8b9df265
MKL
956static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
957 const struct in6_addr *daddr,
958 const struct in6_addr *saddr)
1da177e4 959{
1da177e4
LT
960 struct rt6_info *rt;
961
962 /*
963 * Clone the route.
964 */
965
d52d3997 966 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 967 ort = (struct rt6_info *)ort->dst.from;
1da177e4 968
ad706862 969 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
970
971 if (!rt)
972 return NULL;
973
974 ip6_rt_copy_init(rt, ort);
975 rt->rt6i_flags |= RTF_CACHE;
976 rt->rt6i_metric = 0;
977 rt->dst.flags |= DST_HOST;
978 rt->rt6i_dst.addr = *daddr;
979 rt->rt6i_dst.plen = 128;
1da177e4 980
83a09abd
MKL
981 if (!rt6_is_gw_or_nonexthop(ort)) {
982 if (ort->rt6i_dst.plen != 128 &&
983 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
984 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 985#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
986 if (rt->rt6i_src.plen && saddr) {
987 rt->rt6i_src.addr = *saddr;
988 rt->rt6i_src.plen = 128;
8b9df265 989 }
83a09abd 990#endif
95a9a5ba 991 }
1da177e4 992
95a9a5ba
YH
993 return rt;
994}
1da177e4 995
d52d3997
MKL
996static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
997{
998 struct rt6_info *pcpu_rt;
999
1000 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 1001 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
1002
1003 if (!pcpu_rt)
1004 return NULL;
1005 ip6_rt_copy_init(pcpu_rt, rt);
1006 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1007 pcpu_rt->rt6i_flags |= RTF_PCPU;
1008 return pcpu_rt;
1009}
1010
1011/* It should be called with read_lock_bh(&tb6_lock) acquired */
1012static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1013{
a73e4195 1014 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1015
1016 p = this_cpu_ptr(rt->rt6i_pcpu);
1017 pcpu_rt = *p;
1018
a73e4195
MKL
1019 if (pcpu_rt) {
1020 dst_hold(&pcpu_rt->dst);
1021 rt6_dst_from_metrics_check(pcpu_rt);
1022 }
1023 return pcpu_rt;
1024}
1025
1026static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1027{
9c7370a1 1028 struct fib6_table *table = rt->rt6i_table;
a73e4195 1029 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1030
1031 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1032 if (!pcpu_rt) {
1033 struct net *net = dev_net(rt->dst.dev);
1034
9c7370a1
MKL
1035 dst_hold(&net->ipv6.ip6_null_entry->dst);
1036 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1037 }
1038
9c7370a1
MKL
1039 read_lock_bh(&table->tb6_lock);
1040 if (rt->rt6i_pcpu) {
1041 p = this_cpu_ptr(rt->rt6i_pcpu);
1042 prev = cmpxchg(p, NULL, pcpu_rt);
1043 if (prev) {
1044 /* If someone did it before us, return prev instead */
1045 dst_destroy(&pcpu_rt->dst);
1046 pcpu_rt = prev;
1047 }
1048 } else {
1049 /* rt has been removed from the fib6 tree
1050 * before we have a chance to acquire the read_lock.
1051 * In this case, don't brother to create a pcpu rt
1052 * since rt is going away anyway. The next
1053 * dst_check() will trigger a re-lookup.
1054 */
d52d3997 1055 dst_destroy(&pcpu_rt->dst);
9c7370a1 1056 pcpu_rt = rt;
d52d3997 1057 }
d52d3997
MKL
1058 dst_hold(&pcpu_rt->dst);
1059 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1060 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1061 return pcpu_rt;
1062}
1063
9ff74384
DA
1064struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1065 int oif, struct flowi6 *fl6, int flags)
1da177e4 1066{
367efcb9 1067 struct fib6_node *fn, *saved_fn;
45e4fd26 1068 struct rt6_info *rt;
c71099ac 1069 int strict = 0;
1da177e4 1070
77d16f45 1071 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1072 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1073 if (net->ipv6.devconf_all->forwarding == 0)
1074 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1075
c71099ac 1076 read_lock_bh(&table->tb6_lock);
1da177e4 1077
4c9483b2 1078 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1079 saved_fn = fn;
1da177e4 1080
ca254490
DA
1081 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1082 oif = 0;
1083
a3c00e46 1084redo_rt6_select:
367efcb9 1085 rt = rt6_select(fn, oif, strict);
52bd4c0c 1086 if (rt->rt6i_nsiblings)
367efcb9 1087 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1088 if (rt == net->ipv6.ip6_null_entry) {
1089 fn = fib6_backtrack(fn, &fl6->saddr);
1090 if (fn)
1091 goto redo_rt6_select;
367efcb9
MKL
1092 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1093 /* also consider unreachable route */
1094 strict &= ~RT6_LOOKUP_F_REACHABLE;
1095 fn = saved_fn;
1096 goto redo_rt6_select;
367efcb9 1097 }
a3c00e46
MKL
1098 }
1099
fb9de91e 1100
3da59bd9 1101 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1102 dst_use(&rt->dst, jiffies);
1103 read_unlock_bh(&table->tb6_lock);
1104
1105 rt6_dst_from_metrics_check(rt);
b811580d
DA
1106
1107 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1108 return rt;
3da59bd9
MKL
1109 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1110 !(rt->rt6i_flags & RTF_GATEWAY))) {
1111 /* Create a RTF_CACHE clone which will not be
1112 * owned by the fib6 tree. It is for the special case where
1113 * the daddr in the skb during the neighbor look-up is different
1114 * from the fl6->daddr used to look-up route here.
1115 */
1116
1117 struct rt6_info *uncached_rt;
1118
d52d3997
MKL
1119 dst_use(&rt->dst, jiffies);
1120 read_unlock_bh(&table->tb6_lock);
1121
3da59bd9
MKL
1122 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1123 dst_release(&rt->dst);
c71099ac 1124
3da59bd9 1125 if (uncached_rt)
8d0b94af 1126 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1127 else
1128 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1129
3da59bd9 1130 dst_hold(&uncached_rt->dst);
b811580d
DA
1131
1132 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1133 return uncached_rt;
3da59bd9 1134
d52d3997
MKL
1135 } else {
1136 /* Get a percpu copy */
1137
1138 struct rt6_info *pcpu_rt;
1139
1140 rt->dst.lastuse = jiffies;
1141 rt->dst.__use++;
1142 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1143
9c7370a1
MKL
1144 if (pcpu_rt) {
1145 read_unlock_bh(&table->tb6_lock);
1146 } else {
1147 /* We have to do the read_unlock first
1148 * because rt6_make_pcpu_route() may trigger
1149 * ip6_dst_gc() which will take the write_lock.
1150 */
1151 dst_hold(&rt->dst);
1152 read_unlock_bh(&table->tb6_lock);
a73e4195 1153 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1154 dst_release(&rt->dst);
1155 }
d52d3997 1156
b811580d 1157 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1158 return pcpu_rt;
9c7370a1 1159
d52d3997 1160 }
1da177e4 1161}
9ff74384 1162EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1163
8ed67789 1164static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1165 struct flowi6 *fl6, int flags)
4acad72d 1166{
4c9483b2 1167 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1168}
1169
d409b847
MB
1170struct dst_entry *ip6_route_input_lookup(struct net *net,
1171 struct net_device *dev,
1172 struct flowi6 *fl6, int flags)
72331bc0
SL
1173{
1174 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1175 flags |= RT6_LOOKUP_F_IFACE;
1176
1177 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1178}
d409b847 1179EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1180
c71099ac
TG
1181void ip6_route_input(struct sk_buff *skb)
1182{
b71d1d42 1183 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1184 struct net *net = dev_net(skb->dev);
adaa70bb 1185 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1186 struct ip_tunnel_info *tun_info;
4c9483b2 1187 struct flowi6 fl6 = {
e0d56fdd 1188 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1189 .daddr = iph->daddr,
1190 .saddr = iph->saddr,
6502ca52 1191 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1192 .flowi6_mark = skb->mark,
1193 .flowi6_proto = iph->nexthdr,
c71099ac 1194 };
adaa70bb 1195
904af04d 1196 tun_info = skb_tunnel_info(skb);
46fa062a 1197 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1198 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1199 skb_dst_drop(skb);
72331bc0 1200 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1201}
1202
8ed67789 1203static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1204 struct flowi6 *fl6, int flags)
1da177e4 1205{
4c9483b2 1206 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1207}
1208
6f21c96a
PA
1209struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1210 struct flowi6 *fl6, int flags)
c71099ac 1211{
d46a9d67 1212 bool any_src;
c71099ac 1213
4c1feac5
DA
1214 if (rt6_need_strict(&fl6->daddr)) {
1215 struct dst_entry *dst;
1216
1217 dst = l3mdev_link_scope_lookup(net, fl6);
1218 if (dst)
1219 return dst;
1220 }
ca254490 1221
1fb9489b 1222 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1223
d46a9d67 1224 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1225 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1226 (fl6->flowi6_oif && any_src))
77d16f45 1227 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1228
d46a9d67 1229 if (!any_src)
adaa70bb 1230 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1231 else if (sk)
1232 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1233
4c9483b2 1234 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1235}
6f21c96a 1236EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1237
2774c131 1238struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1239{
5c1e6aa3 1240 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1241 struct dst_entry *new = NULL;
1242
f5b0a874 1243 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1244 if (rt) {
0a1f5962 1245 rt6_info_init(rt);
8104891b 1246
0a1f5962 1247 new = &rt->dst;
14e50e57 1248 new->__use = 1;
352e512c 1249 new->input = dst_discard;
ede2059d 1250 new->output = dst_discard_out;
14e50e57 1251
0a1f5962 1252 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1253 rt->rt6i_idev = ort->rt6i_idev;
1254 if (rt->rt6i_idev)
1255 in6_dev_hold(rt->rt6i_idev);
14e50e57 1256
4e3fd7a0 1257 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1258 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1259 rt->rt6i_metric = 0;
1260
1261 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1262#ifdef CONFIG_IPV6_SUBTREES
1263 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1264#endif
1265
1266 dst_free(new);
1267 }
1268
69ead7af
DM
1269 dst_release(dst_orig);
1270 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1271}
14e50e57 1272
1da177e4
LT
1273/*
1274 * Destination cache support functions
1275 */
1276
4b32b5ad
MKL
1277static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1278{
1279 if (rt->dst.from &&
1280 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1281 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1282}
1283
3da59bd9
MKL
1284static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1285{
1286 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1287 return NULL;
1288
1289 if (rt6_check_expired(rt))
1290 return NULL;
1291
1292 return &rt->dst;
1293}
1294
1295static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1296{
5973fb1e
MKL
1297 if (!__rt6_check_expired(rt) &&
1298 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1299 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1300 return &rt->dst;
1301 else
1302 return NULL;
1303}
1304
1da177e4
LT
1305static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1306{
1307 struct rt6_info *rt;
1308
1309 rt = (struct rt6_info *) dst;
1310
6f3118b5
ND
1311 /* All IPV6 dsts are created with ->obsolete set to the value
1312 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1313 * into this function always.
1314 */
e3bc10bd 1315
4b32b5ad
MKL
1316 rt6_dst_from_metrics_check(rt);
1317
02bcf4e0
MKL
1318 if (rt->rt6i_flags & RTF_PCPU ||
1319 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
3da59bd9
MKL
1320 return rt6_dst_from_check(rt, cookie);
1321 else
1322 return rt6_check(rt, cookie);
1da177e4
LT
1323}
1324
1325static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1326{
1327 struct rt6_info *rt = (struct rt6_info *) dst;
1328
1329 if (rt) {
54c1a859
YH
1330 if (rt->rt6i_flags & RTF_CACHE) {
1331 if (rt6_check_expired(rt)) {
1332 ip6_del_rt(rt);
1333 dst = NULL;
1334 }
1335 } else {
1da177e4 1336 dst_release(dst);
54c1a859
YH
1337 dst = NULL;
1338 }
1da177e4 1339 }
54c1a859 1340 return dst;
1da177e4
LT
1341}
1342
1343static void ip6_link_failure(struct sk_buff *skb)
1344{
1345 struct rt6_info *rt;
1346
3ffe533c 1347 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1348
adf30907 1349 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1350 if (rt) {
1eb4f758
HFS
1351 if (rt->rt6i_flags & RTF_CACHE) {
1352 dst_hold(&rt->dst);
8e3d5be7 1353 ip6_del_rt(rt);
1eb4f758 1354 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1355 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1356 }
1da177e4
LT
1357 }
1358}
1359
45e4fd26
MKL
1360static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1361{
1362 struct net *net = dev_net(rt->dst.dev);
1363
1364 rt->rt6i_flags |= RTF_MODIFIED;
1365 rt->rt6i_pmtu = mtu;
1366 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1367}
1368
0d3f6d29
MKL
1369static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1370{
1371 return !(rt->rt6i_flags & RTF_CACHE) &&
1372 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1373}
1374
45e4fd26
MKL
1375static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1376 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1377{
67ba4152 1378 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1379
45e4fd26
MKL
1380 if (rt6->rt6i_flags & RTF_LOCAL)
1381 return;
81aded24 1382
19bda36c
XL
1383 if (dst_metric_locked(dst, RTAX_MTU))
1384 return;
1385
45e4fd26
MKL
1386 dst_confirm(dst);
1387 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1388 if (mtu >= dst_mtu(dst))
1389 return;
9d289715 1390
0d3f6d29 1391 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26
MKL
1392 rt6_do_update_pmtu(rt6, mtu);
1393 } else {
1394 const struct in6_addr *daddr, *saddr;
1395 struct rt6_info *nrt6;
1396
1397 if (iph) {
1398 daddr = &iph->daddr;
1399 saddr = &iph->saddr;
1400 } else if (sk) {
1401 daddr = &sk->sk_v6_daddr;
1402 saddr = &inet6_sk(sk)->saddr;
1403 } else {
1404 return;
1405 }
1406 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1407 if (nrt6) {
1408 rt6_do_update_pmtu(nrt6, mtu);
1409
1410 /* ip6_ins_rt(nrt6) will bump the
1411 * rt6->rt6i_node->fn_sernum
1412 * which will fail the next rt6_check() and
1413 * invalidate the sk->sk_dst_cache.
1414 */
1415 ip6_ins_rt(nrt6);
1416 }
1da177e4
LT
1417 }
1418}
1419
45e4fd26
MKL
1420static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1421 struct sk_buff *skb, u32 mtu)
1422{
1423 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1424}
1425
42ae66c8 1426void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 1427 int oif, u32 mark, kuid_t uid)
81aded24
DM
1428{
1429 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1430 struct dst_entry *dst;
1431 struct flowi6 fl6;
1432
1433 memset(&fl6, 0, sizeof(fl6));
1434 fl6.flowi6_oif = oif;
1b3c61dc 1435 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1436 fl6.daddr = iph->daddr;
1437 fl6.saddr = iph->saddr;
6502ca52 1438 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1439 fl6.flowi6_uid = uid;
81aded24
DM
1440
1441 dst = ip6_route_output(net, NULL, &fl6);
1442 if (!dst->error)
45e4fd26 1443 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1444 dst_release(dst);
1445}
1446EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1447
1448void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1449{
33c162a9
MKL
1450 struct dst_entry *dst;
1451
81aded24 1452 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 1453 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
1454
1455 dst = __sk_dst_get(sk);
1456 if (!dst || !dst->obsolete ||
1457 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1458 return;
1459
1460 bh_lock_sock(sk);
1461 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1462 ip6_datagram_dst_update(sk, false);
1463 bh_unlock_sock(sk);
81aded24
DM
1464}
1465EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1466
b55b76b2
DJ
1467/* Handle redirects */
1468struct ip6rd_flowi {
1469 struct flowi6 fl6;
1470 struct in6_addr gateway;
1471};
1472
1473static struct rt6_info *__ip6_route_redirect(struct net *net,
1474 struct fib6_table *table,
1475 struct flowi6 *fl6,
1476 int flags)
1477{
1478 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1479 struct rt6_info *rt;
1480 struct fib6_node *fn;
1481
1482 /* Get the "current" route for this destination and
67c408cf 1483 * check if the redirect has come from appropriate router.
b55b76b2
DJ
1484 *
1485 * RFC 4861 specifies that redirects should only be
1486 * accepted if they come from the nexthop to the target.
1487 * Due to the way the routes are chosen, this notion
1488 * is a bit fuzzy and one might need to check all possible
1489 * routes.
1490 */
1491
1492 read_lock_bh(&table->tb6_lock);
1493 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1494restart:
1495 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1496 if (rt6_check_expired(rt))
1497 continue;
1498 if (rt->dst.error)
1499 break;
1500 if (!(rt->rt6i_flags & RTF_GATEWAY))
1501 continue;
1502 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1503 continue;
1504 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1505 continue;
1506 break;
1507 }
1508
1509 if (!rt)
1510 rt = net->ipv6.ip6_null_entry;
1511 else if (rt->dst.error) {
1512 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1513 goto out;
1514 }
1515
1516 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1517 fn = fib6_backtrack(fn, &fl6->saddr);
1518 if (fn)
1519 goto restart;
b55b76b2 1520 }
a3c00e46 1521
b0a1ba59 1522out:
b55b76b2
DJ
1523 dst_hold(&rt->dst);
1524
1525 read_unlock_bh(&table->tb6_lock);
1526
b811580d 1527 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1528 return rt;
1529};
1530
1531static struct dst_entry *ip6_route_redirect(struct net *net,
1532 const struct flowi6 *fl6,
1533 const struct in6_addr *gateway)
1534{
1535 int flags = RT6_LOOKUP_F_HAS_SADDR;
1536 struct ip6rd_flowi rdfl;
1537
1538 rdfl.fl6 = *fl6;
1539 rdfl.gateway = *gateway;
1540
1541 return fib6_rule_lookup(net, &rdfl.fl6,
1542 flags, __ip6_route_redirect);
1543}
1544
e2d118a1
LC
1545void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1546 kuid_t uid)
3a5ad2ee
DM
1547{
1548 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1549 struct dst_entry *dst;
1550 struct flowi6 fl6;
1551
1552 memset(&fl6, 0, sizeof(fl6));
e374c618 1553 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1554 fl6.flowi6_oif = oif;
1555 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1556 fl6.daddr = iph->daddr;
1557 fl6.saddr = iph->saddr;
6502ca52 1558 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1559 fl6.flowi6_uid = uid;
3a5ad2ee 1560
b55b76b2
DJ
1561 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1562 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1563 dst_release(dst);
1564}
1565EXPORT_SYMBOL_GPL(ip6_redirect);
1566
c92a59ec
DJ
1567void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1568 u32 mark)
1569{
1570 const struct ipv6hdr *iph = ipv6_hdr(skb);
1571 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1572 struct dst_entry *dst;
1573 struct flowi6 fl6;
1574
1575 memset(&fl6, 0, sizeof(fl6));
e374c618 1576 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1577 fl6.flowi6_oif = oif;
1578 fl6.flowi6_mark = mark;
c92a59ec
DJ
1579 fl6.daddr = msg->dest;
1580 fl6.saddr = iph->daddr;
e2d118a1 1581 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 1582
b55b76b2
DJ
1583 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1584 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1585 dst_release(dst);
1586}
1587
3a5ad2ee
DM
1588void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1589{
e2d118a1
LC
1590 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1591 sk->sk_uid);
3a5ad2ee
DM
1592}
1593EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1594
0dbaee3b 1595static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1596{
0dbaee3b
DM
1597 struct net_device *dev = dst->dev;
1598 unsigned int mtu = dst_mtu(dst);
1599 struct net *net = dev_net(dev);
1600
1da177e4
LT
1601 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1602
5578689a
DL
1603 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1604 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1605
1606 /*
1ab1457c
YH
1607 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1608 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1609 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1610 * rely only on pmtu discovery"
1611 */
1612 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1613 mtu = IPV6_MAXPLEN;
1614 return mtu;
1615}
1616
ebb762f2 1617static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1618{
4b32b5ad
MKL
1619 const struct rt6_info *rt = (const struct rt6_info *)dst;
1620 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1621 struct inet6_dev *idev;
618f9bc7 1622
4b32b5ad
MKL
1623 if (mtu)
1624 goto out;
1625
1626 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1627 if (mtu)
30f78d8e 1628 goto out;
618f9bc7
SK
1629
1630 mtu = IPV6_MIN_MTU;
d33e4553
DM
1631
1632 rcu_read_lock();
1633 idev = __in6_dev_get(dst->dev);
1634 if (idev)
1635 mtu = idev->cnf.mtu6;
1636 rcu_read_unlock();
1637
30f78d8e 1638out:
14972cbd
RP
1639 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1640
1641 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
1642}
1643
3b00944c
YH
1644static struct dst_entry *icmp6_dst_gc_list;
1645static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1646
3b00944c 1647struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1648 struct flowi6 *fl6)
1da177e4 1649{
87a11578 1650 struct dst_entry *dst;
1da177e4
LT
1651 struct rt6_info *rt;
1652 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1653 struct net *net = dev_net(dev);
1da177e4 1654
38308473 1655 if (unlikely(!idev))
122bdf67 1656 return ERR_PTR(-ENODEV);
1da177e4 1657
ad706862 1658 rt = ip6_dst_alloc(net, dev, 0);
38308473 1659 if (unlikely(!rt)) {
1da177e4 1660 in6_dev_put(idev);
87a11578 1661 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1662 goto out;
1663 }
1664
8e2ec639
YZ
1665 rt->dst.flags |= DST_HOST;
1666 rt->dst.output = ip6_output;
d8d1f30b 1667 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1668 rt->rt6i_gateway = fl6->daddr;
87a11578 1669 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1670 rt->rt6i_dst.plen = 128;
1671 rt->rt6i_idev = idev;
14edd87d 1672 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1673
3b00944c 1674 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1675 rt->dst.next = icmp6_dst_gc_list;
1676 icmp6_dst_gc_list = &rt->dst;
3b00944c 1677 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1678
5578689a 1679 fib6_force_start_gc(net);
1da177e4 1680
87a11578
DM
1681 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1682
1da177e4 1683out:
87a11578 1684 return dst;
1da177e4
LT
1685}
1686
3d0f24a7 1687int icmp6_dst_gc(void)
1da177e4 1688{
e9476e95 1689 struct dst_entry *dst, **pprev;
3d0f24a7 1690 int more = 0;
1da177e4 1691
3b00944c
YH
1692 spin_lock_bh(&icmp6_dst_lock);
1693 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1694
1da177e4
LT
1695 while ((dst = *pprev) != NULL) {
1696 if (!atomic_read(&dst->__refcnt)) {
1697 *pprev = dst->next;
1698 dst_free(dst);
1da177e4
LT
1699 } else {
1700 pprev = &dst->next;
3d0f24a7 1701 ++more;
1da177e4
LT
1702 }
1703 }
1704
3b00944c 1705 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1706
3d0f24a7 1707 return more;
1da177e4
LT
1708}
1709
1e493d19
DM
1710static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1711 void *arg)
1712{
1713 struct dst_entry *dst, **pprev;
1714
1715 spin_lock_bh(&icmp6_dst_lock);
1716 pprev = &icmp6_dst_gc_list;
1717 while ((dst = *pprev) != NULL) {
1718 struct rt6_info *rt = (struct rt6_info *) dst;
1719 if (func(rt, arg)) {
1720 *pprev = dst->next;
1721 dst_free(dst);
1722 } else {
1723 pprev = &dst->next;
1724 }
1725 }
1726 spin_unlock_bh(&icmp6_dst_lock);
1727}
1728
569d3645 1729static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1730{
86393e52 1731 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1732 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1733 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1734 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1735 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1736 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1737 int entries;
7019b78e 1738
fc66f95c 1739 entries = dst_entries_get_fast(ops);
49a18d86 1740 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1741 entries <= rt_max_size)
1da177e4
LT
1742 goto out;
1743
6891a346 1744 net->ipv6.ip6_rt_gc_expire++;
14956643 1745 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1746 entries = dst_entries_get_slow(ops);
1747 if (entries < ops->gc_thresh)
7019b78e 1748 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1749out:
7019b78e 1750 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1751 return entries > rt_max_size;
1da177e4
LT
1752}
1753
e715b6d3
FW
1754static int ip6_convert_metrics(struct mx6_config *mxc,
1755 const struct fib6_config *cfg)
1756{
c3a8d947 1757 bool ecn_ca = false;
e715b6d3
FW
1758 struct nlattr *nla;
1759 int remaining;
1760 u32 *mp;
1761
63159f29 1762 if (!cfg->fc_mx)
e715b6d3
FW
1763 return 0;
1764
1765 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1766 if (unlikely(!mp))
1767 return -ENOMEM;
1768
1769 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1770 int type = nla_type(nla);
1bb14807 1771 u32 val;
e715b6d3 1772
1bb14807
DB
1773 if (!type)
1774 continue;
1775 if (unlikely(type > RTAX_MAX))
1776 goto err;
ea697639 1777
1bb14807
DB
1778 if (type == RTAX_CC_ALGO) {
1779 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1780
1bb14807 1781 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1782 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1783 if (val == TCP_CA_UNSPEC)
1784 goto err;
1785 } else {
1786 val = nla_get_u32(nla);
e715b6d3 1787 }
626abd59
PA
1788 if (type == RTAX_HOPLIMIT && val > 255)
1789 val = 255;
b8d3e416
DB
1790 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1791 goto err;
1bb14807
DB
1792
1793 mp[type - 1] = val;
1794 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1795 }
1796
c3a8d947
DB
1797 if (ecn_ca) {
1798 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1799 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1800 }
e715b6d3 1801
c3a8d947 1802 mxc->mx = mp;
e715b6d3
FW
1803 return 0;
1804 err:
1805 kfree(mp);
1806 return -EINVAL;
1807}
1da177e4 1808
8c14586f
DA
1809static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1810 struct fib6_config *cfg,
1811 const struct in6_addr *gw_addr)
1812{
1813 struct flowi6 fl6 = {
1814 .flowi6_oif = cfg->fc_ifindex,
1815 .daddr = *gw_addr,
1816 .saddr = cfg->fc_prefsrc,
1817 };
1818 struct fib6_table *table;
1819 struct rt6_info *rt;
d5d32e4b 1820 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
1821
1822 table = fib6_get_table(net, cfg->fc_table);
1823 if (!table)
1824 return NULL;
1825
1826 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1827 flags |= RT6_LOOKUP_F_HAS_SADDR;
1828
1829 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1830
1831 /* if table lookup failed, fall back to full lookup */
1832 if (rt == net->ipv6.ip6_null_entry) {
1833 ip6_rt_put(rt);
1834 rt = NULL;
1835 }
1836
1837 return rt;
1838}
1839
8c5b83f0 1840static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1da177e4 1841{
5578689a 1842 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1843 struct rt6_info *rt = NULL;
1844 struct net_device *dev = NULL;
1845 struct inet6_dev *idev = NULL;
c71099ac 1846 struct fib6_table *table;
1da177e4 1847 int addr_type;
8c5b83f0 1848 int err = -EINVAL;
1da177e4 1849
ec462631
DA
1850 /* RTF_PCPU is an internal flag; can not be set by userspace */
1851 if (cfg->fc_flags & RTF_PCPU)
1852 goto out;
1853
86872cb5 1854 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
8c5b83f0 1855 goto out;
1da177e4 1856#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1857 if (cfg->fc_src_len)
8c5b83f0 1858 goto out;
1da177e4 1859#endif
86872cb5 1860 if (cfg->fc_ifindex) {
1da177e4 1861 err = -ENODEV;
5578689a 1862 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1863 if (!dev)
1864 goto out;
1865 idev = in6_dev_get(dev);
1866 if (!idev)
1867 goto out;
1868 }
1869
86872cb5
TG
1870 if (cfg->fc_metric == 0)
1871 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1872
d71314b4 1873 err = -ENOBUFS;
38308473
DM
1874 if (cfg->fc_nlinfo.nlh &&
1875 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1876 table = fib6_get_table(net, cfg->fc_table);
38308473 1877 if (!table) {
f3213831 1878 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1879 table = fib6_new_table(net, cfg->fc_table);
1880 }
1881 } else {
1882 table = fib6_new_table(net, cfg->fc_table);
1883 }
38308473
DM
1884
1885 if (!table)
c71099ac 1886 goto out;
c71099ac 1887
ad706862
MKL
1888 rt = ip6_dst_alloc(net, NULL,
1889 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1890
38308473 1891 if (!rt) {
1da177e4
LT
1892 err = -ENOMEM;
1893 goto out;
1894 }
1895
1716a961
G
1896 if (cfg->fc_flags & RTF_EXPIRES)
1897 rt6_set_expires(rt, jiffies +
1898 clock_t_to_jiffies(cfg->fc_expires));
1899 else
1900 rt6_clean_expires(rt);
1da177e4 1901
86872cb5
TG
1902 if (cfg->fc_protocol == RTPROT_UNSPEC)
1903 cfg->fc_protocol = RTPROT_BOOT;
1904 rt->rt6i_protocol = cfg->fc_protocol;
1905
1906 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1907
1908 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1909 rt->dst.input = ip6_mc_input;
ab79ad14
1910 else if (cfg->fc_flags & RTF_LOCAL)
1911 rt->dst.input = ip6_input;
1da177e4 1912 else
d8d1f30b 1913 rt->dst.input = ip6_forward;
1da177e4 1914
d8d1f30b 1915 rt->dst.output = ip6_output;
1da177e4 1916
19e42e45
RP
1917 if (cfg->fc_encap) {
1918 struct lwtunnel_state *lwtstate;
1919
1920 err = lwtunnel_build_state(dev, cfg->fc_encap_type,
127eb7cd
TH
1921 cfg->fc_encap, AF_INET6, cfg,
1922 &lwtstate);
19e42e45
RP
1923 if (err)
1924 goto out;
61adedf3
JB
1925 rt->dst.lwtstate = lwtstate_get(lwtstate);
1926 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1927 rt->dst.lwtstate->orig_output = rt->dst.output;
1928 rt->dst.output = lwtunnel_output;
25368623 1929 }
61adedf3
JB
1930 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1931 rt->dst.lwtstate->orig_input = rt->dst.input;
1932 rt->dst.input = lwtunnel_input;
25368623 1933 }
19e42e45
RP
1934 }
1935
86872cb5
TG
1936 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1937 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1938 if (rt->rt6i_dst.plen == 128)
e5fd387a 1939 rt->dst.flags |= DST_HOST;
e5fd387a 1940
1da177e4 1941#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1942 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1943 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1944#endif
1945
86872cb5 1946 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1947
1948 /* We cannot add true routes via loopback here,
1949 they would result in kernel looping; promote them to reject routes
1950 */
86872cb5 1951 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1952 (dev && (dev->flags & IFF_LOOPBACK) &&
1953 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1954 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1955 /* hold loopback dev/idev if we haven't done so. */
5578689a 1956 if (dev != net->loopback_dev) {
1da177e4
LT
1957 if (dev) {
1958 dev_put(dev);
1959 in6_dev_put(idev);
1960 }
5578689a 1961 dev = net->loopback_dev;
1da177e4
LT
1962 dev_hold(dev);
1963 idev = in6_dev_get(dev);
1964 if (!idev) {
1965 err = -ENODEV;
1966 goto out;
1967 }
1968 }
1da177e4 1969 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1970 switch (cfg->fc_type) {
1971 case RTN_BLACKHOLE:
1972 rt->dst.error = -EINVAL;
ede2059d 1973 rt->dst.output = dst_discard_out;
7150aede 1974 rt->dst.input = dst_discard;
ef2c7d7b
ND
1975 break;
1976 case RTN_PROHIBIT:
1977 rt->dst.error = -EACCES;
7150aede
K
1978 rt->dst.output = ip6_pkt_prohibit_out;
1979 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1980 break;
b4949ab2 1981 case RTN_THROW:
0315e382 1982 case RTN_UNREACHABLE:
ef2c7d7b 1983 default:
7150aede 1984 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
1985 : (cfg->fc_type == RTN_UNREACHABLE)
1986 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
1987 rt->dst.output = ip6_pkt_discard_out;
1988 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1989 break;
1990 }
1da177e4
LT
1991 goto install_route;
1992 }
1993
86872cb5 1994 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1995 const struct in6_addr *gw_addr;
1da177e4
LT
1996 int gwa_type;
1997
86872cb5 1998 gw_addr = &cfg->fc_gateway;
330567b7 1999 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
2000
2001 /* if gw_addr is local we will fail to detect this in case
2002 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2003 * will return already-added prefix route via interface that
2004 * prefix route was assigned to, which might be non-loopback.
2005 */
2006 err = -EINVAL;
330567b7
FW
2007 if (ipv6_chk_addr_and_flags(net, gw_addr,
2008 gwa_type & IPV6_ADDR_LINKLOCAL ?
2009 dev : NULL, 0, 0))
48ed7b26
FW
2010 goto out;
2011
4e3fd7a0 2012 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
2013
2014 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 2015 struct rt6_info *grt = NULL;
1da177e4
LT
2016
2017 /* IPv6 strictly inhibits using not link-local
2018 addresses as nexthop address.
2019 Otherwise, router will not able to send redirects.
2020 It is very good, but in some (rare!) circumstances
2021 (SIT, PtP, NBMA NOARP links) it is handy to allow
2022 some exceptions. --ANK
96d5822c
EN
2023 We allow IPv4-mapped nexthops to support RFC4798-type
2024 addressing
1da177e4 2025 */
96d5822c
EN
2026 if (!(gwa_type & (IPV6_ADDR_UNICAST |
2027 IPV6_ADDR_MAPPED)))
1da177e4
LT
2028 goto out;
2029
a435a07f 2030 if (cfg->fc_table) {
8c14586f
DA
2031 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2032
a435a07f
VB
2033 if (grt) {
2034 if (grt->rt6i_flags & RTF_GATEWAY ||
2035 (dev && dev != grt->dst.dev)) {
2036 ip6_rt_put(grt);
2037 grt = NULL;
2038 }
2039 }
2040 }
2041
8c14586f
DA
2042 if (!grt)
2043 grt = rt6_lookup(net, gw_addr, NULL,
2044 cfg->fc_ifindex, 1);
1da177e4
LT
2045
2046 err = -EHOSTUNREACH;
38308473 2047 if (!grt)
1da177e4
LT
2048 goto out;
2049 if (dev) {
d1918542 2050 if (dev != grt->dst.dev) {
94e187c0 2051 ip6_rt_put(grt);
1da177e4
LT
2052 goto out;
2053 }
2054 } else {
d1918542 2055 dev = grt->dst.dev;
1da177e4
LT
2056 idev = grt->rt6i_idev;
2057 dev_hold(dev);
2058 in6_dev_hold(grt->rt6i_idev);
2059 }
38308473 2060 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2061 err = 0;
94e187c0 2062 ip6_rt_put(grt);
1da177e4
LT
2063
2064 if (err)
2065 goto out;
2066 }
2067 err = -EINVAL;
38308473 2068 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
2069 goto out;
2070 }
2071
2072 err = -ENODEV;
38308473 2073 if (!dev)
1da177e4
LT
2074 goto out;
2075
c3968a85
DW
2076 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2077 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2078 err = -EINVAL;
2079 goto out;
2080 }
4e3fd7a0 2081 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2082 rt->rt6i_prefsrc.plen = 128;
2083 } else
2084 rt->rt6i_prefsrc.plen = 0;
2085
86872cb5 2086 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2087
2088install_route:
d8d1f30b 2089 rt->dst.dev = dev;
1da177e4 2090 rt->rt6i_idev = idev;
c71099ac 2091 rt->rt6i_table = table;
63152fc0 2092
c346dca1 2093 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2094
8c5b83f0 2095 return rt;
6b9ea5a6
RP
2096out:
2097 if (dev)
2098 dev_put(dev);
2099 if (idev)
2100 in6_dev_put(idev);
2101 if (rt)
2102 dst_free(&rt->dst);
2103
8c5b83f0 2104 return ERR_PTR(err);
6b9ea5a6
RP
2105}
2106
2107int ip6_route_add(struct fib6_config *cfg)
2108{
2109 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2110 struct rt6_info *rt;
6b9ea5a6
RP
2111 int err;
2112
8c5b83f0
RP
2113 rt = ip6_route_info_create(cfg);
2114 if (IS_ERR(rt)) {
2115 err = PTR_ERR(rt);
2116 rt = NULL;
6b9ea5a6 2117 goto out;
8c5b83f0 2118 }
6b9ea5a6 2119
e715b6d3
FW
2120 err = ip6_convert_metrics(&mxc, cfg);
2121 if (err)
2122 goto out;
1da177e4 2123
e715b6d3
FW
2124 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2125
2126 kfree(mxc.mx);
6b9ea5a6 2127
e715b6d3 2128 return err;
1da177e4 2129out:
1da177e4 2130 if (rt)
d8d1f30b 2131 dst_free(&rt->dst);
6b9ea5a6 2132
1da177e4
LT
2133 return err;
2134}
2135
86872cb5 2136static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2137{
2138 int err;
c71099ac 2139 struct fib6_table *table;
d1918542 2140 struct net *net = dev_net(rt->dst.dev);
1da177e4 2141
8e3d5be7
MKL
2142 if (rt == net->ipv6.ip6_null_entry ||
2143 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2144 err = -ENOENT;
2145 goto out;
2146 }
6c813a72 2147
c71099ac
TG
2148 table = rt->rt6i_table;
2149 write_lock_bh(&table->tb6_lock);
86872cb5 2150 err = fib6_del(rt, info);
c71099ac 2151 write_unlock_bh(&table->tb6_lock);
1da177e4 2152
6825a26c 2153out:
94e187c0 2154 ip6_rt_put(rt);
1da177e4
LT
2155 return err;
2156}
2157
e0a1ad73
TG
2158int ip6_del_rt(struct rt6_info *rt)
2159{
4d1169c1 2160 struct nl_info info = {
d1918542 2161 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2162 };
528c4ceb 2163 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2164}
2165
86872cb5 2166static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2167{
c71099ac 2168 struct fib6_table *table;
1da177e4
LT
2169 struct fib6_node *fn;
2170 struct rt6_info *rt;
2171 int err = -ESRCH;
2172
5578689a 2173 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2174 if (!table)
c71099ac
TG
2175 return err;
2176
2177 read_lock_bh(&table->tb6_lock);
1da177e4 2178
c71099ac 2179 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2180 &cfg->fc_dst, cfg->fc_dst_len,
2181 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2182
1da177e4 2183 if (fn) {
d8d1f30b 2184 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2185 if ((rt->rt6i_flags & RTF_CACHE) &&
2186 !(cfg->fc_flags & RTF_CACHE))
2187 continue;
86872cb5 2188 if (cfg->fc_ifindex &&
d1918542
DM
2189 (!rt->dst.dev ||
2190 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2191 continue;
86872cb5
TG
2192 if (cfg->fc_flags & RTF_GATEWAY &&
2193 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2194 continue;
86872cb5 2195 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2196 continue;
c2ed1880
M
2197 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2198 continue;
d8d1f30b 2199 dst_hold(&rt->dst);
c71099ac 2200 read_unlock_bh(&table->tb6_lock);
1da177e4 2201
86872cb5 2202 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1da177e4
LT
2203 }
2204 }
c71099ac 2205 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2206
2207 return err;
2208}
2209
6700c270 2210static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2211{
a6279458 2212 struct netevent_redirect netevent;
e8599ff4 2213 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2214 struct ndisc_options ndopts;
2215 struct inet6_dev *in6_dev;
2216 struct neighbour *neigh;
71bcdba0 2217 struct rd_msg *msg;
6e157b6a
DM
2218 int optlen, on_link;
2219 u8 *lladdr;
e8599ff4 2220
29a3cad5 2221 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2222 optlen -= sizeof(*msg);
e8599ff4
DM
2223
2224 if (optlen < 0) {
6e157b6a 2225 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2226 return;
2227 }
2228
71bcdba0 2229 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2230
71bcdba0 2231 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2232 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2233 return;
2234 }
2235
6e157b6a 2236 on_link = 0;
71bcdba0 2237 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2238 on_link = 1;
71bcdba0 2239 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2240 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2241 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2242 return;
2243 }
2244
2245 in6_dev = __in6_dev_get(skb->dev);
2246 if (!in6_dev)
2247 return;
2248 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2249 return;
2250
2251 /* RFC2461 8.1:
2252 * The IP source address of the Redirect MUST be the same as the current
2253 * first-hop router for the specified ICMP Destination Address.
2254 */
2255
f997c55c 2256 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2257 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2258 return;
2259 }
6e157b6a
DM
2260
2261 lladdr = NULL;
e8599ff4
DM
2262 if (ndopts.nd_opts_tgt_lladdr) {
2263 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2264 skb->dev);
2265 if (!lladdr) {
2266 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2267 return;
2268 }
2269 }
2270
6e157b6a 2271 rt = (struct rt6_info *) dst;
ec13ad1d 2272 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2273 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2274 return;
6e157b6a 2275 }
e8599ff4 2276
6e157b6a
DM
2277 /* Redirect received -> path was valid.
2278 * Look, redirects are sent only in response to data packets,
2279 * so that this nexthop apparently is reachable. --ANK
2280 */
2281 dst_confirm(&rt->dst);
a6279458 2282
71bcdba0 2283 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2284 if (!neigh)
2285 return;
a6279458 2286
1da177e4
LT
2287 /*
2288 * We have finally decided to accept it.
2289 */
2290
f997c55c 2291 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
2292 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2293 NEIGH_UPDATE_F_OVERRIDE|
2294 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
2295 NEIGH_UPDATE_F_ISROUTER)),
2296 NDISC_REDIRECT, &ndopts);
1da177e4 2297
83a09abd 2298 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2299 if (!nrt)
1da177e4
LT
2300 goto out;
2301
2302 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2303 if (on_link)
2304 nrt->rt6i_flags &= ~RTF_GATEWAY;
2305
4e3fd7a0 2306 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2307
40e22e8f 2308 if (ip6_ins_rt(nrt))
1da177e4
LT
2309 goto out;
2310
d8d1f30b
CG
2311 netevent.old = &rt->dst;
2312 netevent.new = &nrt->dst;
71bcdba0 2313 netevent.daddr = &msg->dest;
60592833 2314 netevent.neigh = neigh;
8d71740c
TT
2315 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2316
38308473 2317 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2318 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2319 ip6_del_rt(rt);
1da177e4
LT
2320 }
2321
2322out:
e8599ff4 2323 neigh_release(neigh);
6e157b6a
DM
2324}
2325
1da177e4
LT
2326/*
2327 * Misc support functions
2328 */
2329
4b32b5ad
MKL
2330static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2331{
2332 BUG_ON(from->dst.from);
2333
2334 rt->rt6i_flags &= ~RTF_EXPIRES;
2335 dst_hold(&from->dst);
2336 rt->dst.from = &from->dst;
2337 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2338}
2339
83a09abd
MKL
2340static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2341{
2342 rt->dst.input = ort->dst.input;
2343 rt->dst.output = ort->dst.output;
2344 rt->rt6i_dst = ort->rt6i_dst;
2345 rt->dst.error = ort->dst.error;
2346 rt->rt6i_idev = ort->rt6i_idev;
2347 if (rt->rt6i_idev)
2348 in6_dev_hold(rt->rt6i_idev);
2349 rt->dst.lastuse = jiffies;
2350 rt->rt6i_gateway = ort->rt6i_gateway;
2351 rt->rt6i_flags = ort->rt6i_flags;
2352 rt6_set_from(rt, ort);
2353 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2354#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2355 rt->rt6i_src = ort->rt6i_src;
1da177e4 2356#endif
83a09abd
MKL
2357 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2358 rt->rt6i_table = ort->rt6i_table;
61adedf3 2359 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2360}
2361
70ceb4f5 2362#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2363static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 2364 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2365 const struct in6_addr *gwaddr,
2366 struct net_device *dev)
70ceb4f5 2367{
830218c1
DA
2368 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2369 int ifindex = dev->ifindex;
70ceb4f5
YH
2370 struct fib6_node *fn;
2371 struct rt6_info *rt = NULL;
c71099ac
TG
2372 struct fib6_table *table;
2373
830218c1 2374 table = fib6_get_table(net, tb_id);
38308473 2375 if (!table)
c71099ac 2376 return NULL;
70ceb4f5 2377
5744dd9b 2378 read_lock_bh(&table->tb6_lock);
67ba4152 2379 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2380 if (!fn)
2381 goto out;
2382
d8d1f30b 2383 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2384 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2385 continue;
2386 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2387 continue;
2388 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2389 continue;
d8d1f30b 2390 dst_hold(&rt->dst);
70ceb4f5
YH
2391 break;
2392 }
2393out:
5744dd9b 2394 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2395 return rt;
2396}
2397
efa2cea0 2398static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 2399 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2400 const struct in6_addr *gwaddr,
2401 struct net_device *dev,
95c96174 2402 unsigned int pref)
70ceb4f5 2403{
86872cb5 2404 struct fib6_config cfg = {
238fc7ea 2405 .fc_metric = IP6_RT_PRIO_USER,
830218c1 2406 .fc_ifindex = dev->ifindex,
86872cb5
TG
2407 .fc_dst_len = prefixlen,
2408 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2409 RTF_UP | RTF_PREF(pref),
15e47304 2410 .fc_nlinfo.portid = 0,
efa2cea0
DL
2411 .fc_nlinfo.nlh = NULL,
2412 .fc_nlinfo.nl_net = net,
86872cb5
TG
2413 };
2414
830218c1 2415 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
2416 cfg.fc_dst = *prefix;
2417 cfg.fc_gateway = *gwaddr;
70ceb4f5 2418
e317da96
YH
2419 /* We should treat it as a default route if prefix length is 0. */
2420 if (!prefixlen)
86872cb5 2421 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2422
86872cb5 2423 ip6_route_add(&cfg);
70ceb4f5 2424
830218c1 2425 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
2426}
2427#endif
2428
b71d1d42 2429struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2430{
830218c1 2431 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 2432 struct rt6_info *rt;
c71099ac 2433 struct fib6_table *table;
1da177e4 2434
830218c1 2435 table = fib6_get_table(dev_net(dev), tb_id);
38308473 2436 if (!table)
c71099ac 2437 return NULL;
1da177e4 2438
5744dd9b 2439 read_lock_bh(&table->tb6_lock);
67ba4152 2440 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2441 if (dev == rt->dst.dev &&
045927ff 2442 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2443 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2444 break;
2445 }
2446 if (rt)
d8d1f30b 2447 dst_hold(&rt->dst);
5744dd9b 2448 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2449 return rt;
2450}
2451
b71d1d42 2452struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2453 struct net_device *dev,
2454 unsigned int pref)
1da177e4 2455{
86872cb5 2456 struct fib6_config cfg = {
ca254490 2457 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2458 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2459 .fc_ifindex = dev->ifindex,
2460 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2461 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2462 .fc_nlinfo.portid = 0,
5578689a 2463 .fc_nlinfo.nlh = NULL,
c346dca1 2464 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2465 };
1da177e4 2466
4e3fd7a0 2467 cfg.fc_gateway = *gwaddr;
1da177e4 2468
830218c1
DA
2469 if (!ip6_route_add(&cfg)) {
2470 struct fib6_table *table;
2471
2472 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2473 if (table)
2474 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2475 }
1da177e4 2476
1da177e4
LT
2477 return rt6_get_dflt_router(gwaddr, dev);
2478}
2479
830218c1 2480static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
2481{
2482 struct rt6_info *rt;
2483
2484restart:
c71099ac 2485 read_lock_bh(&table->tb6_lock);
d8d1f30b 2486 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2487 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2488 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2489 dst_hold(&rt->dst);
c71099ac 2490 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2491 ip6_del_rt(rt);
1da177e4
LT
2492 goto restart;
2493 }
2494 }
c71099ac 2495 read_unlock_bh(&table->tb6_lock);
830218c1
DA
2496
2497 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2498}
2499
2500void rt6_purge_dflt_routers(struct net *net)
2501{
2502 struct fib6_table *table;
2503 struct hlist_head *head;
2504 unsigned int h;
2505
2506 rcu_read_lock();
2507
2508 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2509 head = &net->ipv6.fib_table_hash[h];
2510 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2511 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2512 __rt6_purge_dflt_routers(table);
2513 }
2514 }
2515
2516 rcu_read_unlock();
1da177e4
LT
2517}
2518
5578689a
DL
2519static void rtmsg_to_fib6_config(struct net *net,
2520 struct in6_rtmsg *rtmsg,
86872cb5
TG
2521 struct fib6_config *cfg)
2522{
2523 memset(cfg, 0, sizeof(*cfg));
2524
ca254490
DA
2525 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2526 : RT6_TABLE_MAIN;
86872cb5
TG
2527 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2528 cfg->fc_metric = rtmsg->rtmsg_metric;
2529 cfg->fc_expires = rtmsg->rtmsg_info;
2530 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2531 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2532 cfg->fc_flags = rtmsg->rtmsg_flags;
2533
5578689a 2534 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2535
4e3fd7a0
AD
2536 cfg->fc_dst = rtmsg->rtmsg_dst;
2537 cfg->fc_src = rtmsg->rtmsg_src;
2538 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2539}
2540
5578689a 2541int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2542{
86872cb5 2543 struct fib6_config cfg;
1da177e4
LT
2544 struct in6_rtmsg rtmsg;
2545 int err;
2546
67ba4152 2547 switch (cmd) {
1da177e4
LT
2548 case SIOCADDRT: /* Add a route */
2549 case SIOCDELRT: /* Delete a route */
af31f412 2550 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2551 return -EPERM;
2552 err = copy_from_user(&rtmsg, arg,
2553 sizeof(struct in6_rtmsg));
2554 if (err)
2555 return -EFAULT;
86872cb5 2556
5578689a 2557 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2558
1da177e4
LT
2559 rtnl_lock();
2560 switch (cmd) {
2561 case SIOCADDRT:
86872cb5 2562 err = ip6_route_add(&cfg);
1da177e4
LT
2563 break;
2564 case SIOCDELRT:
86872cb5 2565 err = ip6_route_del(&cfg);
1da177e4
LT
2566 break;
2567 default:
2568 err = -EINVAL;
2569 }
2570 rtnl_unlock();
2571
2572 return err;
3ff50b79 2573 }
1da177e4
LT
2574
2575 return -EINVAL;
2576}
2577
2578/*
2579 * Drop the packet on the floor
2580 */
2581
d5fdd6ba 2582static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2583{
612f09e8 2584 int type;
adf30907 2585 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2586 switch (ipstats_mib_noroutes) {
2587 case IPSTATS_MIB_INNOROUTES:
0660e03f 2588 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2589 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2590 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2591 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2592 break;
2593 }
2594 /* FALLTHROUGH */
2595 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2596 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2597 ipstats_mib_noroutes);
612f09e8
YH
2598 break;
2599 }
3ffe533c 2600 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2601 kfree_skb(skb);
2602 return 0;
2603}
2604
9ce8ade0
TG
2605static int ip6_pkt_discard(struct sk_buff *skb)
2606{
612f09e8 2607 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2608}
2609
ede2059d 2610static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2611{
adf30907 2612 skb->dev = skb_dst(skb)->dev;
612f09e8 2613 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2614}
2615
9ce8ade0
TG
2616static int ip6_pkt_prohibit(struct sk_buff *skb)
2617{
612f09e8 2618 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2619}
2620
ede2059d 2621static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2622{
adf30907 2623 skb->dev = skb_dst(skb)->dev;
612f09e8 2624 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2625}
2626
1da177e4
LT
2627/*
2628 * Allocate a dst for local (unicast / anycast) address.
2629 */
2630
2631struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2632 const struct in6_addr *addr,
8f031519 2633 bool anycast)
1da177e4 2634{
ca254490 2635 u32 tb_id;
c346dca1 2636 struct net *net = dev_net(idev->dev);
5f02ce24
DA
2637 struct net_device *dev = net->loopback_dev;
2638 struct rt6_info *rt;
2639
2640 /* use L3 Master device as loopback for host routes if device
2641 * is enslaved and address is not link local or multicast
2642 */
2643 if (!rt6_need_strict(addr))
2644 dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2645
2646 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 2647 if (!rt)
1da177e4
LT
2648 return ERR_PTR(-ENOMEM);
2649
1da177e4
LT
2650 in6_dev_hold(idev);
2651
11d53b49 2652 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2653 rt->dst.input = ip6_input;
2654 rt->dst.output = ip6_output;
1da177e4 2655 rt->rt6i_idev = idev;
1da177e4
LT
2656
2657 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2658 if (anycast)
2659 rt->rt6i_flags |= RTF_ANYCAST;
2660 else
1da177e4 2661 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2662
550bab42 2663 rt->rt6i_gateway = *addr;
4e3fd7a0 2664 rt->rt6i_dst.addr = *addr;
1da177e4 2665 rt->rt6i_dst.plen = 128;
ca254490
DA
2666 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2667 rt->rt6i_table = fib6_get_table(net, tb_id);
8e3d5be7 2668 rt->dst.flags |= DST_NOCACHE;
1da177e4 2669
d8d1f30b 2670 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2671
2672 return rt;
2673}
2674
c3968a85
DW
2675/* remove deleted ip from prefsrc entries */
/* Walker argument for fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* restrict to this device; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* preferred source address to remove */
};
2681
2682static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2683{
2684 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2685 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2686 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2687
d1918542 2688 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2689 rt != net->ipv6.ip6_null_entry &&
2690 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2691 /* remove prefsrc entry */
2692 rt->rt6i_prefsrc.plen = 0;
2693 }
2694 return 0;
2695}
2696
2697void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2698{
2699 struct net *net = dev_net(ifp->idev->dev);
2700 struct arg_dev_net_ip adni = {
2701 .dev = ifp->idev->dev,
2702 .net = net,
2703 .addr = &ifp->addr,
2704 };
0c3584d5 2705 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2706}
2707
be7a010d
DJ
2708#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2709#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2710
2711/* Remove routers and update dst entries when gateway turn into host. */
2712static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2713{
2714 struct in6_addr *gateway = (struct in6_addr *)arg;
2715
2716 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2717 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2718 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2719 return -1;
2720 }
2721 return 0;
2722}
2723
2724void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2725{
2726 fib6_clean_all(net, fib6_clean_tohost, gateway);
2727}
2728
8ed67789
DL
/* Walker argument for fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
2733
1da177e4
LT
2734static int fib6_ifdown(struct rt6_info *rt, void *arg)
2735{
bc3ef660 2736 const struct arg_dev_net *adn = arg;
2737 const struct net_device *dev = adn->dev;
8ed67789 2738
d1918542 2739 if ((rt->dst.dev == dev || !dev) &&
c159d30c 2740 rt != adn->net->ipv6.ip6_null_entry)
1da177e4 2741 return -1;
c159d30c 2742
1da177e4
LT
2743 return 0;
2744}
2745
f3db4851 2746void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2747{
8ed67789
DL
2748 struct arg_dev_net adn = {
2749 .dev = dev,
2750 .net = net,
2751 };
2752
0c3584d5 2753 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2754 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2755 if (dev)
2756 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2757}
2758
/* Walker argument for rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2763
2764static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2765{
2766 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2767 struct inet6_dev *idev;
2768
2769 /* In IPv6 pmtu discovery is not optional,
2770 so that RTAX_MTU lock cannot disable it.
2771 We still use this lock to block changes
2772 caused by addrconf/ndisc.
2773 */
2774
2775 idev = __in6_dev_get(arg->dev);
38308473 2776 if (!idev)
1da177e4
LT
2777 return 0;
2778
2779 /* For administrative MTU increase, there is no way to discover
2780 IPv6 PMTU increase, so PMTU increase should be updated here.
2781 Since RFC 1981 doesn't include administrative MTU increase
2782 update PMTU increase is a MUST. (i.e. jumbo frame)
2783 */
2784 /*
2785 If new MTU is less than route PMTU, this new MTU will be the
2786 lowest MTU in the path, update the route PMTU to reflect PMTU
2787 decreases; if new MTU is greater than route PMTU, and the
2788 old MTU is the lowest MTU in the path, update the route PMTU
2789 to reflect the increase. In this case if the other nodes' MTU
2790 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 2791 PMTU discovery.
1da177e4 2792 */
d1918542 2793 if (rt->dst.dev == arg->dev &&
fb56be83 2794 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad
MKL
2795 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2796 if (rt->rt6i_flags & RTF_CACHE) {
2797 /* For RTF_CACHE with rt6i_pmtu == 0
2798 * (i.e. a redirected route),
2799 * the metrics of its rt->dst.from has already
2800 * been updated.
2801 */
2802 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2803 rt->rt6i_pmtu = arg->mtu;
2804 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2805 (dst_mtu(&rt->dst) < arg->mtu &&
2806 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2807 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2808 }
566cfd8f 2809 }
1da177e4
LT
2810 return 0;
2811}
2812
95c96174 2813void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2814{
c71099ac
TG
2815 struct rt6_mtu_change_arg arg = {
2816 .dev = dev,
2817 .mtu = mtu,
2818 };
1da177e4 2819
0c3584d5 2820 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2821}
2822
ef7c79ed 2823static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2824 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2825 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2826 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2827 [RTA_PRIORITY] = { .type = NLA_U32 },
2828 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2829 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2830 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2831 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2832 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 2833 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 2834 [RTA_UID] = { .type = NLA_U32 },
86872cb5
TG
2835};
2836
2837static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2838 struct fib6_config *cfg)
1da177e4 2839{
86872cb5
TG
2840 struct rtmsg *rtm;
2841 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2842 unsigned int pref;
86872cb5 2843 int err;
1da177e4 2844
86872cb5
TG
2845 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2846 if (err < 0)
2847 goto errout;
1da177e4 2848
86872cb5
TG
2849 err = -EINVAL;
2850 rtm = nlmsg_data(nlh);
2851 memset(cfg, 0, sizeof(*cfg));
2852
2853 cfg->fc_table = rtm->rtm_table;
2854 cfg->fc_dst_len = rtm->rtm_dst_len;
2855 cfg->fc_src_len = rtm->rtm_src_len;
2856 cfg->fc_flags = RTF_UP;
2857 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2858 cfg->fc_type = rtm->rtm_type;
86872cb5 2859
ef2c7d7b
ND
2860 if (rtm->rtm_type == RTN_UNREACHABLE ||
2861 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2862 rtm->rtm_type == RTN_PROHIBIT ||
2863 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2864 cfg->fc_flags |= RTF_REJECT;
2865
ab79ad14
2866 if (rtm->rtm_type == RTN_LOCAL)
2867 cfg->fc_flags |= RTF_LOCAL;
2868
1f56a01f
MKL
2869 if (rtm->rtm_flags & RTM_F_CLONED)
2870 cfg->fc_flags |= RTF_CACHE;
2871
15e47304 2872 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2873 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2874 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2875
2876 if (tb[RTA_GATEWAY]) {
67b61f6c 2877 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2878 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2879 }
86872cb5
TG
2880
2881 if (tb[RTA_DST]) {
2882 int plen = (rtm->rtm_dst_len + 7) >> 3;
2883
2884 if (nla_len(tb[RTA_DST]) < plen)
2885 goto errout;
2886
2887 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2888 }
86872cb5
TG
2889
2890 if (tb[RTA_SRC]) {
2891 int plen = (rtm->rtm_src_len + 7) >> 3;
2892
2893 if (nla_len(tb[RTA_SRC]) < plen)
2894 goto errout;
2895
2896 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2897 }
86872cb5 2898
c3968a85 2899 if (tb[RTA_PREFSRC])
67b61f6c 2900 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2901
86872cb5
TG
2902 if (tb[RTA_OIF])
2903 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2904
2905 if (tb[RTA_PRIORITY])
2906 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2907
2908 if (tb[RTA_METRICS]) {
2909 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2910 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2911 }
86872cb5
TG
2912
2913 if (tb[RTA_TABLE])
2914 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2915
51ebd318
ND
2916 if (tb[RTA_MULTIPATH]) {
2917 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2918 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
2919
2920 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
2921 cfg->fc_mp_len);
2922 if (err < 0)
2923 goto errout;
51ebd318
ND
2924 }
2925
c78ba6d6
LR
2926 if (tb[RTA_PREF]) {
2927 pref = nla_get_u8(tb[RTA_PREF]);
2928 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2929 pref != ICMPV6_ROUTER_PREF_HIGH)
2930 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2931 cfg->fc_flags |= RTF_PREF(pref);
2932 }
2933
19e42e45
RP
2934 if (tb[RTA_ENCAP])
2935 cfg->fc_encap = tb[RTA_ENCAP];
2936
9ed59592 2937 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
2938 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2939
9ed59592
DA
2940 err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
2941 if (err < 0)
2942 goto errout;
2943 }
2944
32bc201e
XL
2945 if (tb[RTA_EXPIRES]) {
2946 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2947
2948 if (addrconf_finite_timeout(timeout)) {
2949 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2950 cfg->fc_flags |= RTF_EXPIRES;
2951 }
2952 }
2953
86872cb5
TG
2954 err = 0;
2955errout:
2956 return err;
1da177e4
LT
2957}
2958
6b9ea5a6
RP
2959struct rt6_nh {
2960 struct rt6_info *rt6_info;
2961 struct fib6_config r_cfg;
2962 struct mx6_config mxc;
2963 struct list_head next;
2964};
2965
2966static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
2967{
2968 struct rt6_nh *nh;
2969
2970 list_for_each_entry(nh, rt6_nh_list, next) {
2971 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
2972 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
2973 nh->r_cfg.fc_ifindex);
2974 }
2975}
2976
2977static int ip6_route_info_append(struct list_head *rt6_nh_list,
2978 struct rt6_info *rt, struct fib6_config *r_cfg)
2979{
2980 struct rt6_nh *nh;
2981 struct rt6_info *rtnh;
2982 int err = -EEXIST;
2983
2984 list_for_each_entry(nh, rt6_nh_list, next) {
2985 /* check if rt6_info already exists */
2986 rtnh = nh->rt6_info;
2987
2988 if (rtnh->dst.dev == rt->dst.dev &&
2989 rtnh->rt6i_idev == rt->rt6i_idev &&
2990 ipv6_addr_equal(&rtnh->rt6i_gateway,
2991 &rt->rt6i_gateway))
2992 return err;
2993 }
2994
2995 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
2996 if (!nh)
2997 return -ENOMEM;
2998 nh->rt6_info = rt;
2999 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3000 if (err) {
3001 kfree(nh);
3002 return err;
3003 }
3004 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3005 list_add_tail(&nh->next, rt6_nh_list);
3006
3007 return 0;
3008}
3009
3010static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318
ND
3011{
3012 struct fib6_config r_cfg;
3013 struct rtnexthop *rtnh;
6b9ea5a6
RP
3014 struct rt6_info *rt;
3015 struct rt6_nh *err_nh;
3016 struct rt6_nh *nh, *nh_safe;
51ebd318
ND
3017 int remaining;
3018 int attrlen;
6b9ea5a6
RP
3019 int err = 1;
3020 int nhn = 0;
3021 int replace = (cfg->fc_nlinfo.nlh &&
3022 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3023 LIST_HEAD(rt6_nh_list);
51ebd318 3024
35f1b4e9 3025 remaining = cfg->fc_mp_len;
51ebd318 3026 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3027
6b9ea5a6
RP
3028 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3029 * rt6_info structs per nexthop
3030 */
51ebd318
ND
3031 while (rtnh_ok(rtnh, remaining)) {
3032 memcpy(&r_cfg, cfg, sizeof(*cfg));
3033 if (rtnh->rtnh_ifindex)
3034 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3035
3036 attrlen = rtnh_attrlen(rtnh);
3037 if (attrlen > 0) {
3038 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3039
3040 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3041 if (nla) {
67b61f6c 3042 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3043 r_cfg.fc_flags |= RTF_GATEWAY;
3044 }
19e42e45
RP
3045 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3046 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3047 if (nla)
3048 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3049 }
6b9ea5a6 3050
8c5b83f0
RP
3051 rt = ip6_route_info_create(&r_cfg);
3052 if (IS_ERR(rt)) {
3053 err = PTR_ERR(rt);
3054 rt = NULL;
6b9ea5a6 3055 goto cleanup;
8c5b83f0 3056 }
6b9ea5a6
RP
3057
3058 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3059 if (err) {
6b9ea5a6
RP
3060 dst_free(&rt->dst);
3061 goto cleanup;
3062 }
3063
3064 rtnh = rtnh_next(rtnh, &remaining);
3065 }
3066
3067 err_nh = NULL;
3068 list_for_each_entry(nh, &rt6_nh_list, next) {
3069 err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
3070 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3071 nh->rt6_info = NULL;
3072 if (err) {
3073 if (replace && nhn)
3074 ip6_print_replace_route_err(&rt6_nh_list);
3075 err_nh = nh;
3076 goto add_errout;
51ebd318 3077 }
6b9ea5a6 3078
1a72418b 3079 /* Because each route is added like a single route we remove
27596472
MK
3080 * these flags after the first nexthop: if there is a collision,
3081 * we have already failed to add the first nexthop:
3082 * fib6_add_rt2node() has rejected it; when replacing, old
3083 * nexthops have been replaced by first new, the rest should
3084 * be added to it.
1a72418b 3085 */
27596472
MK
3086 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3087 NLM_F_REPLACE);
6b9ea5a6
RP
3088 nhn++;
3089 }
3090
3091 goto cleanup;
3092
3093add_errout:
3094 /* Delete routes that were already added */
3095 list_for_each_entry(nh, &rt6_nh_list, next) {
3096 if (err_nh == nh)
3097 break;
3098 ip6_route_del(&nh->r_cfg);
3099 }
3100
3101cleanup:
3102 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3103 if (nh->rt6_info)
3104 dst_free(&nh->rt6_info->dst);
52fe51f8 3105 kfree(nh->mxc.mx);
6b9ea5a6
RP
3106 list_del(&nh->next);
3107 kfree(nh);
3108 }
3109
3110 return err;
3111}
3112
3113static int ip6_route_multipath_del(struct fib6_config *cfg)
3114{
3115 struct fib6_config r_cfg;
3116 struct rtnexthop *rtnh;
3117 int remaining;
3118 int attrlen;
3119 int err = 1, last_err = 0;
3120
3121 remaining = cfg->fc_mp_len;
3122 rtnh = (struct rtnexthop *)cfg->fc_mp;
3123
3124 /* Parse a Multipath Entry */
3125 while (rtnh_ok(rtnh, remaining)) {
3126 memcpy(&r_cfg, cfg, sizeof(*cfg));
3127 if (rtnh->rtnh_ifindex)
3128 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3129
3130 attrlen = rtnh_attrlen(rtnh);
3131 if (attrlen > 0) {
3132 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3133
3134 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3135 if (nla) {
3136 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3137 r_cfg.fc_flags |= RTF_GATEWAY;
3138 }
3139 }
3140 err = ip6_route_del(&r_cfg);
3141 if (err)
3142 last_err = err;
3143
51ebd318
ND
3144 rtnh = rtnh_next(rtnh, &remaining);
3145 }
3146
3147 return last_err;
3148}
3149
67ba4152 3150static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3151{
86872cb5
TG
3152 struct fib6_config cfg;
3153 int err;
1da177e4 3154
86872cb5
TG
3155 err = rtm_to_fib6_config(skb, nlh, &cfg);
3156 if (err < 0)
3157 return err;
3158
51ebd318 3159 if (cfg.fc_mp)
6b9ea5a6 3160 return ip6_route_multipath_del(&cfg);
51ebd318
ND
3161 else
3162 return ip6_route_del(&cfg);
1da177e4
LT
3163}
3164
67ba4152 3165static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3166{
86872cb5
TG
3167 struct fib6_config cfg;
3168 int err;
1da177e4 3169
86872cb5
TG
3170 err = rtm_to_fib6_config(skb, nlh, &cfg);
3171 if (err < 0)
3172 return err;
3173
51ebd318 3174 if (cfg.fc_mp)
6b9ea5a6 3175 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3176 else
3177 return ip6_route_add(&cfg);
1da177e4
LT
3178}
3179
19e42e45 3180static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f
TG
3181{
3182 return NLMSG_ALIGN(sizeof(struct rtmsg))
3183 + nla_total_size(16) /* RTA_SRC */
3184 + nla_total_size(16) /* RTA_DST */
3185 + nla_total_size(16) /* RTA_GATEWAY */
3186 + nla_total_size(16) /* RTA_PREFSRC */
3187 + nla_total_size(4) /* RTA_TABLE */
3188 + nla_total_size(4) /* RTA_IIF */
3189 + nla_total_size(4) /* RTA_OIF */
3190 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3191 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3192 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3193 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3194 + nla_total_size(1) /* RTA_PREF */
61adedf3 3195 + lwtunnel_get_encap_size(rt->dst.lwtstate);
339bf98f
TG
3196}
3197
191cd582
BH
3198static int rt6_fill_node(struct net *net,
3199 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3200 struct in6_addr *dst, struct in6_addr *src,
15e47304 3201 int iif, int type, u32 portid, u32 seq,
7bc570c8 3202 int prefix, int nowait, unsigned int flags)
1da177e4 3203{
4b32b5ad 3204 u32 metrics[RTAX_MAX];
1da177e4 3205 struct rtmsg *rtm;
2d7202bf 3206 struct nlmsghdr *nlh;
e3703b3d 3207 long expires;
9e762a4a 3208 u32 table;
1da177e4
LT
3209
3210 if (prefix) { /* user wants prefix routes only */
3211 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
3212 /* success since this is not a prefix route */
3213 return 1;
3214 }
3215 }
3216
15e47304 3217 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3218 if (!nlh)
26932566 3219 return -EMSGSIZE;
2d7202bf
TG
3220
3221 rtm = nlmsg_data(nlh);
1da177e4
LT
3222 rtm->rtm_family = AF_INET6;
3223 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3224 rtm->rtm_src_len = rt->rt6i_src.plen;
3225 rtm->rtm_tos = 0;
c71099ac 3226 if (rt->rt6i_table)
9e762a4a 3227 table = rt->rt6i_table->tb6_id;
c71099ac 3228 else
9e762a4a
PM
3229 table = RT6_TABLE_UNSPEC;
3230 rtm->rtm_table = table;
c78679e8
DM
3231 if (nla_put_u32(skb, RTA_TABLE, table))
3232 goto nla_put_failure;
ef2c7d7b
ND
3233 if (rt->rt6i_flags & RTF_REJECT) {
3234 switch (rt->dst.error) {
3235 case -EINVAL:
3236 rtm->rtm_type = RTN_BLACKHOLE;
3237 break;
3238 case -EACCES:
3239 rtm->rtm_type = RTN_PROHIBIT;
3240 break;
b4949ab2
ND
3241 case -EAGAIN:
3242 rtm->rtm_type = RTN_THROW;
3243 break;
ef2c7d7b
ND
3244 default:
3245 rtm->rtm_type = RTN_UNREACHABLE;
3246 break;
3247 }
3248 }
38308473 3249 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3250 rtm->rtm_type = RTN_LOCAL;
d1918542 3251 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3252 rtm->rtm_type = RTN_LOCAL;
3253 else
3254 rtm->rtm_type = RTN_UNICAST;
3255 rtm->rtm_flags = 0;
35103d11 3256 if (!netif_carrier_ok(rt->dst.dev)) {
cea45e20 3257 rtm->rtm_flags |= RTNH_F_LINKDOWN;
35103d11
AG
3258 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3259 rtm->rtm_flags |= RTNH_F_DEAD;
3260 }
1da177e4
LT
3261 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3262 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3263 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3264 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3265 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3266 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3267 rtm->rtm_protocol = RTPROT_RA;
3268 else
3269 rtm->rtm_protocol = RTPROT_KERNEL;
3270 }
1da177e4 3271
38308473 3272 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3273 rtm->rtm_flags |= RTM_F_CLONED;
3274
3275 if (dst) {
930345ea 3276 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3277 goto nla_put_failure;
1ab1457c 3278 rtm->rtm_dst_len = 128;
1da177e4 3279 } else if (rtm->rtm_dst_len)
930345ea 3280 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3281 goto nla_put_failure;
1da177e4
LT
3282#ifdef CONFIG_IPV6_SUBTREES
3283 if (src) {
930345ea 3284 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3285 goto nla_put_failure;
1ab1457c 3286 rtm->rtm_src_len = 128;
c78679e8 3287 } else if (rtm->rtm_src_len &&
930345ea 3288 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3289 goto nla_put_failure;
1da177e4 3290#endif
7bc570c8
YH
3291 if (iif) {
3292#ifdef CONFIG_IPV6_MROUTE
3293 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2cf75070
NA
3294 int err = ip6mr_get_route(net, skb, rtm, nowait,
3295 portid);
3296
7bc570c8
YH
3297 if (err <= 0) {
3298 if (!nowait) {
3299 if (err == 0)
3300 return 0;
3301 goto nla_put_failure;
3302 } else {
3303 if (err == -EMSGSIZE)
3304 goto nla_put_failure;
3305 }
3306 }
3307 } else
3308#endif
c78679e8
DM
3309 if (nla_put_u32(skb, RTA_IIF, iif))
3310 goto nla_put_failure;
7bc570c8 3311 } else if (dst) {
1da177e4 3312 struct in6_addr saddr_buf;
c78679e8 3313 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3314 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3315 goto nla_put_failure;
1da177e4 3316 }
2d7202bf 3317
c3968a85
DW
3318 if (rt->rt6i_prefsrc.plen) {
3319 struct in6_addr saddr_buf;
4e3fd7a0 3320 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3321 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3322 goto nla_put_failure;
c3968a85
DW
3323 }
3324
4b32b5ad
MKL
3325 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3326 if (rt->rt6i_pmtu)
3327 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3328 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3329 goto nla_put_failure;
3330
dd0cbf29 3331 if (rt->rt6i_flags & RTF_GATEWAY) {
930345ea 3332 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
94f826b8 3333 goto nla_put_failure;
94f826b8 3334 }
2d7202bf 3335
c78679e8
DM
3336 if (rt->dst.dev &&
3337 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3338 goto nla_put_failure;
3339 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3340 goto nla_put_failure;
8253947e
LW
3341
3342 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3343
87a50699 3344 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3345 goto nla_put_failure;
2d7202bf 3346
c78ba6d6
LR
3347 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3348 goto nla_put_failure;
3349
ea7a8085
DA
3350 if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3351 goto nla_put_failure;
19e42e45 3352
053c095a
JB
3353 nlmsg_end(skb, nlh);
3354 return 0;
2d7202bf
TG
3355
3356nla_put_failure:
26932566
PM
3357 nlmsg_cancel(skb, nlh);
3358 return -EMSGSIZE;
1da177e4
LT
3359}
3360
1b43af54 3361int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3362{
3363 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3364 int prefix;
3365
2d7202bf
TG
3366 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3367 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
1da177e4
LT
3368 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3369 } else
3370 prefix = 0;
3371
191cd582
BH
3372 return rt6_fill_node(arg->net,
3373 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3374 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
7bc570c8 3375 prefix, 0, NLM_F_MULTI);
1da177e4
LT
3376}
3377
67ba4152 3378static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3379{
3b1e0a65 3380 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3381 struct nlattr *tb[RTA_MAX+1];
3382 struct rt6_info *rt;
1da177e4 3383 struct sk_buff *skb;
ab364a6f 3384 struct rtmsg *rtm;
4c9483b2 3385 struct flowi6 fl6;
72331bc0 3386 int err, iif = 0, oif = 0;
1da177e4 3387
ab364a6f
TG
3388 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3389 if (err < 0)
3390 goto errout;
1da177e4 3391
ab364a6f 3392 err = -EINVAL;
4c9483b2 3393 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
3394 rtm = nlmsg_data(nlh);
3395 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
1da177e4 3396
ab364a6f
TG
3397 if (tb[RTA_SRC]) {
3398 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3399 goto errout;
3400
4e3fd7a0 3401 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3402 }
3403
3404 if (tb[RTA_DST]) {
3405 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3406 goto errout;
3407
4e3fd7a0 3408 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3409 }
3410
3411 if (tb[RTA_IIF])
3412 iif = nla_get_u32(tb[RTA_IIF]);
3413
3414 if (tb[RTA_OIF])
72331bc0 3415 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3416
2e47b291
LC
3417 if (tb[RTA_MARK])
3418 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3419
622ec2c9
LC
3420 if (tb[RTA_UID])
3421 fl6.flowi6_uid = make_kuid(current_user_ns(),
3422 nla_get_u32(tb[RTA_UID]));
3423 else
3424 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3425
1da177e4
LT
3426 if (iif) {
3427 struct net_device *dev;
72331bc0
SL
3428 int flags = 0;
3429
5578689a 3430 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3431 if (!dev) {
3432 err = -ENODEV;
ab364a6f 3433 goto errout;
1da177e4 3434 }
72331bc0
SL
3435
3436 fl6.flowi6_iif = iif;
3437
3438 if (!ipv6_addr_any(&fl6.saddr))
3439 flags |= RT6_LOOKUP_F_HAS_SADDR;
3440
3441 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3442 flags);
3443 } else {
3444 fl6.flowi6_oif = oif;
3445
3446 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3447 }
3448
ab364a6f 3449 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3450 if (!skb) {
94e187c0 3451 ip6_rt_put(rt);
ab364a6f
TG
3452 err = -ENOBUFS;
3453 goto errout;
3454 }
1da177e4 3455
ab364a6f
TG
3456 /* Reserve room for dummy headers, this skb can pass
3457 through good chunk of routing engine.
3458 */
459a98ed 3459 skb_reset_mac_header(skb);
ab364a6f 3460 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1da177e4 3461
d8d1f30b 3462 skb_dst_set(skb, &rt->dst);
1da177e4 3463
4c9483b2 3464 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3465 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
7bc570c8 3466 nlh->nlmsg_seq, 0, 0, 0);
1da177e4 3467 if (err < 0) {
ab364a6f
TG
3468 kfree_skb(skb);
3469 goto errout;
1da177e4
LT
3470 }
3471
15e47304 3472 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3473errout:
1da177e4 3474 return err;
1da177e4
LT
3475}
3476
37a1d361
RP
3477void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3478 unsigned int nlm_flags)
1da177e4
LT
3479{
3480 struct sk_buff *skb;
5578689a 3481 struct net *net = info->nl_net;
528c4ceb
DL
3482 u32 seq;
3483 int err;
3484
3485 err = -ENOBUFS;
38308473 3486 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3487
19e42e45 3488 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3489 if (!skb)
21713ebc
TG
3490 goto errout;
3491
191cd582 3492 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
37a1d361 3493 event, info->portid, seq, 0, 0, nlm_flags);
26932566
PM
3494 if (err < 0) {
3495 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3496 WARN_ON(err == -EMSGSIZE);
3497 kfree_skb(skb);
3498 goto errout;
3499 }
15e47304 3500 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3501 info->nlh, gfp_any());
3502 return;
21713ebc
TG
3503errout:
3504 if (err < 0)
5578689a 3505 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3506}
3507
8ed67789 3508static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3509 unsigned long event, void *ptr)
8ed67789 3510{
351638e7 3511 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3512 struct net *net = dev_net(dev);
8ed67789 3513
e12b8e2f
WC
3514 if (!(dev->flags & IFF_LOOPBACK))
3515 return NOTIFY_OK;
3516
3517 if (event == NETDEV_REGISTER) {
d8d1f30b 3518 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3519 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3520#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3521 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3522 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3523 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 3524 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
e12b8e2f
WC
3525#endif
3526 } else if (event == NETDEV_UNREGISTER) {
3527 in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
3528#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3529 in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
3530 in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
3531#endif
3532 }
3533
3534 return NOTIFY_OK;
3535}
3536
1da177e4
LT
3537/*
3538 * /proc
3539 */
3540
3541#ifdef CONFIG_PROC_FS
3542
33120b30
AD
3543static const struct file_operations ipv6_route_proc_fops = {
3544 .owner = THIS_MODULE,
3545 .open = ipv6_route_open,
3546 .read = seq_read,
3547 .llseek = seq_lseek,
8d2ca1d7 3548 .release = seq_release_net,
33120b30
AD
3549};
3550
1da177e4
LT
3551static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3552{
69ddb805 3553 struct net *net = (struct net *)seq->private;
1da177e4 3554 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3555 net->ipv6.rt6_stats->fib_nodes,
3556 net->ipv6.rt6_stats->fib_route_nodes,
3557 net->ipv6.rt6_stats->fib_rt_alloc,
3558 net->ipv6.rt6_stats->fib_rt_entries,
3559 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3560 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3561 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3562
3563 return 0;
3564}
3565
3566static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3567{
de05c557 3568 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3569}
3570
9a32144e 3571static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3572 .owner = THIS_MODULE,
3573 .open = rt6_stats_seq_open,
3574 .read = seq_read,
3575 .llseek = seq_lseek,
b6fcbdb4 3576 .release = single_release_net,
1da177e4
LT
3577};
3578#endif /* CONFIG_PROC_FS */
3579
3580#ifdef CONFIG_SYSCTL
3581
1da177e4 3582static
fe2c6338 3583int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3584 void __user *buffer, size_t *lenp, loff_t *ppos)
3585{
c486da34
LAG
3586 struct net *net;
3587 int delay;
3588 if (!write)
1da177e4 3589 return -EINVAL;
c486da34
LAG
3590
3591 net = (struct net *)ctl->extra1;
3592 delay = net->ipv6.sysctl.flush_delay;
3593 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3594 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3595 return 0;
1da177e4
LT
3596}
3597
fe2c6338 3598struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3599 {
1da177e4 3600 .procname = "flush",
4990509f 3601 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3602 .maxlen = sizeof(int),
89c8b3a1 3603 .mode = 0200,
6d9f239a 3604 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3605 },
3606 {
1da177e4 3607 .procname = "gc_thresh",
9a7ec3a9 3608 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3609 .maxlen = sizeof(int),
3610 .mode = 0644,
6d9f239a 3611 .proc_handler = proc_dointvec,
1da177e4
LT
3612 },
3613 {
1da177e4 3614 .procname = "max_size",
4990509f 3615 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3616 .maxlen = sizeof(int),
3617 .mode = 0644,
6d9f239a 3618 .proc_handler = proc_dointvec,
1da177e4
LT
3619 },
3620 {
1da177e4 3621 .procname = "gc_min_interval",
4990509f 3622 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3623 .maxlen = sizeof(int),
3624 .mode = 0644,
6d9f239a 3625 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3626 },
3627 {
1da177e4 3628 .procname = "gc_timeout",
4990509f 3629 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3630 .maxlen = sizeof(int),
3631 .mode = 0644,
6d9f239a 3632 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3633 },
3634 {
1da177e4 3635 .procname = "gc_interval",
4990509f 3636 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3637 .maxlen = sizeof(int),
3638 .mode = 0644,
6d9f239a 3639 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3640 },
3641 {
1da177e4 3642 .procname = "gc_elasticity",
4990509f 3643 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3644 .maxlen = sizeof(int),
3645 .mode = 0644,
f3d3f616 3646 .proc_handler = proc_dointvec,
1da177e4
LT
3647 },
3648 {
1da177e4 3649 .procname = "mtu_expires",
4990509f 3650 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3651 .maxlen = sizeof(int),
3652 .mode = 0644,
6d9f239a 3653 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3654 },
3655 {
1da177e4 3656 .procname = "min_adv_mss",
4990509f 3657 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3658 .maxlen = sizeof(int),
3659 .mode = 0644,
f3d3f616 3660 .proc_handler = proc_dointvec,
1da177e4
LT
3661 },
3662 {
1da177e4 3663 .procname = "gc_min_interval_ms",
4990509f 3664 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3665 .maxlen = sizeof(int),
3666 .mode = 0644,
6d9f239a 3667 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3668 },
f8572d8f 3669 { }
1da177e4
LT
3670};
3671
2c8c1e72 3672struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3673{
3674 struct ctl_table *table;
3675
3676 table = kmemdup(ipv6_route_table_template,
3677 sizeof(ipv6_route_table_template),
3678 GFP_KERNEL);
5ee09105
YH
3679
3680 if (table) {
3681 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3682 table[0].extra1 = net;
86393e52 3683 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3684 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3685 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3686 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3687 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3688 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3689 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3690 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3691 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3692
3693 /* Don't export sysctls to unprivileged users */
3694 if (net->user_ns != &init_user_ns)
3695 table[0].procname = NULL;
5ee09105
YH
3696 }
3697
760f2d01
DL
3698 return table;
3699}
1da177e4
LT
3700#endif
3701
2c8c1e72 3702static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3703{
633d424b 3704 int ret = -ENOMEM;
8ed67789 3705
86393e52
AD
3706 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3707 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3708
fc66f95c
ED
3709 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3710 goto out_ip6_dst_ops;
3711
8ed67789
DL
3712 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3713 sizeof(*net->ipv6.ip6_null_entry),
3714 GFP_KERNEL);
3715 if (!net->ipv6.ip6_null_entry)
fc66f95c 3716 goto out_ip6_dst_entries;
d8d1f30b 3717 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3718 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3719 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3720 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3721 ip6_template_metrics, true);
8ed67789
DL
3722
3723#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3724 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3725 sizeof(*net->ipv6.ip6_prohibit_entry),
3726 GFP_KERNEL);
68fffc67
PZ
3727 if (!net->ipv6.ip6_prohibit_entry)
3728 goto out_ip6_null_entry;
d8d1f30b 3729 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3730 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3731 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3732 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3733 ip6_template_metrics, true);
8ed67789
DL
3734
3735 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3736 sizeof(*net->ipv6.ip6_blk_hole_entry),
3737 GFP_KERNEL);
68fffc67
PZ
3738 if (!net->ipv6.ip6_blk_hole_entry)
3739 goto out_ip6_prohibit_entry;
d8d1f30b 3740 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3741 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3742 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3743 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3744 ip6_template_metrics, true);
8ed67789
DL
3745#endif
3746
b339a47c
PZ
3747 net->ipv6.sysctl.flush_delay = 0;
3748 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3749 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3750 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3751 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3752 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3753 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3754 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3755
6891a346
BT
3756 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3757
8ed67789
DL
3758 ret = 0;
3759out:
3760 return ret;
f2fc6a54 3761
68fffc67
PZ
3762#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3763out_ip6_prohibit_entry:
3764 kfree(net->ipv6.ip6_prohibit_entry);
3765out_ip6_null_entry:
3766 kfree(net->ipv6.ip6_null_entry);
3767#endif
fc66f95c
ED
3768out_ip6_dst_entries:
3769 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3770out_ip6_dst_ops:
f2fc6a54 3771 goto out;
cdb18761
DL
3772}
3773
2c8c1e72 3774static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3775{
8ed67789
DL
3776 kfree(net->ipv6.ip6_null_entry);
3777#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3778 kfree(net->ipv6.ip6_prohibit_entry);
3779 kfree(net->ipv6.ip6_blk_hole_entry);
3780#endif
41bb78b4 3781 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3782}
3783
d189634e
TG
3784static int __net_init ip6_route_net_init_late(struct net *net)
3785{
3786#ifdef CONFIG_PROC_FS
d4beaa66
G
3787 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3788 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3789#endif
3790 return 0;
3791}
3792
3793static void __net_exit ip6_route_net_exit_late(struct net *net)
3794{
3795#ifdef CONFIG_PROC_FS
ece31ffd
G
3796 remove_proc_entry("ipv6_route", net->proc_net);
3797 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3798#endif
3799}
3800
cdb18761
DL
3801static struct pernet_operations ip6_route_net_ops = {
3802 .init = ip6_route_net_init,
3803 .exit = ip6_route_net_exit,
3804};
3805
c3426b47
DM
3806static int __net_init ipv6_inetpeer_init(struct net *net)
3807{
3808 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3809
3810 if (!bp)
3811 return -ENOMEM;
3812 inet_peer_base_init(bp);
3813 net->ipv6.peers = bp;
3814 return 0;
3815}
3816
3817static void __net_exit ipv6_inetpeer_exit(struct net *net)
3818{
3819 struct inet_peer_base *bp = net->ipv6.peers;
3820
3821 net->ipv6.peers = NULL;
56a6b248 3822 inetpeer_invalidate_tree(bp);
c3426b47
DM
3823 kfree(bp);
3824}
3825
2b823f72 3826static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3827 .init = ipv6_inetpeer_init,
3828 .exit = ipv6_inetpeer_exit,
3829};
3830
d189634e
TG
3831static struct pernet_operations ip6_route_net_late_ops = {
3832 .init = ip6_route_net_init_late,
3833 .exit = ip6_route_net_exit_late,
3834};
3835
8ed67789
DL
3836static struct notifier_block ip6_route_dev_notifier = {
3837 .notifier_call = ip6_route_dev_notify,
e12b8e2f 3838 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
3839};
3840
3f0bbd5f
WC
3841void __init ip6_route_init_special_entries(void)
3842{
3843 /* Registering of the loopback is done before this portion of code,
3844 * the loopback reference in rt6_info will not be taken, do it
3845 * manually for init_net */
3846 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3847 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3848 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3849 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3850 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3851 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3852 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3853 #endif
3854}
3855
433d49c3 3856int __init ip6_route_init(void)
1da177e4 3857{
433d49c3 3858 int ret;
8d0b94af 3859 int cpu;
433d49c3 3860
9a7ec3a9
DL
3861 ret = -ENOMEM;
3862 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3863 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3864 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3865 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 3866 goto out;
14e50e57 3867
fc66f95c 3868 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 3869 if (ret)
bdb3289f 3870 goto out_kmem_cache;
bdb3289f 3871
c3426b47
DM
3872 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3873 if (ret)
e8803b6c 3874 goto out_dst_entries;
2a0c451a 3875
7e52b33b
DM
3876 ret = register_pernet_subsys(&ip6_route_net_ops);
3877 if (ret)
3878 goto out_register_inetpeer;
c3426b47 3879
5dc121e9
AE
3880 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3881
e8803b6c 3882 ret = fib6_init();
433d49c3 3883 if (ret)
8ed67789 3884 goto out_register_subsys;
433d49c3 3885
433d49c3
DL
3886 ret = xfrm6_init();
3887 if (ret)
e8803b6c 3888 goto out_fib6_init;
c35b7e72 3889
433d49c3
DL
3890 ret = fib6_rules_init();
3891 if (ret)
3892 goto xfrm6_init;
7e5449c2 3893
d189634e
TG
3894 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3895 if (ret)
3896 goto fib6_rules_init;
3897
433d49c3 3898 ret = -ENOBUFS;
c7ac8679
GR
3899 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3900 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3901 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 3902 goto out_register_late_subsys;
c127ea2c 3903
8ed67789 3904 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 3905 if (ret)
d189634e 3906 goto out_register_late_subsys;
8ed67789 3907
8d0b94af
MKL
3908 for_each_possible_cpu(cpu) {
3909 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
3910
3911 INIT_LIST_HEAD(&ul->head);
3912 spin_lock_init(&ul->lock);
3913 }
3914
433d49c3
DL
3915out:
3916 return ret;
3917
d189634e
TG
3918out_register_late_subsys:
3919 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 3920fib6_rules_init:
433d49c3
DL
3921 fib6_rules_cleanup();
3922xfrm6_init:
433d49c3 3923 xfrm6_fini();
2a0c451a
TG
3924out_fib6_init:
3925 fib6_gc_cleanup();
8ed67789
DL
3926out_register_subsys:
3927 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
3928out_register_inetpeer:
3929 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
3930out_dst_entries:
3931 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 3932out_kmem_cache:
f2fc6a54 3933 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 3934 goto out;
1da177e4
LT
3935}
3936
3937void ip6_route_cleanup(void)
3938{
8ed67789 3939 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 3940 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 3941 fib6_rules_cleanup();
1da177e4 3942 xfrm6_fini();
1da177e4 3943 fib6_gc_cleanup();
c3426b47 3944 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 3945 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 3946 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 3947 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 3948}