]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv6/route.c
xfrm: policy: remove garbage_collect callback
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4 66
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/* Result codes of the next-hop neighbour check.  Every failure code is
 * negative; success is the single positive value.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
101static size_t rt6_nlmsg_size(struct rt6_info *rt);
102static int rt6_fill_node(struct net *net,
103 struct sk_buff *skb, struct rt6_info *rt,
104 struct in6_addr *dst, struct in6_addr *src,
105 int iif, int type, u32 portid, u32 seq,
106 unsigned int flags);
1da177e4 107
70ceb4f5 108#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 109static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 110 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
111 const struct in6_addr *gwaddr,
112 struct net_device *dev,
95c96174 113 unsigned int pref);
efa2cea0 114static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 115 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
116 const struct in6_addr *gwaddr,
117 struct net_device *dev);
70ceb4f5
YH
118#endif
119
8d0b94af
MKL
120struct uncached_list {
121 spinlock_t lock;
122 struct list_head head;
123};
124
125static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126
127static void rt6_uncached_list_add(struct rt6_info *rt)
128{
129 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
130
131 rt->dst.flags |= DST_NOCACHE;
132 rt->rt6i_uncached_list = ul;
133
134 spin_lock_bh(&ul->lock);
135 list_add_tail(&rt->rt6i_uncached, &ul->head);
136 spin_unlock_bh(&ul->lock);
137}
138
139static void rt6_uncached_list_del(struct rt6_info *rt)
140{
141 if (!list_empty(&rt->rt6i_uncached)) {
142 struct uncached_list *ul = rt->rt6i_uncached_list;
143
144 spin_lock_bh(&ul->lock);
145 list_del(&rt->rt6i_uncached);
146 spin_unlock_bh(&ul->lock);
147 }
148}
149
150static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
151{
152 struct net_device *loopback_dev = net->loopback_dev;
153 int cpu;
154
e332bc67
EB
155 if (dev == loopback_dev)
156 return;
157
8d0b94af
MKL
158 for_each_possible_cpu(cpu) {
159 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
160 struct rt6_info *rt;
161
162 spin_lock_bh(&ul->lock);
163 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
164 struct inet6_dev *rt_idev = rt->rt6i_idev;
165 struct net_device *rt_dev = rt->dst.dev;
166
e332bc67 167 if (rt_idev->dev == dev) {
8d0b94af
MKL
168 rt->rt6i_idev = in6_dev_get(loopback_dev);
169 in6_dev_put(rt_idev);
170 }
171
e332bc67 172 if (rt_dev == dev) {
8d0b94af
MKL
173 rt->dst.dev = loopback_dev;
174 dev_hold(rt->dst.dev);
175 dev_put(rt_dev);
176 }
177 }
178 spin_unlock_bh(&ul->lock);
179 }
180}
181
d52d3997
MKL
182static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
183{
184 return dst_metrics_write_ptr(rt->dst.from);
185}
186
06582540
DM
187static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
188{
4b32b5ad 189 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 190
d52d3997
MKL
191 if (rt->rt6i_flags & RTF_PCPU)
192 return rt6_pcpu_cow_metrics(rt);
193 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
194 return NULL;
195 else
3b471175 196 return dst_cow_metrics_generic(dst, old);
06582540
DM
197}
198
f894cbf8
DM
199static inline const void *choose_neigh_daddr(struct rt6_info *rt,
200 struct sk_buff *skb,
201 const void *daddr)
39232973
DM
202{
203 struct in6_addr *p = &rt->rt6i_gateway;
204
a7563f34 205 if (!ipv6_addr_any(p))
39232973 206 return (const void *) p;
f894cbf8
DM
207 else if (skb)
208 return &ipv6_hdr(skb)->daddr;
39232973
DM
209 return daddr;
210}
211
f894cbf8
DM
212static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
213 struct sk_buff *skb,
214 const void *daddr)
d3aaeb38 215{
39232973
DM
216 struct rt6_info *rt = (struct rt6_info *) dst;
217 struct neighbour *n;
218
f894cbf8 219 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 220 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
221 if (n)
222 return n;
223 return neigh_create(&nd_tbl, daddr, dst->dev);
224}
225
9a7ec3a9 226static struct dst_ops ip6_dst_ops_template = {
1da177e4 227 .family = AF_INET6,
1da177e4
LT
228 .gc = ip6_dst_gc,
229 .gc_thresh = 1024,
230 .check = ip6_dst_check,
0dbaee3b 231 .default_advmss = ip6_default_advmss,
ebb762f2 232 .mtu = ip6_mtu,
06582540 233 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
234 .destroy = ip6_dst_destroy,
235 .ifdown = ip6_dst_ifdown,
236 .negative_advice = ip6_negative_advice,
237 .link_failure = ip6_link_failure,
238 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 239 .redirect = rt6_do_redirect,
9f8955cc 240 .local_out = __ip6_local_out,
d3aaeb38 241 .neigh_lookup = ip6_neigh_lookup,
1da177e4
LT
242};
243
ebb762f2 244static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 245{
618f9bc7
SK
246 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
247
248 return mtu ? : dst->dev->mtu;
ec831ea7
RD
249}
250
6700c270
DM
251static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
252 struct sk_buff *skb, u32 mtu)
14e50e57
DM
253{
254}
255
6700c270
DM
/* Blackhole dsts ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
260
14e50e57
DM
261static struct dst_ops ip6_dst_blackhole_ops = {
262 .family = AF_INET6,
14e50e57
DM
263 .destroy = ip6_dst_destroy,
264 .check = ip6_dst_check,
ebb762f2 265 .mtu = ip6_blackhole_mtu,
214f45c9 266 .default_advmss = ip6_default_advmss,
14e50e57 267 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 268 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 269 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 270 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
271};
272
62fa8a84 273static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 274 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
275};
276
fb0af4c7 277static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
278 .dst = {
279 .__refcnt = ATOMIC_INIT(1),
280 .__use = 1,
2c20cbd7 281 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 282 .error = -ENETUNREACH,
d8d1f30b
CG
283 .input = ip6_pkt_discard,
284 .output = ip6_pkt_discard_out,
1da177e4
LT
285 },
286 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 287 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
288 .rt6i_metric = ~(u32) 0,
289 .rt6i_ref = ATOMIC_INIT(1),
290};
291
101367c2
TG
292#ifdef CONFIG_IPV6_MULTIPLE_TABLES
293
fb0af4c7 294static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
295 .dst = {
296 .__refcnt = ATOMIC_INIT(1),
297 .__use = 1,
2c20cbd7 298 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 299 .error = -EACCES,
d8d1f30b
CG
300 .input = ip6_pkt_prohibit,
301 .output = ip6_pkt_prohibit_out,
101367c2
TG
302 },
303 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 304 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
305 .rt6i_metric = ~(u32) 0,
306 .rt6i_ref = ATOMIC_INIT(1),
307};
308
fb0af4c7 309static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
310 .dst = {
311 .__refcnt = ATOMIC_INIT(1),
312 .__use = 1,
2c20cbd7 313 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 314 .error = -EINVAL,
d8d1f30b 315 .input = dst_discard,
ede2059d 316 .output = dst_discard_out,
101367c2
TG
317 },
318 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 319 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
320 .rt6i_metric = ~(u32) 0,
321 .rt6i_ref = ATOMIC_INIT(1),
322};
323
324#endif
325
ebfa45f0
MKL
326static void rt6_info_init(struct rt6_info *rt)
327{
328 struct dst_entry *dst = &rt->dst;
329
330 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
331 INIT_LIST_HEAD(&rt->rt6i_siblings);
332 INIT_LIST_HEAD(&rt->rt6i_uncached);
333}
334
1da177e4 335/* allocate dst with ip6_dst_ops */
d52d3997
MKL
336static struct rt6_info *__ip6_dst_alloc(struct net *net,
337 struct net_device *dev,
ad706862 338 int flags)
1da177e4 339{
97bab73f 340 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
6f3118b5 341 0, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 342
ebfa45f0
MKL
343 if (rt)
344 rt6_info_init(rt);
8104891b 345
cf911662 346 return rt;
1da177e4
LT
347}
348
9ab179d8
DA
349struct rt6_info *ip6_dst_alloc(struct net *net,
350 struct net_device *dev,
351 int flags)
d52d3997 352{
ad706862 353 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
354
355 if (rt) {
356 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
357 if (rt->rt6i_pcpu) {
358 int cpu;
359
360 for_each_possible_cpu(cpu) {
361 struct rt6_info **p;
362
363 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
364 /* no one shares rt */
365 *p = NULL;
366 }
367 } else {
368 dst_destroy((struct dst_entry *)rt);
369 return NULL;
370 }
371 }
372
373 return rt;
374}
9ab179d8 375EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 376
1da177e4
LT
377static void ip6_dst_destroy(struct dst_entry *dst)
378{
379 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 380 struct dst_entry *from = dst->from;
8d0b94af 381 struct inet6_dev *idev;
1da177e4 382
4b32b5ad 383 dst_destroy_metrics_generic(dst);
87775312 384 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
385 rt6_uncached_list_del(rt);
386
387 idev = rt->rt6i_idev;
38308473 388 if (idev) {
1da177e4
LT
389 rt->rt6i_idev = NULL;
390 in6_dev_put(idev);
1ab1457c 391 }
1716a961 392
ecd98837
YH
393 dst->from = NULL;
394 dst_release(from);
b3419363
DM
395}
396
1da177e4
LT
397static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
398 int how)
399{
400 struct rt6_info *rt = (struct rt6_info *)dst;
401 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 402 struct net_device *loopback_dev =
c346dca1 403 dev_net(dev)->loopback_dev;
1da177e4 404
97cac082
DM
405 if (dev != loopback_dev) {
406 if (idev && idev->dev == dev) {
407 struct inet6_dev *loopback_idev =
408 in6_dev_get(loopback_dev);
409 if (loopback_idev) {
410 rt->rt6i_idev = loopback_idev;
411 in6_dev_put(idev);
412 }
413 }
1da177e4
LT
414 }
415}
416
5973fb1e
MKL
417static bool __rt6_check_expired(const struct rt6_info *rt)
418{
419 if (rt->rt6i_flags & RTF_EXPIRES)
420 return time_after(jiffies, rt->dst.expires);
421 else
422 return false;
423}
424
a50feda5 425static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 426{
1716a961
G
427 if (rt->rt6i_flags & RTF_EXPIRES) {
428 if (time_after(jiffies, rt->dst.expires))
a50feda5 429 return true;
1716a961 430 } else if (rt->dst.from) {
3fd91fb3 431 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 432 }
a50feda5 433 return false;
1da177e4
LT
434}
435
51ebd318
ND
/* Multipath route selection:
 *   Map the flow hash of @fl6 onto the range [0, candidate_count).
 *   Adapted from fib_info_hashfn()
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	unsigned int slot = get_hash_from_flowi6(fl6) % candidate_count;

	return slot;
}
445
446static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
447 struct flowi6 *fl6, int oif,
448 int strict)
51ebd318
ND
449{
450 struct rt6_info *sibling, *next_sibling;
451 int route_choosen;
452
453 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
454 /* Don't change the route, if route_choosen == 0
455 * (siblings does not include ourself)
456 */
457 if (route_choosen)
458 list_for_each_entry_safe(sibling, next_sibling,
459 &match->rt6i_siblings, rt6i_siblings) {
460 route_choosen--;
461 if (route_choosen == 0) {
52bd4c0c
ND
462 if (rt6_score_route(sibling, oif, strict) < 0)
463 break;
51ebd318
ND
464 match = sibling;
465 break;
466 }
467 }
468 return match;
469}
470
1da177e4 471/*
c71099ac 472 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
473 */
474
8ed67789
DL
475static inline struct rt6_info *rt6_device_match(struct net *net,
476 struct rt6_info *rt,
b71d1d42 477 const struct in6_addr *saddr,
1da177e4 478 int oif,
d420895e 479 int flags)
1da177e4
LT
480{
481 struct rt6_info *local = NULL;
482 struct rt6_info *sprt;
483
dd3abc4e
YH
484 if (!oif && ipv6_addr_any(saddr))
485 goto out;
486
d8d1f30b 487 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 488 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
489
490 if (oif) {
1da177e4
LT
491 if (dev->ifindex == oif)
492 return sprt;
493 if (dev->flags & IFF_LOOPBACK) {
38308473 494 if (!sprt->rt6i_idev ||
1da177e4 495 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 496 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 497 continue;
17fb0b2b
DA
498 if (local &&
499 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
500 continue;
501 }
502 local = sprt;
503 }
dd3abc4e
YH
504 } else {
505 if (ipv6_chk_addr(net, saddr, dev,
506 flags & RT6_LOOKUP_F_IFACE))
507 return sprt;
1da177e4 508 }
dd3abc4e 509 }
1da177e4 510
dd3abc4e 511 if (oif) {
1da177e4
LT
512 if (local)
513 return local;
514
d420895e 515 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 516 return net->ipv6.ip6_null_entry;
1da177e4 517 }
dd3abc4e 518out:
1da177e4
LT
519 return rt;
520}
521
27097255 522#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
523struct __rt6_probe_work {
524 struct work_struct work;
525 struct in6_addr target;
526 struct net_device *dev;
527};
528
529static void rt6_probe_deferred(struct work_struct *w)
530{
531 struct in6_addr mcaddr;
532 struct __rt6_probe_work *work =
533 container_of(w, struct __rt6_probe_work, work);
534
535 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 536 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 537 dev_put(work->dev);
662f5533 538 kfree(work);
c2f17e82
HFS
539}
540
27097255
YH
541static void rt6_probe(struct rt6_info *rt)
542{
990edb42 543 struct __rt6_probe_work *work;
f2c31e32 544 struct neighbour *neigh;
27097255
YH
545 /*
546 * Okay, this does not seem to be appropriate
547 * for now, however, we need to check if it
548 * is really so; aka Router Reachability Probing.
549 *
550 * Router Reachability Probe MUST be rate-limited
551 * to no more than one per minute.
552 */
2152caea 553 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 554 return;
2152caea
YH
555 rcu_read_lock_bh();
556 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
557 if (neigh) {
8d6c31bf
MKL
558 if (neigh->nud_state & NUD_VALID)
559 goto out;
560
990edb42 561 work = NULL;
2152caea 562 write_lock(&neigh->lock);
990edb42
MKL
563 if (!(neigh->nud_state & NUD_VALID) &&
564 time_after(jiffies,
565 neigh->updated +
566 rt->rt6i_idev->cnf.rtr_probe_interval)) {
567 work = kmalloc(sizeof(*work), GFP_ATOMIC);
568 if (work)
569 __neigh_set_probe_once(neigh);
c2f17e82 570 }
2152caea 571 write_unlock(&neigh->lock);
990edb42
MKL
572 } else {
573 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 574 }
990edb42
MKL
575
576 if (work) {
577 INIT_WORK(&work->work, rt6_probe_deferred);
578 work->target = rt->rt6i_gateway;
579 dev_hold(rt->dst.dev);
580 work->dev = rt->dst.dev;
581 schedule_work(&work->work);
582 }
583
8d6c31bf 584out:
2152caea 585 rcu_read_unlock_bh();
27097255
YH
586}
587#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* nothing to do */
}
591#endif
592
1da177e4 593/*
554cfb7e 594 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 595 */
b6f99a21 596static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 597{
d1918542 598 struct net_device *dev = rt->dst.dev;
161980f4 599 if (!oif || dev->ifindex == oif)
554cfb7e 600 return 2;
161980f4
DM
601 if ((dev->flags & IFF_LOOPBACK) &&
602 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
603 return 1;
604 return 0;
554cfb7e 605}
1da177e4 606
afc154e9 607static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 608{
f2c31e32 609 struct neighbour *neigh;
afc154e9 610 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 611
4d0c5911
YH
612 if (rt->rt6i_flags & RTF_NONEXTHOP ||
613 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 614 return RT6_NUD_SUCCEED;
145a3621
YH
615
616 rcu_read_lock_bh();
617 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
618 if (neigh) {
619 read_lock(&neigh->lock);
554cfb7e 620 if (neigh->nud_state & NUD_VALID)
afc154e9 621 ret = RT6_NUD_SUCCEED;
398bcbeb 622#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 623 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 624 ret = RT6_NUD_SUCCEED;
7e980569
JB
625 else
626 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 627#endif
145a3621 628 read_unlock(&neigh->lock);
afc154e9
HFS
629 } else {
630 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 631 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 632 }
145a3621
YH
633 rcu_read_unlock_bh();
634
a5a81f0b 635 return ret;
1da177e4
LT
636}
637
554cfb7e
YH
638static int rt6_score_route(struct rt6_info *rt, int oif,
639 int strict)
1da177e4 640{
a5a81f0b 641 int m;
1ab1457c 642
4d0c5911 643 m = rt6_check_dev(rt, oif);
77d16f45 644 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 645 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
646#ifdef CONFIG_IPV6_ROUTER_PREF
647 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
648#endif
afc154e9
HFS
649 if (strict & RT6_LOOKUP_F_REACHABLE) {
650 int n = rt6_check_neigh(rt);
651 if (n < 0)
652 return n;
653 }
554cfb7e
YH
654 return m;
655}
656
f11e6659 657static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
658 int *mpri, struct rt6_info *match,
659 bool *do_rr)
554cfb7e 660{
f11e6659 661 int m;
afc154e9 662 bool match_do_rr = false;
35103d11
AG
663 struct inet6_dev *idev = rt->rt6i_idev;
664 struct net_device *dev = rt->dst.dev;
665
666 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
667 idev->cnf.ignore_routes_with_linkdown &&
668 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 669 goto out;
f11e6659
DM
670
671 if (rt6_check_expired(rt))
672 goto out;
673
674 m = rt6_score_route(rt, oif, strict);
7e980569 675 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
676 match_do_rr = true;
677 m = 0; /* lowest valid score */
7e980569 678 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 679 goto out;
afc154e9
HFS
680 }
681
682 if (strict & RT6_LOOKUP_F_REACHABLE)
683 rt6_probe(rt);
f11e6659 684
7e980569 685 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 686 if (m > *mpri) {
afc154e9 687 *do_rr = match_do_rr;
f11e6659
DM
688 *mpri = m;
689 match = rt;
f11e6659 690 }
f11e6659
DM
691out:
692 return match;
693}
694
695static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
696 struct rt6_info *rr_head,
afc154e9
HFS
697 u32 metric, int oif, int strict,
698 bool *do_rr)
f11e6659 699{
9fbdcfaf 700 struct rt6_info *rt, *match, *cont;
554cfb7e 701 int mpri = -1;
1da177e4 702
f11e6659 703 match = NULL;
9fbdcfaf
SK
704 cont = NULL;
705 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
706 if (rt->rt6i_metric != metric) {
707 cont = rt;
708 break;
709 }
710
711 match = find_match(rt, oif, strict, &mpri, match, do_rr);
712 }
713
714 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
715 if (rt->rt6i_metric != metric) {
716 cont = rt;
717 break;
718 }
719
afc154e9 720 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
721 }
722
723 if (match || !cont)
724 return match;
725
726 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 727 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 728
f11e6659
DM
729 return match;
730}
1da177e4 731
f11e6659
DM
732static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
733{
734 struct rt6_info *match, *rt0;
8ed67789 735 struct net *net;
afc154e9 736 bool do_rr = false;
1da177e4 737
f11e6659
DM
738 rt0 = fn->rr_ptr;
739 if (!rt0)
740 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 741
afc154e9
HFS
742 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
743 &do_rr);
1da177e4 744
afc154e9 745 if (do_rr) {
d8d1f30b 746 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 747
554cfb7e 748 /* no entries matched; do round-robin */
f11e6659
DM
749 if (!next || next->rt6i_metric != rt0->rt6i_metric)
750 next = fn->leaf;
751
752 if (next != rt0)
753 fn->rr_ptr = next;
1da177e4 754 }
1da177e4 755
d1918542 756 net = dev_net(rt0->dst.dev);
a02cec21 757 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
758}
759
8b9df265
MKL
760static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
761{
762 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
763}
764
70ceb4f5
YH
765#ifdef CONFIG_IPV6_ROUTE_INFO
766int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 767 const struct in6_addr *gwaddr)
70ceb4f5 768{
c346dca1 769 struct net *net = dev_net(dev);
70ceb4f5
YH
770 struct route_info *rinfo = (struct route_info *) opt;
771 struct in6_addr prefix_buf, *prefix;
772 unsigned int pref;
4bed72e4 773 unsigned long lifetime;
70ceb4f5
YH
774 struct rt6_info *rt;
775
776 if (len < sizeof(struct route_info)) {
777 return -EINVAL;
778 }
779
780 /* Sanity check for prefix_len and length */
781 if (rinfo->length > 3) {
782 return -EINVAL;
783 } else if (rinfo->prefix_len > 128) {
784 return -EINVAL;
785 } else if (rinfo->prefix_len > 64) {
786 if (rinfo->length < 2) {
787 return -EINVAL;
788 }
789 } else if (rinfo->prefix_len > 0) {
790 if (rinfo->length < 1) {
791 return -EINVAL;
792 }
793 }
794
795 pref = rinfo->route_pref;
796 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 797 return -EINVAL;
70ceb4f5 798
4bed72e4 799 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
800
801 if (rinfo->length == 3)
802 prefix = (struct in6_addr *)rinfo->prefix;
803 else {
804 /* this function is safe */
805 ipv6_addr_prefix(&prefix_buf,
806 (struct in6_addr *)rinfo->prefix,
807 rinfo->prefix_len);
808 prefix = &prefix_buf;
809 }
810
f104a567
DJ
811 if (rinfo->prefix_len == 0)
812 rt = rt6_get_dflt_router(gwaddr, dev);
813 else
814 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 815 gwaddr, dev);
70ceb4f5
YH
816
817 if (rt && !lifetime) {
e0a1ad73 818 ip6_del_rt(rt);
70ceb4f5
YH
819 rt = NULL;
820 }
821
822 if (!rt && lifetime)
830218c1
DA
823 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
824 dev, pref);
70ceb4f5
YH
825 else if (rt)
826 rt->rt6i_flags = RTF_ROUTEINFO |
827 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
828
829 if (rt) {
1716a961
G
830 if (!addrconf_finite_timeout(lifetime))
831 rt6_clean_expires(rt);
832 else
833 rt6_set_expires(rt, jiffies + HZ * lifetime);
834
94e187c0 835 ip6_rt_put(rt);
70ceb4f5
YH
836 }
837 return 0;
838}
839#endif
840
a3c00e46
MKL
841static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
842 struct in6_addr *saddr)
843{
844 struct fib6_node *pn;
845 while (1) {
846 if (fn->fn_flags & RTN_TL_ROOT)
847 return NULL;
848 pn = fn->parent;
849 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
850 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
851 else
852 fn = pn;
853 if (fn->fn_flags & RTN_RTINFO)
854 return fn;
855 }
856}
c71099ac 857
8ed67789
DL
858static struct rt6_info *ip6_pol_route_lookup(struct net *net,
859 struct fib6_table *table,
4c9483b2 860 struct flowi6 *fl6, int flags)
1da177e4
LT
861{
862 struct fib6_node *fn;
863 struct rt6_info *rt;
864
c71099ac 865 read_lock_bh(&table->tb6_lock);
4c9483b2 866 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
867restart:
868 rt = fn->leaf;
4c9483b2 869 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 870 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 871 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
872 if (rt == net->ipv6.ip6_null_entry) {
873 fn = fib6_backtrack(fn, &fl6->saddr);
874 if (fn)
875 goto restart;
876 }
d8d1f30b 877 dst_use(&rt->dst, jiffies);
c71099ac 878 read_unlock_bh(&table->tb6_lock);
b811580d
DA
879
880 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
881
c71099ac
TG
882 return rt;
883
884}
885
67ba4152 886struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
887 int flags)
888{
889 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
890}
891EXPORT_SYMBOL_GPL(ip6_route_lookup);
892
9acd9f3a
YH
893struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
894 const struct in6_addr *saddr, int oif, int strict)
c71099ac 895{
4c9483b2
DM
896 struct flowi6 fl6 = {
897 .flowi6_oif = oif,
898 .daddr = *daddr,
c71099ac
TG
899 };
900 struct dst_entry *dst;
77d16f45 901 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 902
adaa70bb 903 if (saddr) {
4c9483b2 904 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
905 flags |= RT6_LOOKUP_F_HAS_SADDR;
906 }
907
4c9483b2 908 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
909 if (dst->error == 0)
910 return (struct rt6_info *) dst;
911
912 dst_release(dst);
913
1da177e4
LT
914 return NULL;
915}
7159039a
YH
916EXPORT_SYMBOL(rt6_lookup);
917
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In any case, if the caller does not hold a
   reference to it, it may be destroyed.
 */
923
e5fd387a 924static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
e715b6d3 925 struct mx6_config *mxc)
1da177e4
LT
926{
927 int err;
c71099ac 928 struct fib6_table *table;
1da177e4 929
c71099ac
TG
930 table = rt->rt6i_table;
931 write_lock_bh(&table->tb6_lock);
e715b6d3 932 err = fib6_add(&table->tb6_root, rt, info, mxc);
c71099ac 933 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
934
935 return err;
936}
937
40e22e8f
TG
938int ip6_ins_rt(struct rt6_info *rt)
939{
e715b6d3
FW
940 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
941 struct mx6_config mxc = { .mx = NULL, };
942
943 return __ip6_ins_rt(rt, &info, &mxc);
40e22e8f
TG
944}
945
8b9df265
MKL
946static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
947 const struct in6_addr *daddr,
948 const struct in6_addr *saddr)
1da177e4 949{
1da177e4
LT
950 struct rt6_info *rt;
951
952 /*
953 * Clone the route.
954 */
955
d52d3997 956 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 957 ort = (struct rt6_info *)ort->dst.from;
1da177e4 958
ad706862 959 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
960
961 if (!rt)
962 return NULL;
963
964 ip6_rt_copy_init(rt, ort);
965 rt->rt6i_flags |= RTF_CACHE;
966 rt->rt6i_metric = 0;
967 rt->dst.flags |= DST_HOST;
968 rt->rt6i_dst.addr = *daddr;
969 rt->rt6i_dst.plen = 128;
1da177e4 970
83a09abd
MKL
971 if (!rt6_is_gw_or_nonexthop(ort)) {
972 if (ort->rt6i_dst.plen != 128 &&
973 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
974 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 975#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
976 if (rt->rt6i_src.plen && saddr) {
977 rt->rt6i_src.addr = *saddr;
978 rt->rt6i_src.plen = 128;
8b9df265 979 }
83a09abd 980#endif
95a9a5ba 981 }
1da177e4 982
95a9a5ba
YH
983 return rt;
984}
1da177e4 985
d52d3997
MKL
986static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
987{
988 struct rt6_info *pcpu_rt;
989
990 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 991 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
992
993 if (!pcpu_rt)
994 return NULL;
995 ip6_rt_copy_init(pcpu_rt, rt);
996 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
997 pcpu_rt->rt6i_flags |= RTF_PCPU;
998 return pcpu_rt;
999}
1000
1001/* It should be called with read_lock_bh(&tb6_lock) acquired */
1002static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1003{
a73e4195 1004 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1005
1006 p = this_cpu_ptr(rt->rt6i_pcpu);
1007 pcpu_rt = *p;
1008
a73e4195
MKL
1009 if (pcpu_rt) {
1010 dst_hold(&pcpu_rt->dst);
1011 rt6_dst_from_metrics_check(pcpu_rt);
1012 }
1013 return pcpu_rt;
1014}
1015
1016static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1017{
9c7370a1 1018 struct fib6_table *table = rt->rt6i_table;
a73e4195 1019 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1020
1021 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1022 if (!pcpu_rt) {
1023 struct net *net = dev_net(rt->dst.dev);
1024
9c7370a1
MKL
1025 dst_hold(&net->ipv6.ip6_null_entry->dst);
1026 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1027 }
1028
9c7370a1
MKL
1029 read_lock_bh(&table->tb6_lock);
1030 if (rt->rt6i_pcpu) {
1031 p = this_cpu_ptr(rt->rt6i_pcpu);
1032 prev = cmpxchg(p, NULL, pcpu_rt);
1033 if (prev) {
1034 /* If someone did it before us, return prev instead */
1035 dst_destroy(&pcpu_rt->dst);
1036 pcpu_rt = prev;
1037 }
1038 } else {
1039 /* rt has been removed from the fib6 tree
1040 * before we have a chance to acquire the read_lock.
1041 * In this case, don't brother to create a pcpu rt
1042 * since rt is going away anyway. The next
1043 * dst_check() will trigger a re-lookup.
1044 */
d52d3997 1045 dst_destroy(&pcpu_rt->dst);
9c7370a1 1046 pcpu_rt = rt;
d52d3997 1047 }
d52d3997
MKL
1048 dst_hold(&pcpu_rt->dst);
1049 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1050 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1051 return pcpu_rt;
1052}
1053
9ff74384
DA
1054struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1055 int oif, struct flowi6 *fl6, int flags)
1da177e4 1056{
367efcb9 1057 struct fib6_node *fn, *saved_fn;
45e4fd26 1058 struct rt6_info *rt;
c71099ac 1059 int strict = 0;
1da177e4 1060
77d16f45 1061 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1062 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1063 if (net->ipv6.devconf_all->forwarding == 0)
1064 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1065
c71099ac 1066 read_lock_bh(&table->tb6_lock);
1da177e4 1067
4c9483b2 1068 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1069 saved_fn = fn;
1da177e4 1070
ca254490
DA
1071 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1072 oif = 0;
1073
a3c00e46 1074redo_rt6_select:
367efcb9 1075 rt = rt6_select(fn, oif, strict);
52bd4c0c 1076 if (rt->rt6i_nsiblings)
367efcb9 1077 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1078 if (rt == net->ipv6.ip6_null_entry) {
1079 fn = fib6_backtrack(fn, &fl6->saddr);
1080 if (fn)
1081 goto redo_rt6_select;
367efcb9
MKL
1082 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1083 /* also consider unreachable route */
1084 strict &= ~RT6_LOOKUP_F_REACHABLE;
1085 fn = saved_fn;
1086 goto redo_rt6_select;
367efcb9 1087 }
a3c00e46
MKL
1088 }
1089
fb9de91e 1090
3da59bd9 1091 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1092 dst_use(&rt->dst, jiffies);
1093 read_unlock_bh(&table->tb6_lock);
1094
1095 rt6_dst_from_metrics_check(rt);
b811580d
DA
1096
1097 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1098 return rt;
3da59bd9
MKL
1099 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1100 !(rt->rt6i_flags & RTF_GATEWAY))) {
1101 /* Create a RTF_CACHE clone which will not be
1102 * owned by the fib6 tree. It is for the special case where
1103 * the daddr in the skb during the neighbor look-up is different
1104 * from the fl6->daddr used to look-up route here.
1105 */
1106
1107 struct rt6_info *uncached_rt;
1108
d52d3997
MKL
1109 dst_use(&rt->dst, jiffies);
1110 read_unlock_bh(&table->tb6_lock);
1111
3da59bd9
MKL
1112 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1113 dst_release(&rt->dst);
c71099ac 1114
3da59bd9 1115 if (uncached_rt)
8d0b94af 1116 rt6_uncached_list_add(uncached_rt);
3da59bd9
MKL
1117 else
1118 uncached_rt = net->ipv6.ip6_null_entry;
d52d3997 1119
3da59bd9 1120 dst_hold(&uncached_rt->dst);
b811580d
DA
1121
1122 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1123 return uncached_rt;
3da59bd9 1124
d52d3997
MKL
1125 } else {
1126 /* Get a percpu copy */
1127
1128 struct rt6_info *pcpu_rt;
1129
1130 rt->dst.lastuse = jiffies;
1131 rt->dst.__use++;
1132 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1133
9c7370a1
MKL
1134 if (pcpu_rt) {
1135 read_unlock_bh(&table->tb6_lock);
1136 } else {
1137 /* We have to do the read_unlock first
1138 * because rt6_make_pcpu_route() may trigger
1139 * ip6_dst_gc() which will take the write_lock.
1140 */
1141 dst_hold(&rt->dst);
1142 read_unlock_bh(&table->tb6_lock);
a73e4195 1143 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1144 dst_release(&rt->dst);
1145 }
d52d3997 1146
b811580d 1147 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1148 return pcpu_rt;
9c7370a1 1149
d52d3997 1150 }
1da177e4 1151}
9ff74384 1152EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1153
8ed67789 1154static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1155 struct flowi6 *fl6, int flags)
4acad72d 1156{
4c9483b2 1157 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1158}
1159
d409b847
MB
1160struct dst_entry *ip6_route_input_lookup(struct net *net,
1161 struct net_device *dev,
1162 struct flowi6 *fl6, int flags)
72331bc0
SL
1163{
1164 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1165 flags |= RT6_LOOKUP_F_IFACE;
1166
1167 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1168}
d409b847 1169EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1170
c71099ac
TG
1171void ip6_route_input(struct sk_buff *skb)
1172{
b71d1d42 1173 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1174 struct net *net = dev_net(skb->dev);
adaa70bb 1175 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1176 struct ip_tunnel_info *tun_info;
4c9483b2 1177 struct flowi6 fl6 = {
e0d56fdd 1178 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1179 .daddr = iph->daddr,
1180 .saddr = iph->saddr,
6502ca52 1181 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1182 .flowi6_mark = skb->mark,
1183 .flowi6_proto = iph->nexthdr,
c71099ac 1184 };
adaa70bb 1185
904af04d 1186 tun_info = skb_tunnel_info(skb);
46fa062a 1187 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1188 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1189 skb_dst_drop(skb);
72331bc0 1190 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1191}
1192
8ed67789 1193static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1194 struct flowi6 *fl6, int flags)
1da177e4 1195{
4c9483b2 1196 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1197}
1198
6f21c96a
PA
1199struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1200 struct flowi6 *fl6, int flags)
c71099ac 1201{
d46a9d67 1202 bool any_src;
c71099ac 1203
4c1feac5
DA
1204 if (rt6_need_strict(&fl6->daddr)) {
1205 struct dst_entry *dst;
1206
1207 dst = l3mdev_link_scope_lookup(net, fl6);
1208 if (dst)
1209 return dst;
1210 }
ca254490 1211
1fb9489b 1212 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1213
d46a9d67 1214 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1215 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1216 (fl6->flowi6_oif && any_src))
77d16f45 1217 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1218
d46a9d67 1219 if (!any_src)
adaa70bb 1220 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1221 else if (sk)
1222 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1223
4c9483b2 1224 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1225}
6f21c96a 1226EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1227
2774c131 1228struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1229{
5c1e6aa3 1230 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
14e50e57
DM
1231 struct dst_entry *new = NULL;
1232
f5b0a874 1233 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
14e50e57 1234 if (rt) {
0a1f5962 1235 rt6_info_init(rt);
8104891b 1236
0a1f5962 1237 new = &rt->dst;
14e50e57 1238 new->__use = 1;
352e512c 1239 new->input = dst_discard;
ede2059d 1240 new->output = dst_discard_out;
14e50e57 1241
0a1f5962 1242 dst_copy_metrics(new, &ort->dst);
14e50e57
DM
1243 rt->rt6i_idev = ort->rt6i_idev;
1244 if (rt->rt6i_idev)
1245 in6_dev_hold(rt->rt6i_idev);
14e50e57 1246
4e3fd7a0 1247 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1248 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1249 rt->rt6i_metric = 0;
1250
1251 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1252#ifdef CONFIG_IPV6_SUBTREES
1253 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1254#endif
1255
1256 dst_free(new);
1257 }
1258
69ead7af
DM
1259 dst_release(dst_orig);
1260 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1261}
14e50e57 1262
1da177e4
LT
1263/*
1264 * Destination cache support functions
1265 */
1266
4b32b5ad
MKL
1267static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1268{
1269 if (rt->dst.from &&
1270 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1271 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1272}
1273
3da59bd9
MKL
1274static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1275{
1276 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1277 return NULL;
1278
1279 if (rt6_check_expired(rt))
1280 return NULL;
1281
1282 return &rt->dst;
1283}
1284
1285static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1286{
5973fb1e
MKL
1287 if (!__rt6_check_expired(rt) &&
1288 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1289 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1290 return &rt->dst;
1291 else
1292 return NULL;
1293}
1294
1da177e4
LT
1295static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1296{
1297 struct rt6_info *rt;
1298
1299 rt = (struct rt6_info *) dst;
1300
6f3118b5
ND
1301 /* All IPV6 dsts are created with ->obsolete set to the value
1302 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1303 * into this function always.
1304 */
e3bc10bd 1305
4b32b5ad
MKL
1306 rt6_dst_from_metrics_check(rt);
1307
02bcf4e0
MKL
1308 if (rt->rt6i_flags & RTF_PCPU ||
1309 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
3da59bd9
MKL
1310 return rt6_dst_from_check(rt, cookie);
1311 else
1312 return rt6_check(rt, cookie);
1da177e4
LT
1313}
1314
1315static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1316{
1317 struct rt6_info *rt = (struct rt6_info *) dst;
1318
1319 if (rt) {
54c1a859
YH
1320 if (rt->rt6i_flags & RTF_CACHE) {
1321 if (rt6_check_expired(rt)) {
1322 ip6_del_rt(rt);
1323 dst = NULL;
1324 }
1325 } else {
1da177e4 1326 dst_release(dst);
54c1a859
YH
1327 dst = NULL;
1328 }
1da177e4 1329 }
54c1a859 1330 return dst;
1da177e4
LT
1331}
1332
1333static void ip6_link_failure(struct sk_buff *skb)
1334{
1335 struct rt6_info *rt;
1336
3ffe533c 1337 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1338
adf30907 1339 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1340 if (rt) {
1eb4f758
HFS
1341 if (rt->rt6i_flags & RTF_CACHE) {
1342 dst_hold(&rt->dst);
8e3d5be7 1343 ip6_del_rt(rt);
1eb4f758 1344 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1345 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1346 }
1da177e4
LT
1347 }
1348}
1349
45e4fd26
MKL
1350static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1351{
1352 struct net *net = dev_net(rt->dst.dev);
1353
1354 rt->rt6i_flags |= RTF_MODIFIED;
1355 rt->rt6i_pmtu = mtu;
1356 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1357}
1358
0d3f6d29
MKL
1359static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1360{
1361 return !(rt->rt6i_flags & RTF_CACHE) &&
1362 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1363}
1364
45e4fd26
MKL
1365static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1366 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1367{
67ba4152 1368 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1369
45e4fd26
MKL
1370 if (rt6->rt6i_flags & RTF_LOCAL)
1371 return;
81aded24 1372
19bda36c
XL
1373 if (dst_metric_locked(dst, RTAX_MTU))
1374 return;
1375
45e4fd26
MKL
1376 dst_confirm(dst);
1377 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1378 if (mtu >= dst_mtu(dst))
1379 return;
9d289715 1380
0d3f6d29 1381 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26
MKL
1382 rt6_do_update_pmtu(rt6, mtu);
1383 } else {
1384 const struct in6_addr *daddr, *saddr;
1385 struct rt6_info *nrt6;
1386
1387 if (iph) {
1388 daddr = &iph->daddr;
1389 saddr = &iph->saddr;
1390 } else if (sk) {
1391 daddr = &sk->sk_v6_daddr;
1392 saddr = &inet6_sk(sk)->saddr;
1393 } else {
1394 return;
1395 }
1396 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1397 if (nrt6) {
1398 rt6_do_update_pmtu(nrt6, mtu);
1399
1400 /* ip6_ins_rt(nrt6) will bump the
1401 * rt6->rt6i_node->fn_sernum
1402 * which will fail the next rt6_check() and
1403 * invalidate the sk->sk_dst_cache.
1404 */
1405 ip6_ins_rt(nrt6);
1406 }
1da177e4
LT
1407 }
1408}
1409
45e4fd26
MKL
1410static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1411 struct sk_buff *skb, u32 mtu)
1412{
1413 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1414}
1415
42ae66c8 1416void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 1417 int oif, u32 mark, kuid_t uid)
81aded24
DM
1418{
1419 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1420 struct dst_entry *dst;
1421 struct flowi6 fl6;
1422
1423 memset(&fl6, 0, sizeof(fl6));
1424 fl6.flowi6_oif = oif;
1b3c61dc 1425 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1426 fl6.daddr = iph->daddr;
1427 fl6.saddr = iph->saddr;
6502ca52 1428 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1429 fl6.flowi6_uid = uid;
81aded24
DM
1430
1431 dst = ip6_route_output(net, NULL, &fl6);
1432 if (!dst->error)
45e4fd26 1433 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1434 dst_release(dst);
1435}
1436EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1437
1438void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1439{
33c162a9
MKL
1440 struct dst_entry *dst;
1441
81aded24 1442 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 1443 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
1444
1445 dst = __sk_dst_get(sk);
1446 if (!dst || !dst->obsolete ||
1447 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1448 return;
1449
1450 bh_lock_sock(sk);
1451 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1452 ip6_datagram_dst_update(sk, false);
1453 bh_unlock_sock(sk);
81aded24
DM
1454}
1455EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1456
b55b76b2
DJ
1457/* Handle redirects */
1458struct ip6rd_flowi {
1459 struct flowi6 fl6;
1460 struct in6_addr gateway;
1461};
1462
1463static struct rt6_info *__ip6_route_redirect(struct net *net,
1464 struct fib6_table *table,
1465 struct flowi6 *fl6,
1466 int flags)
1467{
1468 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1469 struct rt6_info *rt;
1470 struct fib6_node *fn;
1471
1472 /* Get the "current" route for this destination and
67c408cf 1473 * check if the redirect has come from appropriate router.
b55b76b2
DJ
1474 *
1475 * RFC 4861 specifies that redirects should only be
1476 * accepted if they come from the nexthop to the target.
1477 * Due to the way the routes are chosen, this notion
1478 * is a bit fuzzy and one might need to check all possible
1479 * routes.
1480 */
1481
1482 read_lock_bh(&table->tb6_lock);
1483 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1484restart:
1485 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1486 if (rt6_check_expired(rt))
1487 continue;
1488 if (rt->dst.error)
1489 break;
1490 if (!(rt->rt6i_flags & RTF_GATEWAY))
1491 continue;
1492 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1493 continue;
1494 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1495 continue;
1496 break;
1497 }
1498
1499 if (!rt)
1500 rt = net->ipv6.ip6_null_entry;
1501 else if (rt->dst.error) {
1502 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1503 goto out;
1504 }
1505
1506 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1507 fn = fib6_backtrack(fn, &fl6->saddr);
1508 if (fn)
1509 goto restart;
b55b76b2 1510 }
a3c00e46 1511
b0a1ba59 1512out:
b55b76b2
DJ
1513 dst_hold(&rt->dst);
1514
1515 read_unlock_bh(&table->tb6_lock);
1516
b811580d 1517 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1518 return rt;
1519};
1520
1521static struct dst_entry *ip6_route_redirect(struct net *net,
1522 const struct flowi6 *fl6,
1523 const struct in6_addr *gateway)
1524{
1525 int flags = RT6_LOOKUP_F_HAS_SADDR;
1526 struct ip6rd_flowi rdfl;
1527
1528 rdfl.fl6 = *fl6;
1529 rdfl.gateway = *gateway;
1530
1531 return fib6_rule_lookup(net, &rdfl.fl6,
1532 flags, __ip6_route_redirect);
1533}
1534
e2d118a1
LC
1535void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1536 kuid_t uid)
3a5ad2ee
DM
1537{
1538 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1539 struct dst_entry *dst;
1540 struct flowi6 fl6;
1541
1542 memset(&fl6, 0, sizeof(fl6));
e374c618 1543 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1544 fl6.flowi6_oif = oif;
1545 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1546 fl6.daddr = iph->daddr;
1547 fl6.saddr = iph->saddr;
6502ca52 1548 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1549 fl6.flowi6_uid = uid;
3a5ad2ee 1550
b55b76b2
DJ
1551 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1552 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1553 dst_release(dst);
1554}
1555EXPORT_SYMBOL_GPL(ip6_redirect);
1556
c92a59ec
DJ
1557void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1558 u32 mark)
1559{
1560 const struct ipv6hdr *iph = ipv6_hdr(skb);
1561 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1562 struct dst_entry *dst;
1563 struct flowi6 fl6;
1564
1565 memset(&fl6, 0, sizeof(fl6));
e374c618 1566 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1567 fl6.flowi6_oif = oif;
1568 fl6.flowi6_mark = mark;
c92a59ec
DJ
1569 fl6.daddr = msg->dest;
1570 fl6.saddr = iph->daddr;
e2d118a1 1571 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 1572
b55b76b2
DJ
1573 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1574 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1575 dst_release(dst);
1576}
1577
3a5ad2ee
DM
1578void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1579{
e2d118a1
LC
1580 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1581 sk->sk_uid);
3a5ad2ee
DM
1582}
1583EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1584
0dbaee3b 1585static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1586{
0dbaee3b
DM
1587 struct net_device *dev = dst->dev;
1588 unsigned int mtu = dst_mtu(dst);
1589 struct net *net = dev_net(dev);
1590
1da177e4
LT
1591 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1592
5578689a
DL
1593 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1594 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1595
1596 /*
1ab1457c
YH
1597 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1598 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1599 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1600 * rely only on pmtu discovery"
1601 */
1602 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1603 mtu = IPV6_MAXPLEN;
1604 return mtu;
1605}
1606
ebb762f2 1607static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1608{
4b32b5ad
MKL
1609 const struct rt6_info *rt = (const struct rt6_info *)dst;
1610 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1611 struct inet6_dev *idev;
618f9bc7 1612
4b32b5ad
MKL
1613 if (mtu)
1614 goto out;
1615
1616 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1617 if (mtu)
30f78d8e 1618 goto out;
618f9bc7
SK
1619
1620 mtu = IPV6_MIN_MTU;
d33e4553
DM
1621
1622 rcu_read_lock();
1623 idev = __in6_dev_get(dst->dev);
1624 if (idev)
1625 mtu = idev->cnf.mtu6;
1626 rcu_read_unlock();
1627
30f78d8e 1628out:
14972cbd
RP
1629 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1630
1631 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
1632}
1633
3b00944c
YH
1634static struct dst_entry *icmp6_dst_gc_list;
1635static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1636
3b00944c 1637struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1638 struct flowi6 *fl6)
1da177e4 1639{
87a11578 1640 struct dst_entry *dst;
1da177e4
LT
1641 struct rt6_info *rt;
1642 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1643 struct net *net = dev_net(dev);
1da177e4 1644
38308473 1645 if (unlikely(!idev))
122bdf67 1646 return ERR_PTR(-ENODEV);
1da177e4 1647
ad706862 1648 rt = ip6_dst_alloc(net, dev, 0);
38308473 1649 if (unlikely(!rt)) {
1da177e4 1650 in6_dev_put(idev);
87a11578 1651 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1652 goto out;
1653 }
1654
8e2ec639
YZ
1655 rt->dst.flags |= DST_HOST;
1656 rt->dst.output = ip6_output;
d8d1f30b 1657 atomic_set(&rt->dst.__refcnt, 1);
550bab42 1658 rt->rt6i_gateway = fl6->daddr;
87a11578 1659 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1660 rt->rt6i_dst.plen = 128;
1661 rt->rt6i_idev = idev;
14edd87d 1662 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1663
3b00944c 1664 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1665 rt->dst.next = icmp6_dst_gc_list;
1666 icmp6_dst_gc_list = &rt->dst;
3b00944c 1667 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1668
5578689a 1669 fib6_force_start_gc(net);
1da177e4 1670
87a11578
DM
1671 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1672
1da177e4 1673out:
87a11578 1674 return dst;
1da177e4
LT
1675}
1676
3d0f24a7 1677int icmp6_dst_gc(void)
1da177e4 1678{
e9476e95 1679 struct dst_entry *dst, **pprev;
3d0f24a7 1680 int more = 0;
1da177e4 1681
3b00944c
YH
1682 spin_lock_bh(&icmp6_dst_lock);
1683 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1684
1da177e4
LT
1685 while ((dst = *pprev) != NULL) {
1686 if (!atomic_read(&dst->__refcnt)) {
1687 *pprev = dst->next;
1688 dst_free(dst);
1da177e4
LT
1689 } else {
1690 pprev = &dst->next;
3d0f24a7 1691 ++more;
1da177e4
LT
1692 }
1693 }
1694
3b00944c 1695 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1696
3d0f24a7 1697 return more;
1da177e4
LT
1698}
1699
1e493d19
DM
1700static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1701 void *arg)
1702{
1703 struct dst_entry *dst, **pprev;
1704
1705 spin_lock_bh(&icmp6_dst_lock);
1706 pprev = &icmp6_dst_gc_list;
1707 while ((dst = *pprev) != NULL) {
1708 struct rt6_info *rt = (struct rt6_info *) dst;
1709 if (func(rt, arg)) {
1710 *pprev = dst->next;
1711 dst_free(dst);
1712 } else {
1713 pprev = &dst->next;
1714 }
1715 }
1716 spin_unlock_bh(&icmp6_dst_lock);
1717}
1718
569d3645 1719static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1720{
86393e52 1721 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1722 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1723 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1724 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1725 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1726 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1727 int entries;
7019b78e 1728
fc66f95c 1729 entries = dst_entries_get_fast(ops);
49a18d86 1730 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1731 entries <= rt_max_size)
1da177e4
LT
1732 goto out;
1733
6891a346 1734 net->ipv6.ip6_rt_gc_expire++;
14956643 1735 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1736 entries = dst_entries_get_slow(ops);
1737 if (entries < ops->gc_thresh)
7019b78e 1738 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1739out:
7019b78e 1740 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1741 return entries > rt_max_size;
1da177e4
LT
1742}
1743
e715b6d3
FW
1744static int ip6_convert_metrics(struct mx6_config *mxc,
1745 const struct fib6_config *cfg)
1746{
c3a8d947 1747 bool ecn_ca = false;
e715b6d3
FW
1748 struct nlattr *nla;
1749 int remaining;
1750 u32 *mp;
1751
63159f29 1752 if (!cfg->fc_mx)
e715b6d3
FW
1753 return 0;
1754
1755 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1756 if (unlikely(!mp))
1757 return -ENOMEM;
1758
1759 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1760 int type = nla_type(nla);
1bb14807 1761 u32 val;
e715b6d3 1762
1bb14807
DB
1763 if (!type)
1764 continue;
1765 if (unlikely(type > RTAX_MAX))
1766 goto err;
ea697639 1767
1bb14807
DB
1768 if (type == RTAX_CC_ALGO) {
1769 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1770
1bb14807 1771 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1772 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1773 if (val == TCP_CA_UNSPEC)
1774 goto err;
1775 } else {
1776 val = nla_get_u32(nla);
e715b6d3 1777 }
626abd59
PA
1778 if (type == RTAX_HOPLIMIT && val > 255)
1779 val = 255;
b8d3e416
DB
1780 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1781 goto err;
1bb14807
DB
1782
1783 mp[type - 1] = val;
1784 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1785 }
1786
c3a8d947
DB
1787 if (ecn_ca) {
1788 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1789 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1790 }
e715b6d3 1791
c3a8d947 1792 mxc->mx = mp;
e715b6d3
FW
1793 return 0;
1794 err:
1795 kfree(mp);
1796 return -EINVAL;
1797}
1da177e4 1798
8c14586f
DA
1799static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1800 struct fib6_config *cfg,
1801 const struct in6_addr *gw_addr)
1802{
1803 struct flowi6 fl6 = {
1804 .flowi6_oif = cfg->fc_ifindex,
1805 .daddr = *gw_addr,
1806 .saddr = cfg->fc_prefsrc,
1807 };
1808 struct fib6_table *table;
1809 struct rt6_info *rt;
d5d32e4b 1810 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
1811
1812 table = fib6_get_table(net, cfg->fc_table);
1813 if (!table)
1814 return NULL;
1815
1816 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1817 flags |= RT6_LOOKUP_F_HAS_SADDR;
1818
1819 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1820
1821 /* if table lookup failed, fall back to full lookup */
1822 if (rt == net->ipv6.ip6_null_entry) {
1823 ip6_rt_put(rt);
1824 rt = NULL;
1825 }
1826
1827 return rt;
1828}
1829
8c5b83f0 1830static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
1da177e4 1831{
5578689a 1832 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1833 struct rt6_info *rt = NULL;
1834 struct net_device *dev = NULL;
1835 struct inet6_dev *idev = NULL;
c71099ac 1836 struct fib6_table *table;
1da177e4 1837 int addr_type;
8c5b83f0 1838 int err = -EINVAL;
1da177e4 1839
86872cb5 1840 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
8c5b83f0 1841 goto out;
1da177e4 1842#ifndef CONFIG_IPV6_SUBTREES
86872cb5 1843 if (cfg->fc_src_len)
8c5b83f0 1844 goto out;
1da177e4 1845#endif
86872cb5 1846 if (cfg->fc_ifindex) {
1da177e4 1847 err = -ENODEV;
5578689a 1848 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1849 if (!dev)
1850 goto out;
1851 idev = in6_dev_get(dev);
1852 if (!idev)
1853 goto out;
1854 }
1855
86872cb5
TG
1856 if (cfg->fc_metric == 0)
1857 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1858
d71314b4 1859 err = -ENOBUFS;
38308473
DM
1860 if (cfg->fc_nlinfo.nlh &&
1861 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1862 table = fib6_get_table(net, cfg->fc_table);
38308473 1863 if (!table) {
f3213831 1864 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1865 table = fib6_new_table(net, cfg->fc_table);
1866 }
1867 } else {
1868 table = fib6_new_table(net, cfg->fc_table);
1869 }
38308473
DM
1870
1871 if (!table)
c71099ac 1872 goto out;
c71099ac 1873
ad706862
MKL
1874 rt = ip6_dst_alloc(net, NULL,
1875 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1876
38308473 1877 if (!rt) {
1da177e4
LT
1878 err = -ENOMEM;
1879 goto out;
1880 }
1881
1716a961
G
1882 if (cfg->fc_flags & RTF_EXPIRES)
1883 rt6_set_expires(rt, jiffies +
1884 clock_t_to_jiffies(cfg->fc_expires));
1885 else
1886 rt6_clean_expires(rt);
1da177e4 1887
86872cb5
TG
1888 if (cfg->fc_protocol == RTPROT_UNSPEC)
1889 cfg->fc_protocol = RTPROT_BOOT;
1890 rt->rt6i_protocol = cfg->fc_protocol;
1891
1892 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1893
1894 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1895 rt->dst.input = ip6_mc_input;
ab79ad14
1896 else if (cfg->fc_flags & RTF_LOCAL)
1897 rt->dst.input = ip6_input;
1da177e4 1898 else
d8d1f30b 1899 rt->dst.input = ip6_forward;
1da177e4 1900
d8d1f30b 1901 rt->dst.output = ip6_output;
1da177e4 1902
19e42e45
RP
1903 if (cfg->fc_encap) {
1904 struct lwtunnel_state *lwtstate;
1905
30357d7d 1906 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd
TH
1907 cfg->fc_encap, AF_INET6, cfg,
1908 &lwtstate);
19e42e45
RP
1909 if (err)
1910 goto out;
61adedf3
JB
1911 rt->dst.lwtstate = lwtstate_get(lwtstate);
1912 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1913 rt->dst.lwtstate->orig_output = rt->dst.output;
1914 rt->dst.output = lwtunnel_output;
25368623 1915 }
61adedf3
JB
1916 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1917 rt->dst.lwtstate->orig_input = rt->dst.input;
1918 rt->dst.input = lwtunnel_input;
25368623 1919 }
19e42e45
RP
1920 }
1921
86872cb5
TG
1922 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1923 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1924 if (rt->rt6i_dst.plen == 128)
e5fd387a 1925 rt->dst.flags |= DST_HOST;
e5fd387a 1926
1da177e4 1927#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1928 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1929 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1930#endif
1931
86872cb5 1932 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1933
1934 /* We cannot add true routes via loopback here,
1935 they would result in kernel looping; promote them to reject routes
1936 */
86872cb5 1937 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1938 (dev && (dev->flags & IFF_LOOPBACK) &&
1939 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1940 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1941 /* hold loopback dev/idev if we haven't done so. */
5578689a 1942 if (dev != net->loopback_dev) {
1da177e4
LT
1943 if (dev) {
1944 dev_put(dev);
1945 in6_dev_put(idev);
1946 }
5578689a 1947 dev = net->loopback_dev;
1da177e4
LT
1948 dev_hold(dev);
1949 idev = in6_dev_get(dev);
1950 if (!idev) {
1951 err = -ENODEV;
1952 goto out;
1953 }
1954 }
1da177e4 1955 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
1956 switch (cfg->fc_type) {
1957 case RTN_BLACKHOLE:
1958 rt->dst.error = -EINVAL;
ede2059d 1959 rt->dst.output = dst_discard_out;
7150aede 1960 rt->dst.input = dst_discard;
ef2c7d7b
ND
1961 break;
1962 case RTN_PROHIBIT:
1963 rt->dst.error = -EACCES;
7150aede
K
1964 rt->dst.output = ip6_pkt_prohibit_out;
1965 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 1966 break;
b4949ab2 1967 case RTN_THROW:
0315e382 1968 case RTN_UNREACHABLE:
ef2c7d7b 1969 default:
7150aede 1970 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
1971 : (cfg->fc_type == RTN_UNREACHABLE)
1972 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
1973 rt->dst.output = ip6_pkt_discard_out;
1974 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
1975 break;
1976 }
1da177e4
LT
1977 goto install_route;
1978 }
1979
86872cb5 1980 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 1981 const struct in6_addr *gw_addr;
1da177e4
LT
1982 int gwa_type;
1983
86872cb5 1984 gw_addr = &cfg->fc_gateway;
330567b7 1985 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
1986
1987 /* if gw_addr is local we will fail to detect this in case
1988 * address is still TENTATIVE (DAD in progress). rt6_lookup()
1989 * will return already-added prefix route via interface that
1990 * prefix route was assigned to, which might be non-loopback.
1991 */
1992 err = -EINVAL;
330567b7
FW
1993 if (ipv6_chk_addr_and_flags(net, gw_addr,
1994 gwa_type & IPV6_ADDR_LINKLOCAL ?
1995 dev : NULL, 0, 0))
48ed7b26
FW
1996 goto out;
1997
4e3fd7a0 1998 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
1999
2000 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 2001 struct rt6_info *grt = NULL;
1da177e4
LT
2002
2003 /* IPv6 strictly inhibits using not link-local
2004 addresses as nexthop address.
2005 Otherwise, router will not able to send redirects.
2006 It is very good, but in some (rare!) circumstances
2007 (SIT, PtP, NBMA NOARP links) it is handy to allow
2008 some exceptions. --ANK
96d5822c
EN
2009 We allow IPv4-mapped nexthops to support RFC4798-type
2010 addressing
1da177e4 2011 */
96d5822c
EN
2012 if (!(gwa_type & (IPV6_ADDR_UNICAST |
2013 IPV6_ADDR_MAPPED)))
1da177e4
LT
2014 goto out;
2015
a435a07f 2016 if (cfg->fc_table) {
8c14586f
DA
2017 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2018
a435a07f
VB
2019 if (grt) {
2020 if (grt->rt6i_flags & RTF_GATEWAY ||
2021 (dev && dev != grt->dst.dev)) {
2022 ip6_rt_put(grt);
2023 grt = NULL;
2024 }
2025 }
2026 }
2027
8c14586f
DA
2028 if (!grt)
2029 grt = rt6_lookup(net, gw_addr, NULL,
2030 cfg->fc_ifindex, 1);
1da177e4
LT
2031
2032 err = -EHOSTUNREACH;
38308473 2033 if (!grt)
1da177e4
LT
2034 goto out;
2035 if (dev) {
d1918542 2036 if (dev != grt->dst.dev) {
94e187c0 2037 ip6_rt_put(grt);
1da177e4
LT
2038 goto out;
2039 }
2040 } else {
d1918542 2041 dev = grt->dst.dev;
1da177e4
LT
2042 idev = grt->rt6i_idev;
2043 dev_hold(dev);
2044 in6_dev_hold(grt->rt6i_idev);
2045 }
38308473 2046 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2047 err = 0;
94e187c0 2048 ip6_rt_put(grt);
1da177e4
LT
2049
2050 if (err)
2051 goto out;
2052 }
2053 err = -EINVAL;
38308473 2054 if (!dev || (dev->flags & IFF_LOOPBACK))
1da177e4
LT
2055 goto out;
2056 }
2057
2058 err = -ENODEV;
38308473 2059 if (!dev)
1da177e4
LT
2060 goto out;
2061
c3968a85
DW
2062 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2063 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
2064 err = -EINVAL;
2065 goto out;
2066 }
4e3fd7a0 2067 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2068 rt->rt6i_prefsrc.plen = 128;
2069 } else
2070 rt->rt6i_prefsrc.plen = 0;
2071
86872cb5 2072 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2073
2074install_route:
d8d1f30b 2075 rt->dst.dev = dev;
1da177e4 2076 rt->rt6i_idev = idev;
c71099ac 2077 rt->rt6i_table = table;
63152fc0 2078
c346dca1 2079 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2080
8c5b83f0 2081 return rt;
6b9ea5a6
RP
2082out:
2083 if (dev)
2084 dev_put(dev);
2085 if (idev)
2086 in6_dev_put(idev);
2087 if (rt)
2088 dst_free(&rt->dst);
2089
8c5b83f0 2090 return ERR_PTR(err);
6b9ea5a6
RP
2091}
2092
2093int ip6_route_add(struct fib6_config *cfg)
2094{
2095 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2096 struct rt6_info *rt;
6b9ea5a6
RP
2097 int err;
2098
8c5b83f0
RP
2099 rt = ip6_route_info_create(cfg);
2100 if (IS_ERR(rt)) {
2101 err = PTR_ERR(rt);
2102 rt = NULL;
6b9ea5a6 2103 goto out;
8c5b83f0 2104 }
6b9ea5a6 2105
e715b6d3
FW
2106 err = ip6_convert_metrics(&mxc, cfg);
2107 if (err)
2108 goto out;
1da177e4 2109
e715b6d3
FW
2110 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);
2111
2112 kfree(mxc.mx);
6b9ea5a6 2113
e715b6d3 2114 return err;
1da177e4 2115out:
1da177e4 2116 if (rt)
d8d1f30b 2117 dst_free(&rt->dst);
6b9ea5a6 2118
1da177e4
LT
2119 return err;
2120}
2121
86872cb5 2122static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2123{
2124 int err;
c71099ac 2125 struct fib6_table *table;
d1918542 2126 struct net *net = dev_net(rt->dst.dev);
1da177e4 2127
8e3d5be7
MKL
2128 if (rt == net->ipv6.ip6_null_entry ||
2129 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2130 err = -ENOENT;
2131 goto out;
2132 }
6c813a72 2133
c71099ac
TG
2134 table = rt->rt6i_table;
2135 write_lock_bh(&table->tb6_lock);
86872cb5 2136 err = fib6_del(rt, info);
c71099ac 2137 write_unlock_bh(&table->tb6_lock);
1da177e4 2138
6825a26c 2139out:
94e187c0 2140 ip6_rt_put(rt);
1da177e4
LT
2141 return err;
2142}
2143
e0a1ad73
TG
2144int ip6_del_rt(struct rt6_info *rt)
2145{
4d1169c1 2146 struct nl_info info = {
d1918542 2147 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2148 };
528c4ceb 2149 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2150}
2151
0ae81335
DA
2152static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2153{
2154 struct nl_info *info = &cfg->fc_nlinfo;
16a16cd3 2155 struct sk_buff *skb = NULL;
0ae81335
DA
2156 struct fib6_table *table;
2157 int err;
2158
2159 table = rt->rt6i_table;
2160 write_lock_bh(&table->tb6_lock);
2161
2162 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2163 struct rt6_info *sibling, *next_sibling;
2164
16a16cd3
DA
2165 /* prefer to send a single notification with all hops */
2166 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2167 if (skb) {
2168 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2169
2170 if (rt6_fill_node(info->nl_net, skb, rt,
2171 NULL, NULL, 0, RTM_DELROUTE,
2172 info->portid, seq, 0) < 0) {
2173 kfree_skb(skb);
2174 skb = NULL;
2175 } else
2176 info->skip_notify = 1;
2177 }
2178
0ae81335
DA
2179 list_for_each_entry_safe(sibling, next_sibling,
2180 &rt->rt6i_siblings,
2181 rt6i_siblings) {
2182 err = fib6_del(sibling, info);
2183 if (err)
2184 goto out;
2185 }
2186 }
2187
2188 err = fib6_del(rt, info);
2189out:
2190 write_unlock_bh(&table->tb6_lock);
2191 ip6_rt_put(rt);
16a16cd3
DA
2192
2193 if (skb) {
2194 rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV6_ROUTE,
2195 info->nlh, gfp_any());
2196 }
0ae81335
DA
2197 return err;
2198}
2199
86872cb5 2200static int ip6_route_del(struct fib6_config *cfg)
1da177e4 2201{
c71099ac 2202 struct fib6_table *table;
1da177e4
LT
2203 struct fib6_node *fn;
2204 struct rt6_info *rt;
2205 int err = -ESRCH;
2206
5578689a 2207 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
38308473 2208 if (!table)
c71099ac
TG
2209 return err;
2210
2211 read_lock_bh(&table->tb6_lock);
1da177e4 2212
c71099ac 2213 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2214 &cfg->fc_dst, cfg->fc_dst_len,
2215 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2216
1da177e4 2217 if (fn) {
d8d1f30b 2218 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2219 if ((rt->rt6i_flags & RTF_CACHE) &&
2220 !(cfg->fc_flags & RTF_CACHE))
2221 continue;
86872cb5 2222 if (cfg->fc_ifindex &&
d1918542
DM
2223 (!rt->dst.dev ||
2224 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2225 continue;
86872cb5
TG
2226 if (cfg->fc_flags & RTF_GATEWAY &&
2227 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2228 continue;
86872cb5 2229 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2230 continue;
c2ed1880
M
2231 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2232 continue;
d8d1f30b 2233 dst_hold(&rt->dst);
c71099ac 2234 read_unlock_bh(&table->tb6_lock);
1da177e4 2235
0ae81335
DA
2236 /* if gateway was specified only delete the one hop */
2237 if (cfg->fc_flags & RTF_GATEWAY)
2238 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2239
2240 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
2241 }
2242 }
c71099ac 2243 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2244
2245 return err;
2246}
2247
6700c270 2248static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2249{
a6279458 2250 struct netevent_redirect netevent;
e8599ff4 2251 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2252 struct ndisc_options ndopts;
2253 struct inet6_dev *in6_dev;
2254 struct neighbour *neigh;
71bcdba0 2255 struct rd_msg *msg;
6e157b6a
DM
2256 int optlen, on_link;
2257 u8 *lladdr;
e8599ff4 2258
29a3cad5 2259 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2260 optlen -= sizeof(*msg);
e8599ff4
DM
2261
2262 if (optlen < 0) {
6e157b6a 2263 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2264 return;
2265 }
2266
71bcdba0 2267 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2268
71bcdba0 2269 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2270 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2271 return;
2272 }
2273
6e157b6a 2274 on_link = 0;
71bcdba0 2275 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2276 on_link = 1;
71bcdba0 2277 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2278 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2279 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2280 return;
2281 }
2282
2283 in6_dev = __in6_dev_get(skb->dev);
2284 if (!in6_dev)
2285 return;
2286 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2287 return;
2288
2289 /* RFC2461 8.1:
2290 * The IP source address of the Redirect MUST be the same as the current
2291 * first-hop router for the specified ICMP Destination Address.
2292 */
2293
f997c55c 2294 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2295 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2296 return;
2297 }
6e157b6a
DM
2298
2299 lladdr = NULL;
e8599ff4
DM
2300 if (ndopts.nd_opts_tgt_lladdr) {
2301 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2302 skb->dev);
2303 if (!lladdr) {
2304 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2305 return;
2306 }
2307 }
2308
6e157b6a 2309 rt = (struct rt6_info *) dst;
ec13ad1d 2310 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2311 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2312 return;
6e157b6a 2313 }
e8599ff4 2314
6e157b6a
DM
2315 /* Redirect received -> path was valid.
2316 * Look, redirects are sent only in response to data packets,
2317 * so that this nexthop apparently is reachable. --ANK
2318 */
2319 dst_confirm(&rt->dst);
a6279458 2320
71bcdba0 2321 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2322 if (!neigh)
2323 return;
a6279458 2324
1da177e4
LT
2325 /*
2326 * We have finally decided to accept it.
2327 */
2328
f997c55c 2329 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
2330 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2331 NEIGH_UPDATE_F_OVERRIDE|
2332 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
2333 NEIGH_UPDATE_F_ISROUTER)),
2334 NDISC_REDIRECT, &ndopts);
1da177e4 2335
83a09abd 2336 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2337 if (!nrt)
1da177e4
LT
2338 goto out;
2339
2340 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2341 if (on_link)
2342 nrt->rt6i_flags &= ~RTF_GATEWAY;
2343
4e3fd7a0 2344 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2345
40e22e8f 2346 if (ip6_ins_rt(nrt))
1da177e4
LT
2347 goto out;
2348
d8d1f30b
CG
2349 netevent.old = &rt->dst;
2350 netevent.new = &nrt->dst;
71bcdba0 2351 netevent.daddr = &msg->dest;
60592833 2352 netevent.neigh = neigh;
8d71740c
TT
2353 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2354
38308473 2355 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2356 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2357 ip6_del_rt(rt);
1da177e4
LT
2358 }
2359
2360out:
e8599ff4 2361 neigh_release(neigh);
6e157b6a
DM
2362}
2363
1da177e4
LT
2364/*
2365 * Misc support functions
2366 */
2367
4b32b5ad
MKL
2368static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2369{
2370 BUG_ON(from->dst.from);
2371
2372 rt->rt6i_flags &= ~RTF_EXPIRES;
2373 dst_hold(&from->dst);
2374 rt->dst.from = &from->dst;
2375 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2376}
2377
83a09abd
MKL
2378static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2379{
2380 rt->dst.input = ort->dst.input;
2381 rt->dst.output = ort->dst.output;
2382 rt->rt6i_dst = ort->rt6i_dst;
2383 rt->dst.error = ort->dst.error;
2384 rt->rt6i_idev = ort->rt6i_idev;
2385 if (rt->rt6i_idev)
2386 in6_dev_hold(rt->rt6i_idev);
2387 rt->dst.lastuse = jiffies;
2388 rt->rt6i_gateway = ort->rt6i_gateway;
2389 rt->rt6i_flags = ort->rt6i_flags;
2390 rt6_set_from(rt, ort);
2391 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2392#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2393 rt->rt6i_src = ort->rt6i_src;
1da177e4 2394#endif
83a09abd
MKL
2395 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2396 rt->rt6i_table = ort->rt6i_table;
61adedf3 2397 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2398}
2399
70ceb4f5 2400#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2401static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 2402 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2403 const struct in6_addr *gwaddr,
2404 struct net_device *dev)
70ceb4f5 2405{
830218c1
DA
2406 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2407 int ifindex = dev->ifindex;
70ceb4f5
YH
2408 struct fib6_node *fn;
2409 struct rt6_info *rt = NULL;
c71099ac
TG
2410 struct fib6_table *table;
2411
830218c1 2412 table = fib6_get_table(net, tb_id);
38308473 2413 if (!table)
c71099ac 2414 return NULL;
70ceb4f5 2415
5744dd9b 2416 read_lock_bh(&table->tb6_lock);
67ba4152 2417 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2418 if (!fn)
2419 goto out;
2420
d8d1f30b 2421 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2422 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2423 continue;
2424 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2425 continue;
2426 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2427 continue;
d8d1f30b 2428 dst_hold(&rt->dst);
70ceb4f5
YH
2429 break;
2430 }
2431out:
5744dd9b 2432 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2433 return rt;
2434}
2435
efa2cea0 2436static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 2437 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2438 const struct in6_addr *gwaddr,
2439 struct net_device *dev,
95c96174 2440 unsigned int pref)
70ceb4f5 2441{
86872cb5 2442 struct fib6_config cfg = {
238fc7ea 2443 .fc_metric = IP6_RT_PRIO_USER,
830218c1 2444 .fc_ifindex = dev->ifindex,
86872cb5
TG
2445 .fc_dst_len = prefixlen,
2446 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2447 RTF_UP | RTF_PREF(pref),
15e47304 2448 .fc_nlinfo.portid = 0,
efa2cea0
DL
2449 .fc_nlinfo.nlh = NULL,
2450 .fc_nlinfo.nl_net = net,
86872cb5
TG
2451 };
2452
830218c1 2453 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
2454 cfg.fc_dst = *prefix;
2455 cfg.fc_gateway = *gwaddr;
70ceb4f5 2456
e317da96
YH
2457 /* We should treat it as a default route if prefix length is 0. */
2458 if (!prefixlen)
86872cb5 2459 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2460
86872cb5 2461 ip6_route_add(&cfg);
70ceb4f5 2462
830218c1 2463 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
2464}
2465#endif
2466
b71d1d42 2467struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2468{
830218c1 2469 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 2470 struct rt6_info *rt;
c71099ac 2471 struct fib6_table *table;
1da177e4 2472
830218c1 2473 table = fib6_get_table(dev_net(dev), tb_id);
38308473 2474 if (!table)
c71099ac 2475 return NULL;
1da177e4 2476
5744dd9b 2477 read_lock_bh(&table->tb6_lock);
67ba4152 2478 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2479 if (dev == rt->dst.dev &&
045927ff 2480 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2481 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2482 break;
2483 }
2484 if (rt)
d8d1f30b 2485 dst_hold(&rt->dst);
5744dd9b 2486 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2487 return rt;
2488}
2489
b71d1d42 2490struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2491 struct net_device *dev,
2492 unsigned int pref)
1da177e4 2493{
86872cb5 2494 struct fib6_config cfg = {
ca254490 2495 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2496 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2497 .fc_ifindex = dev->ifindex,
2498 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2499 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2500 .fc_nlinfo.portid = 0,
5578689a 2501 .fc_nlinfo.nlh = NULL,
c346dca1 2502 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2503 };
1da177e4 2504
4e3fd7a0 2505 cfg.fc_gateway = *gwaddr;
1da177e4 2506
830218c1
DA
2507 if (!ip6_route_add(&cfg)) {
2508 struct fib6_table *table;
2509
2510 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2511 if (table)
2512 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2513 }
1da177e4 2514
1da177e4
LT
2515 return rt6_get_dflt_router(gwaddr, dev);
2516}
2517
830218c1 2518static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
2519{
2520 struct rt6_info *rt;
2521
2522restart:
c71099ac 2523 read_lock_bh(&table->tb6_lock);
d8d1f30b 2524 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2525 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2526 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2527 dst_hold(&rt->dst);
c71099ac 2528 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2529 ip6_del_rt(rt);
1da177e4
LT
2530 goto restart;
2531 }
2532 }
c71099ac 2533 read_unlock_bh(&table->tb6_lock);
830218c1
DA
2534
2535 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2536}
2537
2538void rt6_purge_dflt_routers(struct net *net)
2539{
2540 struct fib6_table *table;
2541 struct hlist_head *head;
2542 unsigned int h;
2543
2544 rcu_read_lock();
2545
2546 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2547 head = &net->ipv6.fib_table_hash[h];
2548 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2549 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2550 __rt6_purge_dflt_routers(table);
2551 }
2552 }
2553
2554 rcu_read_unlock();
1da177e4
LT
2555}
2556
5578689a
DL
2557static void rtmsg_to_fib6_config(struct net *net,
2558 struct in6_rtmsg *rtmsg,
86872cb5
TG
2559 struct fib6_config *cfg)
2560{
2561 memset(cfg, 0, sizeof(*cfg));
2562
ca254490
DA
2563 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2564 : RT6_TABLE_MAIN;
86872cb5
TG
2565 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2566 cfg->fc_metric = rtmsg->rtmsg_metric;
2567 cfg->fc_expires = rtmsg->rtmsg_info;
2568 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2569 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2570 cfg->fc_flags = rtmsg->rtmsg_flags;
2571
5578689a 2572 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2573
4e3fd7a0
AD
2574 cfg->fc_dst = rtmsg->rtmsg_dst;
2575 cfg->fc_src = rtmsg->rtmsg_src;
2576 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2577}
2578
5578689a 2579int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2580{
86872cb5 2581 struct fib6_config cfg;
1da177e4
LT
2582 struct in6_rtmsg rtmsg;
2583 int err;
2584
67ba4152 2585 switch (cmd) {
1da177e4
LT
2586 case SIOCADDRT: /* Add a route */
2587 case SIOCDELRT: /* Delete a route */
af31f412 2588 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2589 return -EPERM;
2590 err = copy_from_user(&rtmsg, arg,
2591 sizeof(struct in6_rtmsg));
2592 if (err)
2593 return -EFAULT;
86872cb5 2594
5578689a 2595 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2596
1da177e4
LT
2597 rtnl_lock();
2598 switch (cmd) {
2599 case SIOCADDRT:
86872cb5 2600 err = ip6_route_add(&cfg);
1da177e4
LT
2601 break;
2602 case SIOCDELRT:
86872cb5 2603 err = ip6_route_del(&cfg);
1da177e4
LT
2604 break;
2605 default:
2606 err = -EINVAL;
2607 }
2608 rtnl_unlock();
2609
2610 return err;
3ff50b79 2611 }
1da177e4
LT
2612
2613 return -EINVAL;
2614}
2615
2616/*
2617 * Drop the packet on the floor
2618 */
2619
d5fdd6ba 2620static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2621{
612f09e8 2622 int type;
adf30907 2623 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2624 switch (ipstats_mib_noroutes) {
2625 case IPSTATS_MIB_INNOROUTES:
0660e03f 2626 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2627 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2628 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2629 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2630 break;
2631 }
2632 /* FALLTHROUGH */
2633 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2634 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2635 ipstats_mib_noroutes);
612f09e8
YH
2636 break;
2637 }
3ffe533c 2638 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2639 kfree_skb(skb);
2640 return 0;
2641}
2642
9ce8ade0
TG
2643static int ip6_pkt_discard(struct sk_buff *skb)
2644{
612f09e8 2645 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2646}
2647
ede2059d 2648static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2649{
adf30907 2650 skb->dev = skb_dst(skb)->dev;
612f09e8 2651 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2652}
2653
9ce8ade0
TG
2654static int ip6_pkt_prohibit(struct sk_buff *skb)
2655{
612f09e8 2656 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2657}
2658
ede2059d 2659static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2660{
adf30907 2661 skb->dev = skb_dst(skb)->dev;
612f09e8 2662 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2663}
2664
1da177e4
LT
2665/*
2666 * Allocate a dst for local (unicast / anycast) address.
2667 */
2668
2669struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2670 const struct in6_addr *addr,
8f031519 2671 bool anycast)
1da177e4 2672{
ca254490 2673 u32 tb_id;
c346dca1 2674 struct net *net = dev_net(idev->dev);
5f02ce24
DA
2675 struct net_device *dev = net->loopback_dev;
2676 struct rt6_info *rt;
2677
2678 /* use L3 Master device as loopback for host routes if device
2679 * is enslaved and address is not link local or multicast
2680 */
2681 if (!rt6_need_strict(addr))
2682 dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2683
2684 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 2685 if (!rt)
1da177e4
LT
2686 return ERR_PTR(-ENOMEM);
2687
1da177e4
LT
2688 in6_dev_hold(idev);
2689
11d53b49 2690 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2691 rt->dst.input = ip6_input;
2692 rt->dst.output = ip6_output;
1da177e4 2693 rt->rt6i_idev = idev;
1da177e4 2694
94b5e0f9 2695 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 2696 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2697 if (anycast)
2698 rt->rt6i_flags |= RTF_ANYCAST;
2699 else
1da177e4 2700 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2701
550bab42 2702 rt->rt6i_gateway = *addr;
4e3fd7a0 2703 rt->rt6i_dst.addr = *addr;
1da177e4 2704 rt->rt6i_dst.plen = 128;
ca254490
DA
2705 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2706 rt->rt6i_table = fib6_get_table(net, tb_id);
8e3d5be7 2707 rt->dst.flags |= DST_NOCACHE;
1da177e4 2708
d8d1f30b 2709 atomic_set(&rt->dst.__refcnt, 1);
1da177e4
LT
2710
2711 return rt;
2712}
2713
c3968a85
DW
2714/* remove deleted ip from prefsrc entries */
/* Argument bundle handed to fib6_remove_prefsrc() via fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* preferred-source address being removed */
};
2720
2721static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2722{
2723 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2724 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2725 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2726
d1918542 2727 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2728 rt != net->ipv6.ip6_null_entry &&
2729 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2730 /* remove prefsrc entry */
2731 rt->rt6i_prefsrc.plen = 0;
2732 }
2733 return 0;
2734}
2735
2736void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2737{
2738 struct net *net = dev_net(ifp->idev->dev);
2739 struct arg_dev_net_ip adni = {
2740 .dev = ifp->idev->dev,
2741 .net = net,
2742 .addr = &ifp->addr,
2743 };
0c3584d5 2744 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2745}
2746
be7a010d
DJ
2747#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2748#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2749
2750/* Remove routers and update dst entries when gateway turn into host. */
2751static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2752{
2753 struct in6_addr *gateway = (struct in6_addr *)arg;
2754
2755 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2756 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2757 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2758 return -1;
2759 }
2760 return 0;
2761}
2762
2763void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2764{
2765 fib6_clean_all(net, fib6_clean_tohost, gateway);
2766}
2767
8ed67789
DL
/* Argument bundle handed to fib6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;		/* device going down; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
};
2772
a1a22c12 2773/* called with write lock held for table with rt */
1da177e4
LT
2774static int fib6_ifdown(struct rt6_info *rt, void *arg)
2775{
bc3ef660 2776 const struct arg_dev_net *adn = arg;
2777 const struct net_device *dev = adn->dev;
8ed67789 2778
d1918542 2779 if ((rt->dst.dev == dev || !dev) &&
a1a22c12
DA
2780 rt != adn->net->ipv6.ip6_null_entry &&
2781 (rt->rt6i_nsiblings == 0 ||
2782 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
1da177e4 2783 return -1;
c159d30c 2784
1da177e4
LT
2785 return 0;
2786}
2787
f3db4851 2788void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2789{
8ed67789
DL
2790 struct arg_dev_net adn = {
2791 .dev = dev,
2792 .net = net,
2793 };
2794
0c3584d5 2795 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2796 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2797 if (dev)
2798 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2799}
2800
/* Argument bundle handed to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;		/* device whose MTU changed */
	unsigned int mtu;		/* the new MTU */
};
2805
2806static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2807{
2808 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2809 struct inet6_dev *idev;
2810
2811 /* In IPv6 pmtu discovery is not optional,
2812 so that RTAX_MTU lock cannot disable it.
2813 We still use this lock to block changes
2814 caused by addrconf/ndisc.
2815 */
2816
2817 idev = __in6_dev_get(arg->dev);
38308473 2818 if (!idev)
1da177e4
LT
2819 return 0;
2820
2821 /* For administrative MTU increase, there is no way to discover
2822 IPv6 PMTU increase, so PMTU increase should be updated here.
2823 Since RFC 1981 doesn't include administrative MTU increase
2824 update PMTU increase is a MUST. (i.e. jumbo frame)
2825 */
2826 /*
2827 If new MTU is less than route PMTU, this new MTU will be the
2828 lowest MTU in the path, update the route PMTU to reflect PMTU
2829 decreases; if new MTU is greater than route PMTU, and the
2830 old MTU is the lowest MTU in the path, update the route PMTU
2831 to reflect the increase. In this case if the other nodes' MTU
2832 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 2833 PMTU discovery.
1da177e4 2834 */
d1918542 2835 if (rt->dst.dev == arg->dev &&
fb56be83 2836 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad
MKL
2837 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2838 if (rt->rt6i_flags & RTF_CACHE) {
2839 /* For RTF_CACHE with rt6i_pmtu == 0
2840 * (i.e. a redirected route),
2841 * the metrics of its rt->dst.from has already
2842 * been updated.
2843 */
2844 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2845 rt->rt6i_pmtu = arg->mtu;
2846 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2847 (dst_mtu(&rt->dst) < arg->mtu &&
2848 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2849 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2850 }
566cfd8f 2851 }
1da177e4
LT
2852 return 0;
2853}
2854
95c96174 2855void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2856{
c71099ac
TG
2857 struct rt6_mtu_change_arg arg = {
2858 .dev = dev,
2859 .mtu = mtu,
2860 };
1da177e4 2861
0c3584d5 2862 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2863}
2864
ef7c79ed 2865static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2866 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2867 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2868 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2869 [RTA_PRIORITY] = { .type = NLA_U32 },
2870 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2871 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2872 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2873 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2874 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 2875 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 2876 [RTA_UID] = { .type = NLA_U32 },
86872cb5
TG
2877};
2878
2879static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2880 struct fib6_config *cfg)
1da177e4 2881{
86872cb5
TG
2882 struct rtmsg *rtm;
2883 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2884 unsigned int pref;
86872cb5 2885 int err;
1da177e4 2886
86872cb5
TG
2887 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2888 if (err < 0)
2889 goto errout;
1da177e4 2890
86872cb5
TG
2891 err = -EINVAL;
2892 rtm = nlmsg_data(nlh);
2893 memset(cfg, 0, sizeof(*cfg));
2894
2895 cfg->fc_table = rtm->rtm_table;
2896 cfg->fc_dst_len = rtm->rtm_dst_len;
2897 cfg->fc_src_len = rtm->rtm_src_len;
2898 cfg->fc_flags = RTF_UP;
2899 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2900 cfg->fc_type = rtm->rtm_type;
86872cb5 2901
ef2c7d7b
ND
2902 if (rtm->rtm_type == RTN_UNREACHABLE ||
2903 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2904 rtm->rtm_type == RTN_PROHIBIT ||
2905 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2906 cfg->fc_flags |= RTF_REJECT;
2907
ab79ad14
2908 if (rtm->rtm_type == RTN_LOCAL)
2909 cfg->fc_flags |= RTF_LOCAL;
2910
1f56a01f
MKL
2911 if (rtm->rtm_flags & RTM_F_CLONED)
2912 cfg->fc_flags |= RTF_CACHE;
2913
15e47304 2914 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2915 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2916 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2917
2918 if (tb[RTA_GATEWAY]) {
67b61f6c 2919 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2920 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2921 }
86872cb5
TG
2922
2923 if (tb[RTA_DST]) {
2924 int plen = (rtm->rtm_dst_len + 7) >> 3;
2925
2926 if (nla_len(tb[RTA_DST]) < plen)
2927 goto errout;
2928
2929 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 2930 }
86872cb5
TG
2931
2932 if (tb[RTA_SRC]) {
2933 int plen = (rtm->rtm_src_len + 7) >> 3;
2934
2935 if (nla_len(tb[RTA_SRC]) < plen)
2936 goto errout;
2937
2938 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 2939 }
86872cb5 2940
c3968a85 2941 if (tb[RTA_PREFSRC])
67b61f6c 2942 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 2943
86872cb5
TG
2944 if (tb[RTA_OIF])
2945 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2946
2947 if (tb[RTA_PRIORITY])
2948 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2949
2950 if (tb[RTA_METRICS]) {
2951 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2952 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 2953 }
86872cb5
TG
2954
2955 if (tb[RTA_TABLE])
2956 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2957
51ebd318
ND
2958 if (tb[RTA_MULTIPATH]) {
2959 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2960 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
2961
2962 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
2963 cfg->fc_mp_len);
2964 if (err < 0)
2965 goto errout;
51ebd318
ND
2966 }
2967
c78ba6d6
LR
2968 if (tb[RTA_PREF]) {
2969 pref = nla_get_u8(tb[RTA_PREF]);
2970 if (pref != ICMPV6_ROUTER_PREF_LOW &&
2971 pref != ICMPV6_ROUTER_PREF_HIGH)
2972 pref = ICMPV6_ROUTER_PREF_MEDIUM;
2973 cfg->fc_flags |= RTF_PREF(pref);
2974 }
2975
19e42e45
RP
2976 if (tb[RTA_ENCAP])
2977 cfg->fc_encap = tb[RTA_ENCAP];
2978
9ed59592 2979 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
2980 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
2981
9ed59592
DA
2982 err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
2983 if (err < 0)
2984 goto errout;
2985 }
2986
32bc201e
XL
2987 if (tb[RTA_EXPIRES]) {
2988 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
2989
2990 if (addrconf_finite_timeout(timeout)) {
2991 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
2992 cfg->fc_flags |= RTF_EXPIRES;
2993 }
2994 }
2995
86872cb5
TG
2996 err = 0;
2997errout:
2998 return err;
1da177e4
LT
2999}
3000
6b9ea5a6
RP
3001struct rt6_nh {
3002 struct rt6_info *rt6_info;
3003 struct fib6_config r_cfg;
3004 struct mx6_config mxc;
3005 struct list_head next;
3006};
3007
3008static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3009{
3010 struct rt6_nh *nh;
3011
3012 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 3013 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
3014 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3015 nh->r_cfg.fc_ifindex);
3016 }
3017}
3018
3019static int ip6_route_info_append(struct list_head *rt6_nh_list,
3020 struct rt6_info *rt, struct fib6_config *r_cfg)
3021{
3022 struct rt6_nh *nh;
3023 struct rt6_info *rtnh;
3024 int err = -EEXIST;
3025
3026 list_for_each_entry(nh, rt6_nh_list, next) {
3027 /* check if rt6_info already exists */
3028 rtnh = nh->rt6_info;
3029
3030 if (rtnh->dst.dev == rt->dst.dev &&
3031 rtnh->rt6i_idev == rt->rt6i_idev &&
3032 ipv6_addr_equal(&rtnh->rt6i_gateway,
3033 &rt->rt6i_gateway))
3034 return err;
3035 }
3036
3037 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3038 if (!nh)
3039 return -ENOMEM;
3040 nh->rt6_info = rt;
3041 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3042 if (err) {
3043 kfree(nh);
3044 return err;
3045 }
3046 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3047 list_add_tail(&nh->next, rt6_nh_list);
3048
3049 return 0;
3050}
3051
3b1137fe
DA
3052static void ip6_route_mpath_notify(struct rt6_info *rt,
3053 struct rt6_info *rt_last,
3054 struct nl_info *info,
3055 __u16 nlflags)
3056{
3057 /* if this is an APPEND route, then rt points to the first route
3058 * inserted and rt_last points to last route inserted. Userspace
3059 * wants a consistent dump of the route which starts at the first
3060 * nexthop. Since sibling routes are always added at the end of
3061 * the list, find the first sibling of the last route appended
3062 */
3063 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3064 rt = list_first_entry(&rt_last->rt6i_siblings,
3065 struct rt6_info,
3066 rt6i_siblings);
3067 }
3068
3069 if (rt)
3070 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3071}
3072
6b9ea5a6 3073static int ip6_route_multipath_add(struct fib6_config *cfg)
51ebd318 3074{
3b1137fe
DA
3075 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3076 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
3077 struct fib6_config r_cfg;
3078 struct rtnexthop *rtnh;
6b9ea5a6
RP
3079 struct rt6_info *rt;
3080 struct rt6_nh *err_nh;
3081 struct rt6_nh *nh, *nh_safe;
3b1137fe 3082 __u16 nlflags;
51ebd318
ND
3083 int remaining;
3084 int attrlen;
6b9ea5a6
RP
3085 int err = 1;
3086 int nhn = 0;
3087 int replace = (cfg->fc_nlinfo.nlh &&
3088 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3089 LIST_HEAD(rt6_nh_list);
51ebd318 3090
3b1137fe
DA
3091 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3092 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3093 nlflags |= NLM_F_APPEND;
3094
35f1b4e9 3095 remaining = cfg->fc_mp_len;
51ebd318 3096 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3097
6b9ea5a6
RP
3098 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3099 * rt6_info structs per nexthop
3100 */
51ebd318
ND
3101 while (rtnh_ok(rtnh, remaining)) {
3102 memcpy(&r_cfg, cfg, sizeof(*cfg));
3103 if (rtnh->rtnh_ifindex)
3104 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3105
3106 attrlen = rtnh_attrlen(rtnh);
3107 if (attrlen > 0) {
3108 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3109
3110 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3111 if (nla) {
67b61f6c 3112 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3113 r_cfg.fc_flags |= RTF_GATEWAY;
3114 }
19e42e45
RP
3115 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3116 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3117 if (nla)
3118 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3119 }
6b9ea5a6 3120
8c5b83f0
RP
3121 rt = ip6_route_info_create(&r_cfg);
3122 if (IS_ERR(rt)) {
3123 err = PTR_ERR(rt);
3124 rt = NULL;
6b9ea5a6 3125 goto cleanup;
8c5b83f0 3126 }
6b9ea5a6
RP
3127
3128 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3129 if (err) {
6b9ea5a6
RP
3130 dst_free(&rt->dst);
3131 goto cleanup;
3132 }
3133
3134 rtnh = rtnh_next(rtnh, &remaining);
3135 }
3136
3b1137fe
DA
3137 /* for add and replace send one notification with all nexthops.
3138 * Skip the notification in fib6_add_rt2node and send one with
3139 * the full route when done
3140 */
3141 info->skip_notify = 1;
3142
6b9ea5a6
RP
3143 err_nh = NULL;
3144 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe
DA
3145 rt_last = nh->rt6_info;
3146 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc);
3147 /* save reference to first route for notification */
3148 if (!rt_notif && !err)
3149 rt_notif = nh->rt6_info;
3150
6b9ea5a6
RP
3151 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3152 nh->rt6_info = NULL;
3153 if (err) {
3154 if (replace && nhn)
3155 ip6_print_replace_route_err(&rt6_nh_list);
3156 err_nh = nh;
3157 goto add_errout;
51ebd318 3158 }
6b9ea5a6 3159
1a72418b 3160 /* Because each route is added like a single route we remove
27596472
MK
3161 * these flags after the first nexthop: if there is a collision,
3162 * we have already failed to add the first nexthop:
3163 * fib6_add_rt2node() has rejected it; when replacing, old
3164 * nexthops have been replaced by first new, the rest should
3165 * be added to it.
1a72418b 3166 */
27596472
MK
3167 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3168 NLM_F_REPLACE);
6b9ea5a6
RP
3169 nhn++;
3170 }
3171
3b1137fe
DA
3172 /* success ... tell user about new route */
3173 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
3174 goto cleanup;
3175
3176add_errout:
3b1137fe
DA
3177 /* send notification for routes that were added so that
3178 * the delete notifications sent by ip6_route_del are
3179 * coherent
3180 */
3181 if (rt_notif)
3182 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3183
6b9ea5a6
RP
3184 /* Delete routes that were already added */
3185 list_for_each_entry(nh, &rt6_nh_list, next) {
3186 if (err_nh == nh)
3187 break;
3188 ip6_route_del(&nh->r_cfg);
3189 }
3190
3191cleanup:
3192 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
3193 if (nh->rt6_info)
3194 dst_free(&nh->rt6_info->dst);
52fe51f8 3195 kfree(nh->mxc.mx);
6b9ea5a6
RP
3196 list_del(&nh->next);
3197 kfree(nh);
3198 }
3199
3200 return err;
3201}
3202
3203static int ip6_route_multipath_del(struct fib6_config *cfg)
3204{
3205 struct fib6_config r_cfg;
3206 struct rtnexthop *rtnh;
3207 int remaining;
3208 int attrlen;
3209 int err = 1, last_err = 0;
3210
3211 remaining = cfg->fc_mp_len;
3212 rtnh = (struct rtnexthop *)cfg->fc_mp;
3213
3214 /* Parse a Multipath Entry */
3215 while (rtnh_ok(rtnh, remaining)) {
3216 memcpy(&r_cfg, cfg, sizeof(*cfg));
3217 if (rtnh->rtnh_ifindex)
3218 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3219
3220 attrlen = rtnh_attrlen(rtnh);
3221 if (attrlen > 0) {
3222 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3223
3224 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3225 if (nla) {
3226 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3227 r_cfg.fc_flags |= RTF_GATEWAY;
3228 }
3229 }
3230 err = ip6_route_del(&r_cfg);
3231 if (err)
3232 last_err = err;
3233
51ebd318
ND
3234 rtnh = rtnh_next(rtnh, &remaining);
3235 }
3236
3237 return last_err;
3238}
3239
67ba4152 3240static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3241{
86872cb5
TG
3242 struct fib6_config cfg;
3243 int err;
1da177e4 3244
86872cb5
TG
3245 err = rtm_to_fib6_config(skb, nlh, &cfg);
3246 if (err < 0)
3247 return err;
3248
51ebd318 3249 if (cfg.fc_mp)
6b9ea5a6 3250 return ip6_route_multipath_del(&cfg);
0ae81335
DA
3251 else {
3252 cfg.fc_delete_all_nh = 1;
51ebd318 3253 return ip6_route_del(&cfg);
0ae81335 3254 }
1da177e4
LT
3255}
3256
67ba4152 3257static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
1da177e4 3258{
86872cb5
TG
3259 struct fib6_config cfg;
3260 int err;
1da177e4 3261
86872cb5
TG
3262 err = rtm_to_fib6_config(skb, nlh, &cfg);
3263 if (err < 0)
3264 return err;
3265
51ebd318 3266 if (cfg.fc_mp)
6b9ea5a6 3267 return ip6_route_multipath_add(&cfg);
51ebd318
ND
3268 else
3269 return ip6_route_add(&cfg);
1da177e4
LT
3270}
3271
beb1afac 3272static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 3273{
beb1afac
DA
3274 int nexthop_len = 0;
3275
3276 if (rt->rt6i_nsiblings) {
3277 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3278 + NLA_ALIGN(sizeof(struct rtnexthop))
3279 + nla_total_size(16) /* RTA_GATEWAY */
3280 + nla_total_size(4) /* RTA_OIF */
3281 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3282
3283 nexthop_len *= rt->rt6i_nsiblings;
3284 }
3285
339bf98f
TG
3286 return NLMSG_ALIGN(sizeof(struct rtmsg))
3287 + nla_total_size(16) /* RTA_SRC */
3288 + nla_total_size(16) /* RTA_DST */
3289 + nla_total_size(16) /* RTA_GATEWAY */
3290 + nla_total_size(16) /* RTA_PREFSRC */
3291 + nla_total_size(4) /* RTA_TABLE */
3292 + nla_total_size(4) /* RTA_IIF */
3293 + nla_total_size(4) /* RTA_OIF */
3294 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3295 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3296 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3297 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3298 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
3299 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3300 + nexthop_len;
3301}
3302
3303static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
3304 unsigned int *flags)
3305{
3306 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3307 *flags |= RTNH_F_LINKDOWN;
3308 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3309 *flags |= RTNH_F_DEAD;
3310 }
3311
3312 if (rt->rt6i_flags & RTF_GATEWAY) {
3313 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3314 goto nla_put_failure;
3315 }
3316
3317 if (rt->dst.dev &&
3318 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3319 goto nla_put_failure;
3320
3321 if (rt->dst.lwtstate &&
3322 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3323 goto nla_put_failure;
3324
3325 return 0;
3326
3327nla_put_failure:
3328 return -EMSGSIZE;
3329}
3330
3331static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3332{
3333 struct rtnexthop *rtnh;
3334 unsigned int flags = 0;
3335
3336 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3337 if (!rtnh)
3338 goto nla_put_failure;
3339
3340 rtnh->rtnh_hops = 0;
3341 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3342
3343 if (rt6_nexthop_info(skb, rt, &flags) < 0)
3344 goto nla_put_failure;
3345
3346 rtnh->rtnh_flags = flags;
3347
3348 /* length of rtnetlink header + attributes */
3349 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3350
3351 return 0;
3352
3353nla_put_failure:
3354 return -EMSGSIZE;
339bf98f
TG
3355}
3356
191cd582
BH
3357static int rt6_fill_node(struct net *net,
3358 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3359 struct in6_addr *dst, struct in6_addr *src,
15e47304 3360 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 3361 unsigned int flags)
1da177e4 3362{
4b32b5ad 3363 u32 metrics[RTAX_MAX];
1da177e4 3364 struct rtmsg *rtm;
2d7202bf 3365 struct nlmsghdr *nlh;
e3703b3d 3366 long expires;
9e762a4a 3367 u32 table;
1da177e4 3368
15e47304 3369 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3370 if (!nlh)
26932566 3371 return -EMSGSIZE;
2d7202bf
TG
3372
3373 rtm = nlmsg_data(nlh);
1da177e4
LT
3374 rtm->rtm_family = AF_INET6;
3375 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3376 rtm->rtm_src_len = rt->rt6i_src.plen;
3377 rtm->rtm_tos = 0;
c71099ac 3378 if (rt->rt6i_table)
9e762a4a 3379 table = rt->rt6i_table->tb6_id;
c71099ac 3380 else
9e762a4a
PM
3381 table = RT6_TABLE_UNSPEC;
3382 rtm->rtm_table = table;
c78679e8
DM
3383 if (nla_put_u32(skb, RTA_TABLE, table))
3384 goto nla_put_failure;
ef2c7d7b
ND
3385 if (rt->rt6i_flags & RTF_REJECT) {
3386 switch (rt->dst.error) {
3387 case -EINVAL:
3388 rtm->rtm_type = RTN_BLACKHOLE;
3389 break;
3390 case -EACCES:
3391 rtm->rtm_type = RTN_PROHIBIT;
3392 break;
b4949ab2
ND
3393 case -EAGAIN:
3394 rtm->rtm_type = RTN_THROW;
3395 break;
ef2c7d7b
ND
3396 default:
3397 rtm->rtm_type = RTN_UNREACHABLE;
3398 break;
3399 }
3400 }
38308473 3401 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3402 rtm->rtm_type = RTN_LOCAL;
d1918542 3403 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3404 rtm->rtm_type = RTN_LOCAL;
3405 else
3406 rtm->rtm_type = RTN_UNICAST;
3407 rtm->rtm_flags = 0;
3408 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3409 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3410 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3411 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3412 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3413 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3414 rtm->rtm_protocol = RTPROT_RA;
3415 else
3416 rtm->rtm_protocol = RTPROT_KERNEL;
3417 }
1da177e4 3418
38308473 3419 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3420 rtm->rtm_flags |= RTM_F_CLONED;
3421
3422 if (dst) {
930345ea 3423 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3424 goto nla_put_failure;
1ab1457c 3425 rtm->rtm_dst_len = 128;
1da177e4 3426 } else if (rtm->rtm_dst_len)
930345ea 3427 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3428 goto nla_put_failure;
1da177e4
LT
3429#ifdef CONFIG_IPV6_SUBTREES
3430 if (src) {
930345ea 3431 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3432 goto nla_put_failure;
1ab1457c 3433 rtm->rtm_src_len = 128;
c78679e8 3434 } else if (rtm->rtm_src_len &&
930345ea 3435 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3436 goto nla_put_failure;
1da177e4 3437#endif
7bc570c8
YH
3438 if (iif) {
3439#ifdef CONFIG_IPV6_MROUTE
3440 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
3441 int err = ip6mr_get_route(net, skb, rtm, portid);
3442
3443 if (err == 0)
3444 return 0;
3445 if (err < 0)
3446 goto nla_put_failure;
7bc570c8
YH
3447 } else
3448#endif
c78679e8
DM
3449 if (nla_put_u32(skb, RTA_IIF, iif))
3450 goto nla_put_failure;
7bc570c8 3451 } else if (dst) {
1da177e4 3452 struct in6_addr saddr_buf;
c78679e8 3453 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3454 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3455 goto nla_put_failure;
1da177e4 3456 }
2d7202bf 3457
c3968a85
DW
3458 if (rt->rt6i_prefsrc.plen) {
3459 struct in6_addr saddr_buf;
4e3fd7a0 3460 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3461 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3462 goto nla_put_failure;
c3968a85
DW
3463 }
3464
4b32b5ad
MKL
3465 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3466 if (rt->rt6i_pmtu)
3467 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3468 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3469 goto nla_put_failure;
3470
c78679e8
DM
3471 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3472 goto nla_put_failure;
8253947e 3473
beb1afac
DA
3474 /* For multipath routes, walk the siblings list and add
3475 * each as a nexthop within RTA_MULTIPATH.
3476 */
3477 if (rt->rt6i_nsiblings) {
3478 struct rt6_info *sibling, *next_sibling;
3479 struct nlattr *mp;
3480
3481 mp = nla_nest_start(skb, RTA_MULTIPATH);
3482 if (!mp)
3483 goto nla_put_failure;
3484
3485 if (rt6_add_nexthop(skb, rt) < 0)
3486 goto nla_put_failure;
3487
3488 list_for_each_entry_safe(sibling, next_sibling,
3489 &rt->rt6i_siblings, rt6i_siblings) {
3490 if (rt6_add_nexthop(skb, sibling) < 0)
3491 goto nla_put_failure;
3492 }
3493
3494 nla_nest_end(skb, mp);
3495 } else {
3496 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags) < 0)
3497 goto nla_put_failure;
3498 }
3499
8253947e 3500 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3501
87a50699 3502 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3503 goto nla_put_failure;
2d7202bf 3504
c78ba6d6
LR
3505 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3506 goto nla_put_failure;
3507
19e42e45 3508
053c095a
JB
3509 nlmsg_end(skb, nlh);
3510 return 0;
2d7202bf
TG
3511
3512nla_put_failure:
26932566
PM
3513 nlmsg_cancel(skb, nlh);
3514 return -EMSGSIZE;
1da177e4
LT
3515}
3516
1b43af54 3517int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3518{
3519 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
3520 struct net *net = arg->net;
3521
3522 if (rt == net->ipv6.ip6_null_entry)
3523 return 0;
1da177e4 3524
2d7202bf
TG
3525 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3526 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
3527
3528 /* user wants prefix routes only */
3529 if (rtm->rtm_flags & RTM_F_PREFIX &&
3530 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3531 /* success since this is not a prefix route */
3532 return 1;
3533 }
3534 }
1da177e4 3535
1f17e2f2 3536 return rt6_fill_node(net,
191cd582 3537 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3538 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 3539 NLM_F_MULTI);
1da177e4
LT
3540}
3541
67ba4152 3542static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
1da177e4 3543{
3b1e0a65 3544 struct net *net = sock_net(in_skb->sk);
ab364a6f
TG
3545 struct nlattr *tb[RTA_MAX+1];
3546 struct rt6_info *rt;
1da177e4 3547 struct sk_buff *skb;
ab364a6f 3548 struct rtmsg *rtm;
4c9483b2 3549 struct flowi6 fl6;
72331bc0 3550 int err, iif = 0, oif = 0;
1da177e4 3551
ab364a6f
TG
3552 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
3553 if (err < 0)
3554 goto errout;
1da177e4 3555
ab364a6f 3556 err = -EINVAL;
4c9483b2 3557 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
3558 rtm = nlmsg_data(nlh);
3559 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
1da177e4 3560
ab364a6f
TG
3561 if (tb[RTA_SRC]) {
3562 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3563 goto errout;
3564
4e3fd7a0 3565 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3566 }
3567
3568 if (tb[RTA_DST]) {
3569 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3570 goto errout;
3571
4e3fd7a0 3572 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3573 }
3574
3575 if (tb[RTA_IIF])
3576 iif = nla_get_u32(tb[RTA_IIF]);
3577
3578 if (tb[RTA_OIF])
72331bc0 3579 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3580
2e47b291
LC
3581 if (tb[RTA_MARK])
3582 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3583
622ec2c9
LC
3584 if (tb[RTA_UID])
3585 fl6.flowi6_uid = make_kuid(current_user_ns(),
3586 nla_get_u32(tb[RTA_UID]));
3587 else
3588 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3589
1da177e4
LT
3590 if (iif) {
3591 struct net_device *dev;
72331bc0
SL
3592 int flags = 0;
3593
5578689a 3594 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3595 if (!dev) {
3596 err = -ENODEV;
ab364a6f 3597 goto errout;
1da177e4 3598 }
72331bc0
SL
3599
3600 fl6.flowi6_iif = iif;
3601
3602 if (!ipv6_addr_any(&fl6.saddr))
3603 flags |= RT6_LOOKUP_F_HAS_SADDR;
3604
3605 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
3606 flags);
3607 } else {
3608 fl6.flowi6_oif = oif;
3609
3610 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
1da177e4
LT
3611 }
3612
ab364a6f 3613 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3614 if (!skb) {
94e187c0 3615 ip6_rt_put(rt);
ab364a6f
TG
3616 err = -ENOBUFS;
3617 goto errout;
3618 }
1da177e4 3619
d8d1f30b 3620 skb_dst_set(skb, &rt->dst);
1da177e4 3621
4c9483b2 3622 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
15e47304 3623 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
f8cfe2ce 3624 nlh->nlmsg_seq, 0);
1da177e4 3625 if (err < 0) {
ab364a6f
TG
3626 kfree_skb(skb);
3627 goto errout;
1da177e4
LT
3628 }
3629
15e47304 3630 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3631errout:
1da177e4 3632 return err;
1da177e4
LT
3633}
3634
37a1d361
RP
3635void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3636 unsigned int nlm_flags)
1da177e4
LT
3637{
3638 struct sk_buff *skb;
5578689a 3639 struct net *net = info->nl_net;
528c4ceb
DL
3640 u32 seq;
3641 int err;
3642
3643 err = -ENOBUFS;
38308473 3644 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3645
19e42e45 3646 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3647 if (!skb)
21713ebc
TG
3648 goto errout;
3649
191cd582 3650 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 3651 event, info->portid, seq, nlm_flags);
26932566
PM
3652 if (err < 0) {
3653 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3654 WARN_ON(err == -EMSGSIZE);
3655 kfree_skb(skb);
3656 goto errout;
3657 }
15e47304 3658 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3659 info->nlh, gfp_any());
3660 return;
21713ebc
TG
3661errout:
3662 if (err < 0)
5578689a 3663 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3664}
3665
8ed67789 3666static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3667 unsigned long event, void *ptr)
8ed67789 3668{
351638e7 3669 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3670 struct net *net = dev_net(dev);
8ed67789
DL
3671
3672 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
d8d1f30b 3673 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3674 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3675#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3676 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3677 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3678 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789
DL
3679 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
3680#endif
3681 }
3682
3683 return NOTIFY_OK;
3684}
3685
1da177e4
LT
3686/*
3687 * /proc
3688 */
3689
3690#ifdef CONFIG_PROC_FS
3691
33120b30
AD
3692static const struct file_operations ipv6_route_proc_fops = {
3693 .owner = THIS_MODULE,
3694 .open = ipv6_route_open,
3695 .read = seq_read,
3696 .llseek = seq_lseek,
8d2ca1d7 3697 .release = seq_release_net,
33120b30
AD
3698};
3699
1da177e4
LT
3700static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3701{
69ddb805 3702 struct net *net = (struct net *)seq->private;
1da177e4 3703 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3704 net->ipv6.rt6_stats->fib_nodes,
3705 net->ipv6.rt6_stats->fib_route_nodes,
3706 net->ipv6.rt6_stats->fib_rt_alloc,
3707 net->ipv6.rt6_stats->fib_rt_entries,
3708 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3709 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3710 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3711
3712 return 0;
3713}
3714
3715static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3716{
de05c557 3717 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3718}
3719
9a32144e 3720static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3721 .owner = THIS_MODULE,
3722 .open = rt6_stats_seq_open,
3723 .read = seq_read,
3724 .llseek = seq_lseek,
b6fcbdb4 3725 .release = single_release_net,
1da177e4
LT
3726};
3727#endif /* CONFIG_PROC_FS */
3728
3729#ifdef CONFIG_SYSCTL
3730
1da177e4 3731static
fe2c6338 3732int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3733 void __user *buffer, size_t *lenp, loff_t *ppos)
3734{
c486da34
LAG
3735 struct net *net;
3736 int delay;
3737 if (!write)
1da177e4 3738 return -EINVAL;
c486da34
LAG
3739
3740 net = (struct net *)ctl->extra1;
3741 delay = net->ipv6.sysctl.flush_delay;
3742 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3743 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3744 return 0;
1da177e4
LT
3745}
3746
fe2c6338 3747struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3748 {
1da177e4 3749 .procname = "flush",
4990509f 3750 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3751 .maxlen = sizeof(int),
89c8b3a1 3752 .mode = 0200,
6d9f239a 3753 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3754 },
3755 {
1da177e4 3756 .procname = "gc_thresh",
9a7ec3a9 3757 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3758 .maxlen = sizeof(int),
3759 .mode = 0644,
6d9f239a 3760 .proc_handler = proc_dointvec,
1da177e4
LT
3761 },
3762 {
1da177e4 3763 .procname = "max_size",
4990509f 3764 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3765 .maxlen = sizeof(int),
3766 .mode = 0644,
6d9f239a 3767 .proc_handler = proc_dointvec,
1da177e4
LT
3768 },
3769 {
1da177e4 3770 .procname = "gc_min_interval",
4990509f 3771 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3772 .maxlen = sizeof(int),
3773 .mode = 0644,
6d9f239a 3774 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3775 },
3776 {
1da177e4 3777 .procname = "gc_timeout",
4990509f 3778 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3779 .maxlen = sizeof(int),
3780 .mode = 0644,
6d9f239a 3781 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3782 },
3783 {
1da177e4 3784 .procname = "gc_interval",
4990509f 3785 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3786 .maxlen = sizeof(int),
3787 .mode = 0644,
6d9f239a 3788 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3789 },
3790 {
1da177e4 3791 .procname = "gc_elasticity",
4990509f 3792 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3793 .maxlen = sizeof(int),
3794 .mode = 0644,
f3d3f616 3795 .proc_handler = proc_dointvec,
1da177e4
LT
3796 },
3797 {
1da177e4 3798 .procname = "mtu_expires",
4990509f 3799 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3800 .maxlen = sizeof(int),
3801 .mode = 0644,
6d9f239a 3802 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3803 },
3804 {
1da177e4 3805 .procname = "min_adv_mss",
4990509f 3806 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3807 .maxlen = sizeof(int),
3808 .mode = 0644,
f3d3f616 3809 .proc_handler = proc_dointvec,
1da177e4
LT
3810 },
3811 {
1da177e4 3812 .procname = "gc_min_interval_ms",
4990509f 3813 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3814 .maxlen = sizeof(int),
3815 .mode = 0644,
6d9f239a 3816 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3817 },
f8572d8f 3818 { }
1da177e4
LT
3819};
3820
2c8c1e72 3821struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3822{
3823 struct ctl_table *table;
3824
3825 table = kmemdup(ipv6_route_table_template,
3826 sizeof(ipv6_route_table_template),
3827 GFP_KERNEL);
5ee09105
YH
3828
3829 if (table) {
3830 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3831 table[0].extra1 = net;
86393e52 3832 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3833 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3834 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3835 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3836 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3837 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3838 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3839 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3840 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3841
3842 /* Don't export sysctls to unprivileged users */
3843 if (net->user_ns != &init_user_ns)
3844 table[0].procname = NULL;
5ee09105
YH
3845 }
3846
760f2d01
DL
3847 return table;
3848}
1da177e4
LT
3849#endif
3850
2c8c1e72 3851static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3852{
633d424b 3853 int ret = -ENOMEM;
8ed67789 3854
86393e52
AD
3855 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3856 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3857
fc66f95c
ED
3858 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3859 goto out_ip6_dst_ops;
3860
8ed67789
DL
3861 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3862 sizeof(*net->ipv6.ip6_null_entry),
3863 GFP_KERNEL);
3864 if (!net->ipv6.ip6_null_entry)
fc66f95c 3865 goto out_ip6_dst_entries;
d8d1f30b 3866 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3867 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3868 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3869 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3870 ip6_template_metrics, true);
8ed67789
DL
3871
3872#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3873 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3874 sizeof(*net->ipv6.ip6_prohibit_entry),
3875 GFP_KERNEL);
68fffc67
PZ
3876 if (!net->ipv6.ip6_prohibit_entry)
3877 goto out_ip6_null_entry;
d8d1f30b 3878 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 3879 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 3880 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3881 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3882 ip6_template_metrics, true);
8ed67789
DL
3883
3884 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3885 sizeof(*net->ipv6.ip6_blk_hole_entry),
3886 GFP_KERNEL);
68fffc67
PZ
3887 if (!net->ipv6.ip6_blk_hole_entry)
3888 goto out_ip6_prohibit_entry;
d8d1f30b 3889 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 3890 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 3891 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3892 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3893 ip6_template_metrics, true);
8ed67789
DL
3894#endif
3895
b339a47c
PZ
3896 net->ipv6.sysctl.flush_delay = 0;
3897 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3898 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3899 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3900 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3901 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3902 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3903 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3904
6891a346
BT
3905 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3906
8ed67789
DL
3907 ret = 0;
3908out:
3909 return ret;
f2fc6a54 3910
68fffc67
PZ
3911#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3912out_ip6_prohibit_entry:
3913 kfree(net->ipv6.ip6_prohibit_entry);
3914out_ip6_null_entry:
3915 kfree(net->ipv6.ip6_null_entry);
3916#endif
fc66f95c
ED
3917out_ip6_dst_entries:
3918 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 3919out_ip6_dst_ops:
f2fc6a54 3920 goto out;
cdb18761
DL
3921}
3922
2c8c1e72 3923static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 3924{
8ed67789
DL
3925 kfree(net->ipv6.ip6_null_entry);
3926#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3927 kfree(net->ipv6.ip6_prohibit_entry);
3928 kfree(net->ipv6.ip6_blk_hole_entry);
3929#endif
41bb78b4 3930 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
3931}
3932
d189634e
TG
3933static int __net_init ip6_route_net_init_late(struct net *net)
3934{
3935#ifdef CONFIG_PROC_FS
d4beaa66
G
3936 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3937 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
3938#endif
3939 return 0;
3940}
3941
3942static void __net_exit ip6_route_net_exit_late(struct net *net)
3943{
3944#ifdef CONFIG_PROC_FS
ece31ffd
G
3945 remove_proc_entry("ipv6_route", net->proc_net);
3946 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
3947#endif
3948}
3949
cdb18761
DL
3950static struct pernet_operations ip6_route_net_ops = {
3951 .init = ip6_route_net_init,
3952 .exit = ip6_route_net_exit,
3953};
3954
c3426b47
DM
3955static int __net_init ipv6_inetpeer_init(struct net *net)
3956{
3957 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3958
3959 if (!bp)
3960 return -ENOMEM;
3961 inet_peer_base_init(bp);
3962 net->ipv6.peers = bp;
3963 return 0;
3964}
3965
3966static void __net_exit ipv6_inetpeer_exit(struct net *net)
3967{
3968 struct inet_peer_base *bp = net->ipv6.peers;
3969
3970 net->ipv6.peers = NULL;
56a6b248 3971 inetpeer_invalidate_tree(bp);
c3426b47
DM
3972 kfree(bp);
3973}
3974
2b823f72 3975static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
3976 .init = ipv6_inetpeer_init,
3977 .exit = ipv6_inetpeer_exit,
3978};
3979
d189634e
TG
3980static struct pernet_operations ip6_route_net_late_ops = {
3981 .init = ip6_route_net_init_late,
3982 .exit = ip6_route_net_exit_late,
3983};
3984
8ed67789
DL
3985static struct notifier_block ip6_route_dev_notifier = {
3986 .notifier_call = ip6_route_dev_notify,
3987 .priority = 0,
3988};
3989
433d49c3 3990int __init ip6_route_init(void)
1da177e4 3991{
433d49c3 3992 int ret;
8d0b94af 3993 int cpu;
433d49c3 3994
9a7ec3a9
DL
3995 ret = -ENOMEM;
3996 ip6_dst_ops_template.kmem_cachep =
e5d679f3 3997 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 3998 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 3999 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 4000 goto out;
14e50e57 4001
fc66f95c 4002 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 4003 if (ret)
bdb3289f 4004 goto out_kmem_cache;
bdb3289f 4005
c3426b47
DM
4006 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4007 if (ret)
e8803b6c 4008 goto out_dst_entries;
2a0c451a 4009
7e52b33b
DM
4010 ret = register_pernet_subsys(&ip6_route_net_ops);
4011 if (ret)
4012 goto out_register_inetpeer;
c3426b47 4013
5dc121e9
AE
4014 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4015
8ed67789
DL
4016 /* Registering of the loopback is done before this portion of code,
4017 * the loopback reference in rt6_info will not be taken, do it
4018 * manually for init_net */
d8d1f30b 4019 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
4020 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4021 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4022 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
8ed67789 4023 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
d8d1f30b 4024 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
8ed67789
DL
4025 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4026 #endif
e8803b6c 4027 ret = fib6_init();
433d49c3 4028 if (ret)
8ed67789 4029 goto out_register_subsys;
433d49c3 4030
433d49c3
DL
4031 ret = xfrm6_init();
4032 if (ret)
e8803b6c 4033 goto out_fib6_init;
c35b7e72 4034
433d49c3
DL
4035 ret = fib6_rules_init();
4036 if (ret)
4037 goto xfrm6_init;
7e5449c2 4038
d189634e
TG
4039 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4040 if (ret)
4041 goto fib6_rules_init;
4042
433d49c3 4043 ret = -ENOBUFS;
c7ac8679
GR
4044 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
4045 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
4046 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 4047 goto out_register_late_subsys;
c127ea2c 4048
8ed67789 4049 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 4050 if (ret)
d189634e 4051 goto out_register_late_subsys;
8ed67789 4052
8d0b94af
MKL
4053 for_each_possible_cpu(cpu) {
4054 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4055
4056 INIT_LIST_HEAD(&ul->head);
4057 spin_lock_init(&ul->lock);
4058 }
4059
433d49c3
DL
4060out:
4061 return ret;
4062
d189634e
TG
4063out_register_late_subsys:
4064 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 4065fib6_rules_init:
433d49c3
DL
4066 fib6_rules_cleanup();
4067xfrm6_init:
433d49c3 4068 xfrm6_fini();
2a0c451a
TG
4069out_fib6_init:
4070 fib6_gc_cleanup();
8ed67789
DL
4071out_register_subsys:
4072 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
4073out_register_inetpeer:
4074 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
4075out_dst_entries:
4076 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 4077out_kmem_cache:
f2fc6a54 4078 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 4079 goto out;
1da177e4
LT
4080}
4081
4082void ip6_route_cleanup(void)
4083{
8ed67789 4084 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 4085 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 4086 fib6_rules_cleanup();
1da177e4 4087 xfrm6_fini();
1da177e4 4088 fib6_gc_cleanup();
c3426b47 4089 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 4090 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 4091 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 4092 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 4093}