]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - net/ipv6/route.c
Merge tag 'socfpga_updates_for_v4.20_part3' of git://git.kernel.org/pub/scm/linux...
[mirror_ubuntu-focal-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/*
 * Result of neighbour-reachability scoring for a route.  The negative
 * values are failure codes that rt6_score_route() hands back in place of
 * a score.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,		/* find_match() skips the route */
	RT6_NUD_FAIL_PROBE = -2,	/* neighbour is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR = -1,	/* scored as 0, asks for round-robin */
	RT6_NUD_SUCCEED = 1,
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
8d1c802b
DA
105static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 107static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 108 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 109 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
110 int iif, int type, u32 portid, u32 seq,
111 unsigned int flags);
8d1c802b 112static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
113 struct in6_addr *daddr,
114 struct in6_addr *saddr);
1da177e4 115
70ceb4f5 116#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 117static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 118 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
119 const struct in6_addr *gwaddr,
120 struct net_device *dev,
95c96174 121 unsigned int pref);
8d1c802b 122static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 123 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
124 const struct in6_addr *gwaddr,
125 struct net_device *dev);
70ceb4f5
YH
126#endif
127
8d0b94af
MKL
128struct uncached_list {
129 spinlock_t lock;
130 struct list_head head;
131};
132
133static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
510c321b 135void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
136{
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
8d0b94af
MKL
139 rt->rt6i_uncached_list = ul;
140
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
144}
145
510c321b 146void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
147{
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 150 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
151
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
81eb8447 154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
155 spin_unlock_bh(&ul->lock);
156 }
157}
158
159static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160{
161 struct net_device *loopback_dev = net->loopback_dev;
162 int cpu;
163
e332bc67
EB
164 if (dev == loopback_dev)
165 return;
166
8d0b94af
MKL
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169 struct rt6_info *rt;
170
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
175
e332bc67 176 if (rt_idev->dev == dev) {
8d0b94af
MKL
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
179 }
180
e332bc67 181 if (rt_dev == dev) {
8d0b94af
MKL
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
184 dev_put(rt_dev);
185 }
186 }
187 spin_unlock_bh(&ul->lock);
188 }
189}
190
f8a1b43b 191static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
192 struct sk_buff *skb,
193 const void *daddr)
39232973 194{
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f8a1b43b
DA
202struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
204 struct sk_buff *skb,
205 const void *daddr)
d3aaeb38 206{
39232973
DM
207 struct neighbour *n;
208
f8a1b43b
DA
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
f8a1b43b
DA
213 return neigh_create(&nd_tbl, daddr, dev);
214}
215
216static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
217 struct sk_buff *skb,
218 const void *daddr)
219{
220 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
221
222 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
223}
224
63fca65d
JA
225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226{
227 struct net_device *dev = dst->dev;
228 struct rt6_info *rt = (struct rt6_info *)dst;
229
f8a1b43b 230 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
231 if (!daddr)
232 return;
233 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 return;
235 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 return;
237 __ipv6_confirm_neigh(dev, daddr);
238}
239
9a7ec3a9 240static struct dst_ops ip6_dst_ops_template = {
1da177e4 241 .family = AF_INET6,
1da177e4
LT
242 .gc = ip6_dst_gc,
243 .gc_thresh = 1024,
244 .check = ip6_dst_check,
0dbaee3b 245 .default_advmss = ip6_default_advmss,
ebb762f2 246 .mtu = ip6_mtu,
d4ead6b3 247 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
248 .destroy = ip6_dst_destroy,
249 .ifdown = ip6_dst_ifdown,
250 .negative_advice = ip6_negative_advice,
251 .link_failure = ip6_link_failure,
252 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 253 .redirect = rt6_do_redirect,
9f8955cc 254 .local_out = __ip6_local_out,
f8a1b43b 255 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 256 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
257};
258
ebb762f2 259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 260{
618f9bc7
SK
261 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262
263 return mtu ? : dst->dev->mtu;
ec831ea7
RD
264}
265
6700c270
DM
266static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
267 struct sk_buff *skb, u32 mtu)
14e50e57
DM
268{
269}
270
/* redirect callback for ip6_dst_blackhole_ops: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
275
14e50e57
DM
276static struct dst_ops ip6_dst_blackhole_ops = {
277 .family = AF_INET6,
14e50e57
DM
278 .destroy = ip6_dst_destroy,
279 .check = ip6_dst_check,
ebb762f2 280 .mtu = ip6_blackhole_mtu,
214f45c9 281 .default_advmss = ip6_default_advmss,
14e50e57 282 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 283 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 284 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 285 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
286};
287
62fa8a84 288static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 289 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
290};
291
8d1c802b 292static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
293 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
294 .fib6_protocol = RTPROT_KERNEL,
295 .fib6_metric = ~(u32)0,
296 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
297 .fib6_type = RTN_UNREACHABLE,
298 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
299};
300
fb0af4c7 301static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
302 .dst = {
303 .__refcnt = ATOMIC_INIT(1),
304 .__use = 1,
2c20cbd7 305 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 306 .error = -ENETUNREACH,
d8d1f30b
CG
307 .input = ip6_pkt_discard,
308 .output = ip6_pkt_discard_out,
1da177e4
LT
309 },
310 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
325};
326
fb0af4c7 327static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
328 .dst = {
329 .__refcnt = ATOMIC_INIT(1),
330 .__use = 1,
2c20cbd7 331 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 332 .error = -EINVAL,
d8d1f30b 333 .input = dst_discard,
ede2059d 334 .output = dst_discard_out,
101367c2
TG
335 },
336 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
337};
338
339#endif
340
ebfa45f0
MKL
341static void rt6_info_init(struct rt6_info *rt)
342{
343 struct dst_entry *dst = &rt->dst;
344
345 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
346 INIT_LIST_HEAD(&rt->rt6i_uncached);
347}
348
1da177e4 349/* allocate dst with ip6_dst_ops */
93531c67
DA
350struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
351 int flags)
1da177e4 352{
97bab73f 353 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 354 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 355
81eb8447 356 if (rt) {
ebfa45f0 357 rt6_info_init(rt);
81eb8447
WW
358 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
359 }
8104891b 360
cf911662 361 return rt;
1da177e4 362}
9ab179d8 363EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 364
1da177e4
LT
365static void ip6_dst_destroy(struct dst_entry *dst)
366{
367 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 368 struct fib6_info *from;
8d0b94af 369 struct inet6_dev *idev;
1da177e4 370
4b32b5ad 371 dst_destroy_metrics_generic(dst);
8d0b94af
MKL
372 rt6_uncached_list_del(rt);
373
374 idev = rt->rt6i_idev;
38308473 375 if (idev) {
1da177e4
LT
376 rt->rt6i_idev = NULL;
377 in6_dev_put(idev);
1ab1457c 378 }
1716a961 379
a68886a6
DA
380 rcu_read_lock();
381 from = rcu_dereference(rt->from);
382 rcu_assign_pointer(rt->from, NULL);
93531c67 383 fib6_info_release(from);
a68886a6 384 rcu_read_unlock();
b3419363
DM
385}
386
1da177e4
LT
387static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388 int how)
389{
390 struct rt6_info *rt = (struct rt6_info *)dst;
391 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 392 struct net_device *loopback_dev =
c346dca1 393 dev_net(dev)->loopback_dev;
1da177e4 394
e5645f51
WW
395 if (idev && idev->dev != loopback_dev) {
396 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
397 if (loopback_idev) {
398 rt->rt6i_idev = loopback_idev;
399 in6_dev_put(idev);
97cac082 400 }
1da177e4
LT
401 }
402}
403
5973fb1e
MKL
404static bool __rt6_check_expired(const struct rt6_info *rt)
405{
406 if (rt->rt6i_flags & RTF_EXPIRES)
407 return time_after(jiffies, rt->dst.expires);
408 else
409 return false;
410}
411
a50feda5 412static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 413{
a68886a6
DA
414 struct fib6_info *from;
415
416 from = rcu_dereference(rt->from);
417
1716a961
G
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
a50feda5 420 return true;
a68886a6 421 } else if (from) {
1e2ea8ad 422 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 423 fib6_check_expired(from);
1716a961 424 }
a50feda5 425 return false;
1da177e4
LT
426}
427
3b290a31
DA
428struct fib6_info *fib6_multipath_select(const struct net *net,
429 struct fib6_info *match,
430 struct flowi6 *fl6, int oif,
431 const struct sk_buff *skb,
432 int strict)
51ebd318 433{
8d1c802b 434 struct fib6_info *sibling, *next_sibling;
51ebd318 435
b673d6cc
JS
436 /* We might have already computed the hash for ICMPv6 errors. In such
437 * case it will always be non-zero. Otherwise now is the time to do it.
438 */
439 if (!fl6->mp_hash)
b4bac172 440 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 441
5e670d84 442 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
443 return match;
444
93c2fb25
DA
445 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
446 fib6_siblings) {
5e670d84
DA
447 int nh_upper_bound;
448
449 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
450 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
451 continue;
452 if (rt6_score_route(sibling, oif, strict) < 0)
453 break;
454 match = sibling;
455 break;
456 }
457
51ebd318
ND
458 return match;
459}
460
1da177e4 461/*
66f5d6ce 462 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
463 */
464
8d1c802b
DA
465static inline struct fib6_info *rt6_device_match(struct net *net,
466 struct fib6_info *rt,
b71d1d42 467 const struct in6_addr *saddr,
1da177e4 468 int oif,
d420895e 469 int flags)
1da177e4 470{
8d1c802b 471 struct fib6_info *sprt;
1da177e4 472
5e670d84
DA
473 if (!oif && ipv6_addr_any(saddr) &&
474 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 475 return rt;
dd3abc4e 476
8fb11a9a 477 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5e670d84 478 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 479
5e670d84 480 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
481 continue;
482
dd3abc4e 483 if (oif) {
1da177e4
LT
484 if (dev->ifindex == oif)
485 return sprt;
dd3abc4e
YH
486 } else {
487 if (ipv6_chk_addr(net, saddr, dev,
488 flags & RT6_LOOKUP_F_IFACE))
489 return sprt;
1da177e4 490 }
dd3abc4e 491 }
1da177e4 492
eea68cd3
DA
493 if (oif && flags & RT6_LOOKUP_F_IFACE)
494 return net->ipv6.fib6_null_entry;
8067bb8c 495
421842ed 496 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
497}
498
27097255 499#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
500struct __rt6_probe_work {
501 struct work_struct work;
502 struct in6_addr target;
503 struct net_device *dev;
504};
505
506static void rt6_probe_deferred(struct work_struct *w)
507{
508 struct in6_addr mcaddr;
509 struct __rt6_probe_work *work =
510 container_of(w, struct __rt6_probe_work, work);
511
512 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 513 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 514 dev_put(work->dev);
662f5533 515 kfree(work);
c2f17e82
HFS
516}
517
8d1c802b 518static void rt6_probe(struct fib6_info *rt)
27097255 519{
990edb42 520 struct __rt6_probe_work *work;
5e670d84 521 const struct in6_addr *nh_gw;
f2c31e32 522 struct neighbour *neigh;
5e670d84
DA
523 struct net_device *dev;
524
27097255
YH
525 /*
526 * Okay, this does not seem to be appropriate
527 * for now, however, we need to check if it
528 * is really so; aka Router Reachability Probing.
529 *
530 * Router Reachability Probe MUST be rate-limited
531 * to no more than one per minute.
532 */
93c2fb25 533 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
7ff74a59 534 return;
5e670d84
DA
535
536 nh_gw = &rt->fib6_nh.nh_gw;
537 dev = rt->fib6_nh.nh_dev;
2152caea 538 rcu_read_lock_bh();
5e670d84 539 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 540 if (neigh) {
dcd1f572
DA
541 struct inet6_dev *idev;
542
8d6c31bf
MKL
543 if (neigh->nud_state & NUD_VALID)
544 goto out;
545
dcd1f572 546 idev = __in6_dev_get(dev);
990edb42 547 work = NULL;
2152caea 548 write_lock(&neigh->lock);
990edb42
MKL
549 if (!(neigh->nud_state & NUD_VALID) &&
550 time_after(jiffies,
dcd1f572 551 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
552 work = kmalloc(sizeof(*work), GFP_ATOMIC);
553 if (work)
554 __neigh_set_probe_once(neigh);
c2f17e82 555 }
2152caea 556 write_unlock(&neigh->lock);
990edb42
MKL
557 } else {
558 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 559 }
990edb42
MKL
560
561 if (work) {
562 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
563 work->target = *nh_gw;
564 dev_hold(dev);
565 work->dev = dev;
990edb42
MKL
566 schedule_work(&work->work);
567 }
568
8d6c31bf 569out:
2152caea 570 rcu_read_unlock_bh();
27097255
YH
571}
572#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* nothing to do */
}
576#endif
577
1da177e4 578/*
554cfb7e 579 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 580 */
8d1c802b 581static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 582{
5e670d84
DA
583 const struct net_device *dev = rt->fib6_nh.nh_dev;
584
161980f4 585 if (!oif || dev->ifindex == oif)
554cfb7e 586 return 2;
161980f4 587 return 0;
554cfb7e 588}
1da177e4 589
8d1c802b 590static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 591{
afc154e9 592 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 593 struct neighbour *neigh;
f2c31e32 594
93c2fb25
DA
595 if (rt->fib6_flags & RTF_NONEXTHOP ||
596 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 597 return RT6_NUD_SUCCEED;
145a3621
YH
598
599 rcu_read_lock_bh();
5e670d84
DA
600 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
601 &rt->fib6_nh.nh_gw);
145a3621
YH
602 if (neigh) {
603 read_lock(&neigh->lock);
554cfb7e 604 if (neigh->nud_state & NUD_VALID)
afc154e9 605 ret = RT6_NUD_SUCCEED;
398bcbeb 606#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 607 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 608 ret = RT6_NUD_SUCCEED;
7e980569
JB
609 else
610 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 611#endif
145a3621 612 read_unlock(&neigh->lock);
afc154e9
HFS
613 } else {
614 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 615 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 616 }
145a3621
YH
617 rcu_read_unlock_bh();
618
a5a81f0b 619 return ret;
1da177e4
LT
620}
621
8d1c802b 622static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 623{
a5a81f0b 624 int m;
1ab1457c 625
4d0c5911 626 m = rt6_check_dev(rt, oif);
77d16f45 627 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 628 return RT6_NUD_FAIL_HARD;
ebacaaa0 629#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 630 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 631#endif
afc154e9
HFS
632 if (strict & RT6_LOOKUP_F_REACHABLE) {
633 int n = rt6_check_neigh(rt);
634 if (n < 0)
635 return n;
636 }
554cfb7e
YH
637 return m;
638}
639
dcd1f572
DA
640/* called with rc_read_lock held */
641static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
642{
643 const struct net_device *dev = fib6_info_nh_dev(f6i);
644 bool rc = false;
645
646 if (dev) {
647 const struct inet6_dev *idev = __in6_dev_get(dev);
648
649 rc = !!idev->cnf.ignore_routes_with_linkdown;
650 }
651
652 return rc;
653}
654
8d1c802b
DA
655static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
656 int *mpri, struct fib6_info *match,
afc154e9 657 bool *do_rr)
554cfb7e 658{
f11e6659 659 int m;
afc154e9 660 bool match_do_rr = false;
35103d11 661
5e670d84 662 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
663 goto out;
664
dcd1f572 665 if (fib6_ignore_linkdown(rt) &&
5e670d84 666 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 667 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 668 goto out;
f11e6659 669
14895687 670 if (fib6_check_expired(rt))
f11e6659
DM
671 goto out;
672
673 m = rt6_score_route(rt, oif, strict);
7e980569 674 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
675 match_do_rr = true;
676 m = 0; /* lowest valid score */
7e980569 677 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 678 goto out;
afc154e9
HFS
679 }
680
681 if (strict & RT6_LOOKUP_F_REACHABLE)
682 rt6_probe(rt);
f11e6659 683
7e980569 684 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 685 if (m > *mpri) {
afc154e9 686 *do_rr = match_do_rr;
f11e6659
DM
687 *mpri = m;
688 match = rt;
f11e6659 689 }
f11e6659
DM
690out:
691 return match;
692}
693
8d1c802b
DA
694static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
695 struct fib6_info *leaf,
696 struct fib6_info *rr_head,
afc154e9
HFS
697 u32 metric, int oif, int strict,
698 bool *do_rr)
f11e6659 699{
8d1c802b 700 struct fib6_info *rt, *match, *cont;
554cfb7e 701 int mpri = -1;
1da177e4 702
f11e6659 703 match = NULL;
9fbdcfaf 704 cont = NULL;
8fb11a9a 705 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 706 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
707 cont = rt;
708 break;
709 }
710
711 match = find_match(rt, oif, strict, &mpri, match, do_rr);
712 }
713
66f5d6ce 714 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 715 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 716 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
717 cont = rt;
718 break;
719 }
720
afc154e9 721 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
722 }
723
724 if (match || !cont)
725 return match;
726
8fb11a9a 727 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 728 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 729
f11e6659
DM
730 return match;
731}
1da177e4 732
8d1c802b 733static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 734 int oif, int strict)
f11e6659 735{
8d1c802b
DA
736 struct fib6_info *leaf = rcu_dereference(fn->leaf);
737 struct fib6_info *match, *rt0;
afc154e9 738 bool do_rr = false;
17ecf590 739 int key_plen;
1da177e4 740
421842ed
DA
741 if (!leaf || leaf == net->ipv6.fib6_null_entry)
742 return net->ipv6.fib6_null_entry;
8d1040e8 743
66f5d6ce 744 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 745 if (!rt0)
66f5d6ce 746 rt0 = leaf;
1da177e4 747
17ecf590
WW
748 /* Double check to make sure fn is not an intermediate node
749 * and fn->leaf does not points to its child's leaf
750 * (This might happen if all routes under fn are deleted from
751 * the tree and fib6_repair_tree() is called on the node.)
752 */
93c2fb25 753 key_plen = rt0->fib6_dst.plen;
17ecf590 754#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
755 if (rt0->fib6_src.plen)
756 key_plen = rt0->fib6_src.plen;
17ecf590
WW
757#endif
758 if (fn->fn_bit != key_plen)
421842ed 759 return net->ipv6.fib6_null_entry;
17ecf590 760
93c2fb25 761 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 762 &do_rr);
1da177e4 763
afc154e9 764 if (do_rr) {
8fb11a9a 765 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 766
554cfb7e 767 /* no entries matched; do round-robin */
93c2fb25 768 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 769 next = leaf;
f11e6659 770
66f5d6ce 771 if (next != rt0) {
93c2fb25 772 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 773 /* make sure next is not being deleted from the tree */
93c2fb25 774 if (next->fib6_node)
66f5d6ce 775 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 776 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 777 }
1da177e4 778 }
1da177e4 779
421842ed 780 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
781}
782
8d1c802b 783static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 784{
93c2fb25 785 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
786}
787
70ceb4f5
YH
788#ifdef CONFIG_IPV6_ROUTE_INFO
789int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 790 const struct in6_addr *gwaddr)
70ceb4f5 791{
c346dca1 792 struct net *net = dev_net(dev);
70ceb4f5
YH
793 struct route_info *rinfo = (struct route_info *) opt;
794 struct in6_addr prefix_buf, *prefix;
795 unsigned int pref;
4bed72e4 796 unsigned long lifetime;
8d1c802b 797 struct fib6_info *rt;
70ceb4f5
YH
798
799 if (len < sizeof(struct route_info)) {
800 return -EINVAL;
801 }
802
803 /* Sanity check for prefix_len and length */
804 if (rinfo->length > 3) {
805 return -EINVAL;
806 } else if (rinfo->prefix_len > 128) {
807 return -EINVAL;
808 } else if (rinfo->prefix_len > 64) {
809 if (rinfo->length < 2) {
810 return -EINVAL;
811 }
812 } else if (rinfo->prefix_len > 0) {
813 if (rinfo->length < 1) {
814 return -EINVAL;
815 }
816 }
817
818 pref = rinfo->route_pref;
819 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 820 return -EINVAL;
70ceb4f5 821
4bed72e4 822 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
823
824 if (rinfo->length == 3)
825 prefix = (struct in6_addr *)rinfo->prefix;
826 else {
827 /* this function is safe */
828 ipv6_addr_prefix(&prefix_buf,
829 (struct in6_addr *)rinfo->prefix,
830 rinfo->prefix_len);
831 prefix = &prefix_buf;
832 }
833
f104a567 834 if (rinfo->prefix_len == 0)
afb1d4b5 835 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
836 else
837 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 838 gwaddr, dev);
70ceb4f5
YH
839
840 if (rt && !lifetime) {
afb1d4b5 841 ip6_del_rt(net, rt);
70ceb4f5
YH
842 rt = NULL;
843 }
844
845 if (!rt && lifetime)
830218c1
DA
846 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
847 dev, pref);
70ceb4f5 848 else if (rt)
93c2fb25
DA
849 rt->fib6_flags = RTF_ROUTEINFO |
850 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
851
852 if (rt) {
1716a961 853 if (!addrconf_finite_timeout(lifetime))
14895687 854 fib6_clean_expires(rt);
1716a961 855 else
14895687 856 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 857
93531c67 858 fib6_info_release(rt);
70ceb4f5
YH
859 }
860 return 0;
861}
862#endif
863
ae90d867
DA
864/*
865 * Misc support functions
866 */
867
868/* called with rcu_lock held */
8d1c802b 869static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 870{
5e670d84 871 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 872
93c2fb25 873 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
874 /* for copies of local routes, dst->dev needs to be the
875 * device if it is a master device, the master device if
876 * device is enslaved, and the loopback as the default
877 */
878 if (netif_is_l3_slave(dev) &&
93c2fb25 879 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
880 dev = l3mdev_master_dev_rcu(dev);
881 else if (!netif_is_l3_master(dev))
882 dev = dev_net(dev)->loopback_dev;
883 /* last case is netif_is_l3_master(dev) is true in which
884 * case we want dev returned to be dev
885 */
886 }
887
888 return dev;
889}
890
6edb3c96
DA
891static const int fib6_prop[RTN_MAX + 1] = {
892 [RTN_UNSPEC] = 0,
893 [RTN_UNICAST] = 0,
894 [RTN_LOCAL] = 0,
895 [RTN_BROADCAST] = 0,
896 [RTN_ANYCAST] = 0,
897 [RTN_MULTICAST] = 0,
898 [RTN_BLACKHOLE] = -EINVAL,
899 [RTN_UNREACHABLE] = -EHOSTUNREACH,
900 [RTN_PROHIBIT] = -EACCES,
901 [RTN_THROW] = -EAGAIN,
902 [RTN_NAT] = -EINVAL,
903 [RTN_XRESOLVE] = -EINVAL,
904};
905
906static int ip6_rt_type_to_error(u8 fib6_type)
907{
908 return fib6_prop[fib6_type];
909}
910
8d1c802b 911static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
912{
913 unsigned short flags = 0;
914
915 if (rt->dst_nocount)
916 flags |= DST_NOCOUNT;
917 if (rt->dst_nopolicy)
918 flags |= DST_NOPOLICY;
919 if (rt->dst_host)
920 flags |= DST_HOST;
921
922 return flags;
923}
924
8d1c802b 925static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
926{
927 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
928
929 switch (ort->fib6_type) {
930 case RTN_BLACKHOLE:
931 rt->dst.output = dst_discard_out;
932 rt->dst.input = dst_discard;
933 break;
934 case RTN_PROHIBIT:
935 rt->dst.output = ip6_pkt_prohibit_out;
936 rt->dst.input = ip6_pkt_prohibit;
937 break;
938 case RTN_THROW:
939 case RTN_UNREACHABLE:
940 default:
941 rt->dst.output = ip6_pkt_discard_out;
942 rt->dst.input = ip6_pkt_discard;
943 break;
944 }
945}
946
8d1c802b 947static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 948{
3b6761d1
DA
949 rt->dst.flags |= fib6_info_dst_flags(ort);
950
93c2fb25 951 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
952 ip6_rt_init_dst_reject(rt, ort);
953 return;
954 }
955
956 rt->dst.error = 0;
957 rt->dst.output = ip6_output;
958
d23c4b63 959 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 960 rt->dst.input = ip6_input;
93c2fb25 961 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
962 rt->dst.input = ip6_mc_input;
963 } else {
964 rt->dst.input = ip6_forward;
965 }
966
967 if (ort->fib6_nh.nh_lwtstate) {
968 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
969 lwtunnel_set_redirect(&rt->dst);
970 }
971
972 rt->dst.lastuse = jiffies;
973}
974
e873e4b9 975/* Caller must already hold reference to @from */
8d1c802b 976static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 977{
ae90d867 978 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 979 rcu_assign_pointer(rt->from, from);
d4ead6b3 980 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
ae90d867
DA
981}
982
e873e4b9 983/* Caller must already hold reference to @ort */
8d1c802b 984static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 985{
dcd1f572
DA
986 struct net_device *dev = fib6_info_nh_dev(ort);
987
6edb3c96
DA
988 ip6_rt_init_dst(rt, ort);
989
93c2fb25 990 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 991 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 992 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
93c2fb25 993 rt->rt6i_flags = ort->fib6_flags;
ae90d867 994 rt6_set_from(rt, ort);
ae90d867 995#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 996 rt->rt6i_src = ort->fib6_src;
ae90d867 997#endif
93c2fb25 998 rt->rt6i_prefsrc = ort->fib6_prefsrc;
5e670d84 999 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
ae90d867
DA
1000}
1001
a3c00e46
MKL
1002static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1003 struct in6_addr *saddr)
1004{
66f5d6ce 1005 struct fib6_node *pn, *sn;
a3c00e46
MKL
1006 while (1) {
1007 if (fn->fn_flags & RTN_TL_ROOT)
1008 return NULL;
66f5d6ce
WW
1009 pn = rcu_dereference(fn->parent);
1010 sn = FIB6_SUBTREE(pn);
1011 if (sn && sn != fn)
6454743b 1012 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1013 else
1014 fn = pn;
1015 if (fn->fn_flags & RTN_RTINFO)
1016 return fn;
1017 }
1018}
c71099ac 1019
d3843fe5
WW
1020static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1021 bool null_fallback)
1022{
1023 struct rt6_info *rt = *prt;
1024
1025 if (dst_hold_safe(&rt->dst))
1026 return true;
1027 if (null_fallback) {
1028 rt = net->ipv6.ip6_null_entry;
1029 dst_hold(&rt->dst);
1030 } else {
1031 rt = NULL;
1032 }
1033 *prt = rt;
1034 return false;
1035}
1036
dec9b0e2 1037/* called with rcu_lock held */
8d1c802b 1038static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1039{
3b6761d1 1040 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1041 struct net_device *dev = rt->fib6_nh.nh_dev;
1042 struct rt6_info *nrt;
1043
e873e4b9
WW
1044 if (!fib6_info_hold_safe(rt))
1045 return NULL;
1046
93531c67 1047 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1048 if (nrt)
1049 ip6_rt_copy_init(nrt, rt);
e873e4b9
WW
1050 else
1051 fib6_info_release(rt);
dec9b0e2
DA
1052
1053 return nrt;
1054}
1055
8ed67789
DL
1056static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1057 struct fib6_table *table,
b75cc8f9
DA
1058 struct flowi6 *fl6,
1059 const struct sk_buff *skb,
1060 int flags)
1da177e4 1061{
8d1c802b 1062 struct fib6_info *f6i;
1da177e4 1063 struct fib6_node *fn;
23fb93a4 1064 struct rt6_info *rt;
1da177e4 1065
b6cdbc85
DA
1066 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1067 flags &= ~RT6_LOOKUP_F_IFACE;
1068
66f5d6ce 1069 rcu_read_lock();
6454743b 1070 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1071restart:
23fb93a4
DA
1072 f6i = rcu_dereference(fn->leaf);
1073 if (!f6i) {
1074 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1075 } else {
23fb93a4 1076 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1077 fl6->flowi6_oif, flags);
93c2fb25 1078 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
3b290a31
DA
1079 f6i = fib6_multipath_select(net, f6i, fl6,
1080 fl6->flowi6_oif, skb,
1081 flags);
66f5d6ce 1082 }
23fb93a4 1083 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1084 fn = fib6_backtrack(fn, &fl6->saddr);
1085 if (fn)
1086 goto restart;
1087 }
2b760fcf 1088
d4bea421 1089 trace_fib6_table_lookup(net, f6i, table, fl6);
d3843fe5 1090
2b760fcf 1091 /* Search through exception table */
23fb93a4
DA
1092 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1093 if (rt) {
dec9b0e2
DA
1094 if (ip6_hold_safe(net, &rt, true))
1095 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1096 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1097 rt = net->ipv6.ip6_null_entry;
1098 dst_hold(&rt->dst);
23fb93a4
DA
1099 } else {
1100 rt = ip6_create_rt_rcu(f6i);
1101 if (!rt) {
1102 rt = net->ipv6.ip6_null_entry;
1103 dst_hold(&rt->dst);
1104 }
dec9b0e2 1105 }
b811580d 1106
66f5d6ce 1107 rcu_read_unlock();
b811580d 1108
c71099ac 1109 return rt;
c71099ac
TG
1110}
1111
67ba4152 1112struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1113 const struct sk_buff *skb, int flags)
ea6e574e 1114{
b75cc8f9 1115 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1116}
1117EXPORT_SYMBOL_GPL(ip6_route_lookup);
1118
9acd9f3a 1119struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1120 const struct in6_addr *saddr, int oif,
1121 const struct sk_buff *skb, int strict)
c71099ac 1122{
4c9483b2
DM
1123 struct flowi6 fl6 = {
1124 .flowi6_oif = oif,
1125 .daddr = *daddr,
c71099ac
TG
1126 };
1127 struct dst_entry *dst;
77d16f45 1128 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1129
adaa70bb 1130 if (saddr) {
4c9483b2 1131 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1132 flags |= RT6_LOOKUP_F_HAS_SADDR;
1133 }
1134
b75cc8f9 1135 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1136 if (dst->error == 0)
1137 return (struct rt6_info *) dst;
1138
1139 dst_release(dst);
1140
1da177e4
LT
1141 return NULL;
1142}
7159039a
YH
1143EXPORT_SYMBOL(rt6_lookup);
1144
c71099ac 1145/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1146 * It takes new route entry, the addition fails by any reason the
1147 * route is released.
1148 * Caller must hold dst before calling it.
1da177e4
LT
1149 */
1150
8d1c802b 1151static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1152 struct netlink_ext_ack *extack)
1da177e4
LT
1153{
1154 int err;
c71099ac 1155 struct fib6_table *table;
1da177e4 1156
93c2fb25 1157 table = rt->fib6_table;
66f5d6ce 1158 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1159 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1160 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1161
1162 return err;
1163}
1164
8d1c802b 1165int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1166{
afb1d4b5 1167 struct nl_info info = { .nl_net = net, };
e715b6d3 1168
d4ead6b3 1169 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1170}
1171
8d1c802b 1172static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1173 const struct in6_addr *daddr,
1174 const struct in6_addr *saddr)
1da177e4 1175{
4832c30d 1176 struct net_device *dev;
1da177e4
LT
1177 struct rt6_info *rt;
1178
1179 /*
1180 * Clone the route.
1181 */
1182
e873e4b9
WW
1183 if (!fib6_info_hold_safe(ort))
1184 return NULL;
1185
4832c30d 1186 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1187 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9
WW
1188 if (!rt) {
1189 fib6_info_release(ort);
83a09abd 1190 return NULL;
e873e4b9 1191 }
83a09abd
MKL
1192
1193 ip6_rt_copy_init(rt, ort);
1194 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1195 rt->dst.flags |= DST_HOST;
1196 rt->rt6i_dst.addr = *daddr;
1197 rt->rt6i_dst.plen = 128;
1da177e4 1198
83a09abd 1199 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1200 if (ort->fib6_dst.plen != 128 &&
1201 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1202 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1203#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1204 if (rt->rt6i_src.plen && saddr) {
1205 rt->rt6i_src.addr = *saddr;
1206 rt->rt6i_src.plen = 128;
8b9df265 1207 }
83a09abd 1208#endif
95a9a5ba 1209 }
1da177e4 1210
95a9a5ba
YH
1211 return rt;
1212}
1da177e4 1213
8d1c802b 1214static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1215{
3b6761d1 1216 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1217 struct net_device *dev;
d52d3997
MKL
1218 struct rt6_info *pcpu_rt;
1219
e873e4b9
WW
1220 if (!fib6_info_hold_safe(rt))
1221 return NULL;
1222
4832c30d
DA
1223 rcu_read_lock();
1224 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1225 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1226 rcu_read_unlock();
e873e4b9
WW
1227 if (!pcpu_rt) {
1228 fib6_info_release(rt);
d52d3997 1229 return NULL;
e873e4b9 1230 }
d52d3997 1231 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1232 pcpu_rt->rt6i_flags |= RTF_PCPU;
1233 return pcpu_rt;
1234}
1235
66f5d6ce 1236/* It should be called with rcu_read_lock() acquired */
8d1c802b 1237static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1238{
a73e4195 1239 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1240
1241 p = this_cpu_ptr(rt->rt6i_pcpu);
1242 pcpu_rt = *p;
1243
d4ead6b3
DA
1244 if (pcpu_rt)
1245 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1246
a73e4195
MKL
1247 return pcpu_rt;
1248}
1249
afb1d4b5 1250static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1251 struct fib6_info *rt)
a73e4195
MKL
1252{
1253 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1254
1255 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1256 if (!pcpu_rt) {
9c7370a1
MKL
1257 dst_hold(&net->ipv6.ip6_null_entry->dst);
1258 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1259 }
1260
a94b9367
WW
1261 dst_hold(&pcpu_rt->dst);
1262 p = this_cpu_ptr(rt->rt6i_pcpu);
1263 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1264 BUG_ON(prev);
a94b9367 1265
d52d3997
MKL
1266 return pcpu_rt;
1267}
1268
35732d01
WW
/* Exception hash table implementation.
 *
 * rt6_exception_lock is one file-wide spinlock; the helpers below that
 * insert, remove or rewrite exception entries run with it held.
 */
static DEFINE_SPINLOCK(rt6_exception_lock);
1272
1273/* Remove rt6_ex from hash table and free the memory
1274 * Caller must hold rt6_exception_lock
1275 */
1276static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1277 struct rt6_exception *rt6_ex)
1278{
b2427e67 1279 struct net *net;
81eb8447 1280
35732d01
WW
1281 if (!bucket || !rt6_ex)
1282 return;
b2427e67
CIK
1283
1284 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01 1285 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1286 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1287 kfree_rcu(rt6_ex, rcu);
1288 WARN_ON_ONCE(!bucket->depth);
1289 bucket->depth--;
81eb8447 1290 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1291}
1292
1293/* Remove oldest rt6_ex in bucket and free the memory
1294 * Caller must hold rt6_exception_lock
1295 */
1296static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1297{
1298 struct rt6_exception *rt6_ex, *oldest = NULL;
1299
1300 if (!bucket)
1301 return;
1302
1303 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1304 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1305 oldest = rt6_ex;
1306 }
1307 rt6_remove_exception(bucket, oldest);
1308}
1309
1310static u32 rt6_exception_hash(const struct in6_addr *dst,
1311 const struct in6_addr *src)
1312{
1313 static u32 seed __read_mostly;
1314 u32 val;
1315
1316 net_get_random_once(&seed, sizeof(seed));
1317 val = jhash(dst, sizeof(*dst), seed);
1318
1319#ifdef CONFIG_IPV6_SUBTREES
1320 if (src)
1321 val = jhash(src, sizeof(*src), val);
1322#endif
1323 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1324}
1325
1326/* Helper function to find the cached rt in the hash table
1327 * and update bucket pointer to point to the bucket for this
1328 * (daddr, saddr) pair
1329 * Caller must hold rt6_exception_lock
1330 */
1331static struct rt6_exception *
1332__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1333 const struct in6_addr *daddr,
1334 const struct in6_addr *saddr)
1335{
1336 struct rt6_exception *rt6_ex;
1337 u32 hval;
1338
1339 if (!(*bucket) || !daddr)
1340 return NULL;
1341
1342 hval = rt6_exception_hash(daddr, saddr);
1343 *bucket += hval;
1344
1345 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1346 struct rt6_info *rt6 = rt6_ex->rt6i;
1347 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1348
1349#ifdef CONFIG_IPV6_SUBTREES
1350 if (matched && saddr)
1351 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1352#endif
1353 if (matched)
1354 return rt6_ex;
1355 }
1356 return NULL;
1357}
1358
1359/* Helper function to find the cached rt in the hash table
1360 * and update bucket pointer to point to the bucket for this
1361 * (daddr, saddr) pair
1362 * Caller must hold rcu_read_lock()
1363 */
1364static struct rt6_exception *
1365__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1366 const struct in6_addr *daddr,
1367 const struct in6_addr *saddr)
1368{
1369 struct rt6_exception *rt6_ex;
1370 u32 hval;
1371
1372 WARN_ON_ONCE(!rcu_read_lock_held());
1373
1374 if (!(*bucket) || !daddr)
1375 return NULL;
1376
1377 hval = rt6_exception_hash(daddr, saddr);
1378 *bucket += hval;
1379
1380 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1381 struct rt6_info *rt6 = rt6_ex->rt6i;
1382 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1383
1384#ifdef CONFIG_IPV6_SUBTREES
1385 if (matched && saddr)
1386 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1387#endif
1388 if (matched)
1389 return rt6_ex;
1390 }
1391 return NULL;
1392}
1393
8d1c802b 1394static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1395{
1396 unsigned int mtu;
1397
dcd1f572
DA
1398 if (rt->fib6_pmtu) {
1399 mtu = rt->fib6_pmtu;
1400 } else {
1401 struct net_device *dev = fib6_info_nh_dev(rt);
1402 struct inet6_dev *idev;
1403
1404 rcu_read_lock();
1405 idev = __in6_dev_get(dev);
1406 mtu = idev->cnf.mtu6;
1407 rcu_read_unlock();
1408 }
1409
d4ead6b3
DA
1410 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1411
1412 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1413}
1414
35732d01 1415static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1416 struct fib6_info *ort)
35732d01 1417{
5e670d84 1418 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1419 struct rt6_exception_bucket *bucket;
1420 struct in6_addr *src_key = NULL;
1421 struct rt6_exception *rt6_ex;
1422 int err = 0;
1423
35732d01
WW
1424 spin_lock_bh(&rt6_exception_lock);
1425
1426 if (ort->exception_bucket_flushed) {
1427 err = -EINVAL;
1428 goto out;
1429 }
1430
1431 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1432 lockdep_is_held(&rt6_exception_lock));
1433 if (!bucket) {
1434 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1435 GFP_ATOMIC);
1436 if (!bucket) {
1437 err = -ENOMEM;
1438 goto out;
1439 }
1440 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1441 }
1442
1443#ifdef CONFIG_IPV6_SUBTREES
1444 /* rt6i_src.plen != 0 indicates ort is in subtree
1445 * and exception table is indexed by a hash of
1446 * both rt6i_dst and rt6i_src.
1447 * Otherwise, the exception table is indexed by
1448 * a hash of only rt6i_dst.
1449 */
93c2fb25 1450 if (ort->fib6_src.plen)
35732d01
WW
1451 src_key = &nrt->rt6i_src.addr;
1452#endif
60006a48
WW
1453
1454 /* Update rt6i_prefsrc as it could be changed
1455 * in rt6_remove_prefsrc()
1456 */
93c2fb25 1457 nrt->rt6i_prefsrc = ort->fib6_prefsrc;
f5bbe7ee
WW
1458 /* rt6_mtu_change() might lower mtu on ort.
1459 * Only insert this exception route if its mtu
1460 * is less than ort's mtu value.
1461 */
d4ead6b3 1462 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1463 err = -EINVAL;
1464 goto out;
1465 }
60006a48 1466
35732d01
WW
1467 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1468 src_key);
1469 if (rt6_ex)
1470 rt6_remove_exception(bucket, rt6_ex);
1471
1472 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1473 if (!rt6_ex) {
1474 err = -ENOMEM;
1475 goto out;
1476 }
1477 rt6_ex->rt6i = nrt;
1478 rt6_ex->stamp = jiffies;
35732d01
WW
1479 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1480 bucket->depth++;
81eb8447 1481 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1482
1483 if (bucket->depth > FIB6_MAX_DEPTH)
1484 rt6_exception_remove_oldest(bucket);
1485
1486out:
1487 spin_unlock_bh(&rt6_exception_lock);
1488
1489 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1490 if (!err) {
93c2fb25 1491 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1492 fib6_update_sernum(net, ort);
93c2fb25 1493 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1494 fib6_force_start_gc(net);
1495 }
35732d01
WW
1496
1497 return err;
1498}
1499
8d1c802b 1500void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1501{
1502 struct rt6_exception_bucket *bucket;
1503 struct rt6_exception *rt6_ex;
1504 struct hlist_node *tmp;
1505 int i;
1506
1507 spin_lock_bh(&rt6_exception_lock);
1508 /* Prevent rt6_insert_exception() to recreate the bucket list */
1509 rt->exception_bucket_flushed = 1;
1510
1511 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1512 lockdep_is_held(&rt6_exception_lock));
1513 if (!bucket)
1514 goto out;
1515
1516 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1517 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1518 rt6_remove_exception(bucket, rt6_ex);
1519 WARN_ON_ONCE(bucket->depth);
1520 bucket++;
1521 }
1522
1523out:
1524 spin_unlock_bh(&rt6_exception_lock);
1525}
1526
1527/* Find cached rt in the hash table inside passed in rt
1528 * Caller has to hold rcu_read_lock()
1529 */
8d1c802b 1530static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1531 struct in6_addr *daddr,
1532 struct in6_addr *saddr)
1533{
1534 struct rt6_exception_bucket *bucket;
1535 struct in6_addr *src_key = NULL;
1536 struct rt6_exception *rt6_ex;
1537 struct rt6_info *res = NULL;
1538
1539 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1540
1541#ifdef CONFIG_IPV6_SUBTREES
1542 /* rt6i_src.plen != 0 indicates rt is in subtree
1543 * and exception table is indexed by a hash of
1544 * both rt6i_dst and rt6i_src.
1545 * Otherwise, the exception table is indexed by
1546 * a hash of only rt6i_dst.
1547 */
93c2fb25 1548 if (rt->fib6_src.plen)
35732d01
WW
1549 src_key = saddr;
1550#endif
1551 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1552
1553 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1554 res = rt6_ex->rt6i;
1555
1556 return res;
1557}
1558
1559/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1560static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1561{
35732d01
WW
1562 struct rt6_exception_bucket *bucket;
1563 struct in6_addr *src_key = NULL;
1564 struct rt6_exception *rt6_ex;
8a14e46f 1565 struct fib6_info *from;
35732d01
WW
1566 int err;
1567
091311de 1568 from = rcu_dereference(rt->from);
35732d01 1569 if (!from ||
442d713b 1570 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1571 return -EINVAL;
1572
1573 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1574 return -ENOENT;
1575
1576 spin_lock_bh(&rt6_exception_lock);
1577 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1578 lockdep_is_held(&rt6_exception_lock));
1579#ifdef CONFIG_IPV6_SUBTREES
1580 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1581 * and exception table is indexed by a hash of
1582 * both rt6i_dst and rt6i_src.
1583 * Otherwise, the exception table is indexed by
1584 * a hash of only rt6i_dst.
1585 */
93c2fb25 1586 if (from->fib6_src.plen)
35732d01
WW
1587 src_key = &rt->rt6i_src.addr;
1588#endif
1589 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1590 &rt->rt6i_dst.addr,
1591 src_key);
1592 if (rt6_ex) {
1593 rt6_remove_exception(bucket, rt6_ex);
1594 err = 0;
1595 } else {
1596 err = -ENOENT;
1597 }
1598
1599 spin_unlock_bh(&rt6_exception_lock);
1600 return err;
1601}
1602
1603/* Find rt6_ex which contains the passed in rt cache and
1604 * refresh its stamp
1605 */
1606static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1607{
35732d01 1608 struct rt6_exception_bucket *bucket;
8d1c802b 1609 struct fib6_info *from = rt->from;
35732d01
WW
1610 struct in6_addr *src_key = NULL;
1611 struct rt6_exception *rt6_ex;
1612
1613 if (!from ||
442d713b 1614 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1615 return;
1616
1617 rcu_read_lock();
1618 bucket = rcu_dereference(from->rt6i_exception_bucket);
1619
1620#ifdef CONFIG_IPV6_SUBTREES
1621 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1622 * and exception table is indexed by a hash of
1623 * both rt6i_dst and rt6i_src.
1624 * Otherwise, the exception table is indexed by
1625 * a hash of only rt6i_dst.
1626 */
93c2fb25 1627 if (from->fib6_src.plen)
35732d01
WW
1628 src_key = &rt->rt6i_src.addr;
1629#endif
1630 rt6_ex = __rt6_find_exception_rcu(&bucket,
1631 &rt->rt6i_dst.addr,
1632 src_key);
1633 if (rt6_ex)
1634 rt6_ex->stamp = jiffies;
1635
1636 rcu_read_unlock();
1637}
1638
8d1c802b 1639static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
60006a48
WW
1640{
1641 struct rt6_exception_bucket *bucket;
1642 struct rt6_exception *rt6_ex;
1643 int i;
1644
1645 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1646 lockdep_is_held(&rt6_exception_lock));
1647
1648 if (bucket) {
1649 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1650 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1651 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1652 }
1653 bucket++;
1654 }
1655 }
1656}
1657
e9fa1495
SB
1658static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1659 struct rt6_info *rt, int mtu)
1660{
1661 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1662 * lowest MTU in the path: always allow updating the route PMTU to
1663 * reflect PMTU decreases.
1664 *
1665 * If the new MTU is higher, and the route PMTU is equal to the local
1666 * MTU, this means the old MTU is the lowest in the path, so allow
1667 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1668 * handle this.
1669 */
1670
1671 if (dst_mtu(&rt->dst) >= mtu)
1672 return true;
1673
1674 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1675 return true;
1676
1677 return false;
1678}
1679
1680static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1681 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1682{
1683 struct rt6_exception_bucket *bucket;
1684 struct rt6_exception *rt6_ex;
1685 int i;
1686
1687 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1688 lockdep_is_held(&rt6_exception_lock));
1689
e9fa1495
SB
1690 if (!bucket)
1691 return;
1692
1693 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1694 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1695 struct rt6_info *entry = rt6_ex->rt6i;
1696
1697 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1698 * route), the metrics of its rt->from have already
e9fa1495
SB
1699 * been updated.
1700 */
d4ead6b3 1701 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1702 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1703 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1704 }
e9fa1495 1705 bucket++;
f5bbe7ee
WW
1706 }
1707}
1708
b16cb459
WW
1709#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1710
8d1c802b 1711static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1712 struct in6_addr *gateway)
1713{
1714 struct rt6_exception_bucket *bucket;
1715 struct rt6_exception *rt6_ex;
1716 struct hlist_node *tmp;
1717 int i;
1718
1719 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1720 return;
1721
1722 spin_lock_bh(&rt6_exception_lock);
1723 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1724 lockdep_is_held(&rt6_exception_lock));
1725
1726 if (bucket) {
1727 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1728 hlist_for_each_entry_safe(rt6_ex, tmp,
1729 &bucket->chain, hlist) {
1730 struct rt6_info *entry = rt6_ex->rt6i;
1731
1732 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1733 RTF_CACHE_GATEWAY &&
1734 ipv6_addr_equal(gateway,
1735 &entry->rt6i_gateway)) {
1736 rt6_remove_exception(bucket, rt6_ex);
1737 }
1738 }
1739 bucket++;
1740 }
1741 }
1742
1743 spin_unlock_bh(&rt6_exception_lock);
1744}
1745
c757faa8
WW
1746static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1747 struct rt6_exception *rt6_ex,
1748 struct fib6_gc_args *gc_args,
1749 unsigned long now)
1750{
1751 struct rt6_info *rt = rt6_ex->rt6i;
1752
1859bac0
PA
1753 /* we are pruning and obsoleting aged-out and non gateway exceptions
1754 * even if others have still references to them, so that on next
1755 * dst_check() such references can be dropped.
1756 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1757 * expired, independently from their aging, as per RFC 8201 section 4
1758 */
31afeb42
WW
1759 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1760 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1761 RT6_TRACE("aging clone %p\n", rt);
1762 rt6_remove_exception(bucket, rt6_ex);
1763 return;
1764 }
1765 } else if (time_after(jiffies, rt->dst.expires)) {
1766 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1767 rt6_remove_exception(bucket, rt6_ex);
1768 return;
31afeb42
WW
1769 }
1770
1771 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1772 struct neighbour *neigh;
1773 __u8 neigh_flags = 0;
1774
1bfa26ff
ED
1775 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1776 if (neigh)
c757faa8 1777 neigh_flags = neigh->flags;
1bfa26ff 1778
c757faa8
WW
1779 if (!(neigh_flags & NTF_ROUTER)) {
1780 RT6_TRACE("purging route %p via non-router but gateway\n",
1781 rt);
1782 rt6_remove_exception(bucket, rt6_ex);
1783 return;
1784 }
1785 }
31afeb42 1786
c757faa8
WW
1787 gc_args->more++;
1788}
1789
8d1c802b 1790void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1791 struct fib6_gc_args *gc_args,
1792 unsigned long now)
1793{
1794 struct rt6_exception_bucket *bucket;
1795 struct rt6_exception *rt6_ex;
1796 struct hlist_node *tmp;
1797 int i;
1798
1799 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1800 return;
1801
1bfa26ff
ED
1802 rcu_read_lock_bh();
1803 spin_lock(&rt6_exception_lock);
c757faa8
WW
1804 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1805 lockdep_is_held(&rt6_exception_lock));
1806
1807 if (bucket) {
1808 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1809 hlist_for_each_entry_safe(rt6_ex, tmp,
1810 &bucket->chain, hlist) {
1811 rt6_age_examine_exception(bucket, rt6_ex,
1812 gc_args, now);
1813 }
1814 bucket++;
1815 }
1816 }
1bfa26ff
ED
1817 spin_unlock(&rt6_exception_lock);
1818 rcu_read_unlock_bh();
c757faa8
WW
1819}
1820
1d053da9
DA
1821/* must be called with rcu lock held */
1822struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1823 int oif, struct flowi6 *fl6, int strict)
1da177e4 1824{
367efcb9 1825 struct fib6_node *fn, *saved_fn;
8d1c802b 1826 struct fib6_info *f6i;
1da177e4 1827
6454743b 1828 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1829 saved_fn = fn;
1da177e4 1830
ca254490
DA
1831 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1832 oif = 0;
1833
a3c00e46 1834redo_rt6_select:
23fb93a4 1835 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1836 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1837 fn = fib6_backtrack(fn, &fl6->saddr);
1838 if (fn)
1839 goto redo_rt6_select;
367efcb9
MKL
1840 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1841 /* also consider unreachable route */
1842 strict &= ~RT6_LOOKUP_F_REACHABLE;
1843 fn = saved_fn;
1844 goto redo_rt6_select;
367efcb9 1845 }
a3c00e46
MKL
1846 }
1847
d4bea421 1848 trace_fib6_table_lookup(net, f6i, table, fl6);
fb9de91e 1849
1d053da9
DA
1850 return f6i;
1851}
1852
1853struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1854 int oif, struct flowi6 *fl6,
1855 const struct sk_buff *skb, int flags)
1856{
1857 struct fib6_info *f6i;
1858 struct rt6_info *rt;
1859 int strict = 0;
1860
1861 strict |= flags & RT6_LOOKUP_F_IFACE;
1862 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1863 if (net->ipv6.devconf_all->forwarding == 0)
1864 strict |= RT6_LOOKUP_F_REACHABLE;
1865
1866 rcu_read_lock();
1867
1868 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1869 if (f6i->fib6_nsiblings)
1870 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1871
23fb93a4 1872 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1873 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1874 rcu_read_unlock();
d3843fe5 1875 dst_hold(&rt->dst);
d3843fe5 1876 return rt;
23fb93a4
DA
1877 }
1878
1879 /*Search through exception table */
1880 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1881 if (rt) {
d4ead6b3 1882 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1883 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1884
66f5d6ce 1885 rcu_read_unlock();
d52d3997 1886 return rt;
3da59bd9 1887 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1888 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1889 /* Create a RTF_CACHE clone which will not be
1890 * owned by the fib6 tree. It is for the special case where
1891 * the daddr in the skb during the neighbor look-up is different
1892 * from the fl6->daddr used to look-up route here.
1893 */
3da59bd9
MKL
1894 struct rt6_info *uncached_rt;
1895
23fb93a4 1896 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
d52d3997 1897
4d85cd0c 1898 rcu_read_unlock();
c71099ac 1899
1cfb71ee
WW
1900 if (uncached_rt) {
1901 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1902 * No need for another dst_hold()
1903 */
8d0b94af 1904 rt6_uncached_list_add(uncached_rt);
81eb8447 1905 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1906 } else {
3da59bd9 1907 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1908 dst_hold(&uncached_rt->dst);
1909 }
b811580d 1910
3da59bd9 1911 return uncached_rt;
d52d3997
MKL
1912 } else {
1913 /* Get a percpu copy */
1914
1915 struct rt6_info *pcpu_rt;
1916
951f788a 1917 local_bh_disable();
23fb93a4 1918 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1919
93531c67
DA
1920 if (!pcpu_rt)
1921 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1922
951f788a
ED
1923 local_bh_enable();
1924 rcu_read_unlock();
d4bea421 1925
d52d3997
MKL
1926 return pcpu_rt;
1927 }
1da177e4 1928}
9ff74384 1929EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1930
b75cc8f9
DA
1931static struct rt6_info *ip6_pol_route_input(struct net *net,
1932 struct fib6_table *table,
1933 struct flowi6 *fl6,
1934 const struct sk_buff *skb,
1935 int flags)
4acad72d 1936{
b75cc8f9 1937 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1938}
1939
d409b847
MB
1940struct dst_entry *ip6_route_input_lookup(struct net *net,
1941 struct net_device *dev,
b75cc8f9
DA
1942 struct flowi6 *fl6,
1943 const struct sk_buff *skb,
1944 int flags)
72331bc0
SL
1945{
1946 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1947 flags |= RT6_LOOKUP_F_IFACE;
1948
b75cc8f9 1949 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1950}
d409b847 1951EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1952
23aebdac 1953static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1954 struct flow_keys *keys,
1955 struct flow_keys *flkeys)
23aebdac
JS
1956{
1957 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1958 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1959 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1960 const struct ipv6hdr *inner_iph;
1961 const struct icmp6hdr *icmph;
1962 struct ipv6hdr _inner_iph;
cea67a2d 1963 struct icmp6hdr _icmph;
23aebdac
JS
1964
1965 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1966 goto out;
1967
cea67a2d
ED
1968 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1969 sizeof(_icmph), &_icmph);
1970 if (!icmph)
1971 goto out;
1972
23aebdac
JS
1973 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1974 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1975 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1976 icmph->icmp6_type != ICMPV6_PARAMPROB)
1977 goto out;
1978
1979 inner_iph = skb_header_pointer(skb,
1980 skb_transport_offset(skb) + sizeof(*icmph),
1981 sizeof(_inner_iph), &_inner_iph);
1982 if (!inner_iph)
1983 goto out;
1984
1985 key_iph = inner_iph;
5e5d6fed 1986 _flkeys = NULL;
23aebdac 1987out:
5e5d6fed
RP
1988 if (_flkeys) {
1989 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1990 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1991 keys->tags.flow_label = _flkeys->tags.flow_label;
1992 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1993 } else {
1994 keys->addrs.v6addrs.src = key_iph->saddr;
1995 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 1996 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
1997 keys->basic.ip_proto = key_iph->nexthdr;
1998 }
23aebdac
JS
1999}
2000
2001/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2002u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2003 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2004{
2005 struct flow_keys hash_keys;
9a2a537a 2006 u32 mhash;
23aebdac 2007
bbfa047a 2008 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2009 case 0:
2010 memset(&hash_keys, 0, sizeof(hash_keys));
2011 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2012 if (skb) {
2013 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2014 } else {
2015 hash_keys.addrs.v6addrs.src = fl6->saddr;
2016 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2017 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2018 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2019 }
2020 break;
2021 case 1:
2022 if (skb) {
2023 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2024 struct flow_keys keys;
2025
2026 /* short-circuit if we already have L4 hash present */
2027 if (skb->l4_hash)
2028 return skb_get_hash_raw(skb) >> 1;
2029
2030 memset(&hash_keys, 0, sizeof(hash_keys));
2031
2032 if (!flkeys) {
2033 skb_flow_dissect_flow_keys(skb, &keys, flag);
2034 flkeys = &keys;
2035 }
2036 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2037 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2038 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2039 hash_keys.ports.src = flkeys->ports.src;
2040 hash_keys.ports.dst = flkeys->ports.dst;
2041 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2042 } else {
2043 memset(&hash_keys, 0, sizeof(hash_keys));
2044 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2045 hash_keys.addrs.v6addrs.src = fl6->saddr;
2046 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2047 hash_keys.ports.src = fl6->fl6_sport;
2048 hash_keys.ports.dst = fl6->fl6_dport;
2049 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2050 }
2051 break;
23aebdac 2052 }
9a2a537a 2053 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2054
9a2a537a 2055 return mhash >> 1;
23aebdac
JS
2056}
2057
c71099ac
TG
2058void ip6_route_input(struct sk_buff *skb)
2059{
b71d1d42 2060 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2061 struct net *net = dev_net(skb->dev);
adaa70bb 2062 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2063 struct ip_tunnel_info *tun_info;
4c9483b2 2064 struct flowi6 fl6 = {
e0d56fdd 2065 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2066 .daddr = iph->daddr,
2067 .saddr = iph->saddr,
6502ca52 2068 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2069 .flowi6_mark = skb->mark,
2070 .flowi6_proto = iph->nexthdr,
c71099ac 2071 };
5e5d6fed 2072 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2073
904af04d 2074 tun_info = skb_tunnel_info(skb);
46fa062a 2075 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2076 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2077
2078 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2079 flkeys = &_flkeys;
2080
23aebdac 2081 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2082 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2083 skb_dst_drop(skb);
b75cc8f9
DA
2084 skb_dst_set(skb,
2085 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2086}
2087
b75cc8f9
DA
2088static struct rt6_info *ip6_pol_route_output(struct net *net,
2089 struct fib6_table *table,
2090 struct flowi6 *fl6,
2091 const struct sk_buff *skb,
2092 int flags)
1da177e4 2093{
b75cc8f9 2094 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2095}
2096
6f21c96a
PA
2097struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2098 struct flowi6 *fl6, int flags)
c71099ac 2099{
d46a9d67 2100 bool any_src;
c71099ac 2101
4c1feac5
DA
2102 if (rt6_need_strict(&fl6->daddr)) {
2103 struct dst_entry *dst;
2104
2105 dst = l3mdev_link_scope_lookup(net, fl6);
2106 if (dst)
2107 return dst;
2108 }
ca254490 2109
1fb9489b 2110 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2111
d46a9d67 2112 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2113 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2114 (fl6->flowi6_oif && any_src))
77d16f45 2115 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2116
d46a9d67 2117 if (!any_src)
adaa70bb 2118 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2119 else if (sk)
2120 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2121
b75cc8f9 2122 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2123}
6f21c96a 2124EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2125
2774c131 2126struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2127{
5c1e6aa3 2128 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2129 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2130 struct dst_entry *new = NULL;
2131
1dbe3252 2132 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2133 DST_OBSOLETE_DEAD, 0);
14e50e57 2134 if (rt) {
0a1f5962 2135 rt6_info_init(rt);
81eb8447 2136 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2137
0a1f5962 2138 new = &rt->dst;
14e50e57 2139 new->__use = 1;
352e512c 2140 new->input = dst_discard;
ede2059d 2141 new->output = dst_discard_out;
14e50e57 2142
0a1f5962 2143 dst_copy_metrics(new, &ort->dst);
14e50e57 2144
1dbe3252 2145 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2146 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2147 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2148
2149 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2150#ifdef CONFIG_IPV6_SUBTREES
2151 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2152#endif
14e50e57
DM
2153 }
2154
69ead7af
DM
2155 dst_release(dst_orig);
2156 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2157}
14e50e57 2158
1da177e4
LT
2159/*
2160 * Destination cache support functions
2161 */
2162
8d1c802b 2163static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2164{
93531c67
DA
2165 u32 rt_cookie = 0;
2166
8ae86971 2167 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2168 return false;
2169
2170 if (fib6_check_expired(f6i))
2171 return false;
2172
2173 return true;
4b32b5ad
MKL
2174}
2175
a68886a6
DA
2176static struct dst_entry *rt6_check(struct rt6_info *rt,
2177 struct fib6_info *from,
2178 u32 cookie)
3da59bd9 2179{
36143645 2180 u32 rt_cookie = 0;
c5cff856 2181
a68886a6 2182 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2183 rt_cookie != cookie)
3da59bd9
MKL
2184 return NULL;
2185
2186 if (rt6_check_expired(rt))
2187 return NULL;
2188
2189 return &rt->dst;
2190}
2191
a68886a6
DA
2192static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2193 struct fib6_info *from,
2194 u32 cookie)
3da59bd9 2195{
5973fb1e
MKL
2196 if (!__rt6_check_expired(rt) &&
2197 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2198 fib6_check(from, cookie))
3da59bd9
MKL
2199 return &rt->dst;
2200 else
2201 return NULL;
2202}
2203
1da177e4
LT
2204static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2205{
a87b7dc9 2206 struct dst_entry *dst_ret;
a68886a6 2207 struct fib6_info *from;
1da177e4
LT
2208 struct rt6_info *rt;
2209
a87b7dc9
DA
2210 rt = container_of(dst, struct rt6_info, dst);
2211
2212 rcu_read_lock();
1da177e4 2213
6f3118b5
ND
2214 /* All IPV6 dsts are created with ->obsolete set to the value
2215 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2216 * into this function always.
2217 */
e3bc10bd 2218
a68886a6 2219 from = rcu_dereference(rt->from);
4b32b5ad 2220
a68886a6
DA
2221 if (from && (rt->rt6i_flags & RTF_PCPU ||
2222 unlikely(!list_empty(&rt->rt6i_uncached))))
2223 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2224 else
a68886a6 2225 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2226
2227 rcu_read_unlock();
2228
2229 return dst_ret;
1da177e4
LT
2230}
2231
2232static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2233{
2234 struct rt6_info *rt = (struct rt6_info *) dst;
2235
2236 if (rt) {
54c1a859 2237 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2238 rcu_read_lock();
54c1a859 2239 if (rt6_check_expired(rt)) {
93531c67 2240 rt6_remove_exception_rt(rt);
54c1a859
YH
2241 dst = NULL;
2242 }
c3c14da0 2243 rcu_read_unlock();
54c1a859 2244 } else {
1da177e4 2245 dst_release(dst);
54c1a859
YH
2246 dst = NULL;
2247 }
1da177e4 2248 }
54c1a859 2249 return dst;
1da177e4
LT
2250}
2251
2252static void ip6_link_failure(struct sk_buff *skb)
2253{
2254 struct rt6_info *rt;
2255
3ffe533c 2256 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2257
adf30907 2258 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2259 if (rt) {
8a14e46f 2260 rcu_read_lock();
1eb4f758 2261 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2262 if (dst_hold_safe(&rt->dst))
93531c67 2263 rt6_remove_exception_rt(rt);
c5cff856 2264 } else {
a68886a6 2265 struct fib6_info *from;
c5cff856
WW
2266 struct fib6_node *fn;
2267
a68886a6
DA
2268 from = rcu_dereference(rt->from);
2269 if (from) {
2270 fn = rcu_dereference(from->fib6_node);
2271 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2272 fn->fn_sernum = -1;
2273 }
1eb4f758 2274 }
8a14e46f 2275 rcu_read_unlock();
1da177e4
LT
2276 }
2277}
2278
6a3e030f
DA
2279static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2280{
a68886a6
DA
2281 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2282 struct fib6_info *from;
2283
2284 rcu_read_lock();
2285 from = rcu_dereference(rt0->from);
2286 if (from)
2287 rt0->dst.expires = from->expires;
2288 rcu_read_unlock();
2289 }
6a3e030f
DA
2290
2291 dst_set_expires(&rt0->dst, timeout);
2292 rt0->rt6i_flags |= RTF_EXPIRES;
2293}
2294
45e4fd26
MKL
2295static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2296{
2297 struct net *net = dev_net(rt->dst.dev);
2298
d4ead6b3 2299 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2300 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2301 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2302}
2303
0d3f6d29
MKL
2304static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2305{
a68886a6
DA
2306 bool from_set;
2307
2308 rcu_read_lock();
2309 from_set = !!rcu_dereference(rt->from);
2310 rcu_read_unlock();
2311
0d3f6d29 2312 return !(rt->rt6i_flags & RTF_CACHE) &&
a68886a6 2313 (rt->rt6i_flags & RTF_PCPU || from_set);
0d3f6d29
MKL
2314}
2315
45e4fd26
MKL
2316static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2317 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2318{
0dec879f 2319 const struct in6_addr *daddr, *saddr;
67ba4152 2320 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2321
19bda36c
XL
2322 if (dst_metric_locked(dst, RTAX_MTU))
2323 return;
2324
0dec879f
JA
2325 if (iph) {
2326 daddr = &iph->daddr;
2327 saddr = &iph->saddr;
2328 } else if (sk) {
2329 daddr = &sk->sk_v6_daddr;
2330 saddr = &inet6_sk(sk)->saddr;
2331 } else {
2332 daddr = NULL;
2333 saddr = NULL;
2334 }
2335 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2336 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2337 if (mtu >= dst_mtu(dst))
2338 return;
9d289715 2339
0d3f6d29 2340 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2341 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2342 /* update rt6_ex->stamp for cache */
2343 if (rt6->rt6i_flags & RTF_CACHE)
2344 rt6_update_exception_stamp_rt(rt6);
0dec879f 2345 } else if (daddr) {
a68886a6 2346 struct fib6_info *from;
45e4fd26
MKL
2347 struct rt6_info *nrt6;
2348
4d85cd0c 2349 rcu_read_lock();
a68886a6
DA
2350 from = rcu_dereference(rt6->from);
2351 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2352 if (nrt6) {
2353 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2354 if (rt6_insert_exception(nrt6, from))
2b760fcf 2355 dst_release_immediate(&nrt6->dst);
45e4fd26 2356 }
a68886a6 2357 rcu_read_unlock();
1da177e4
LT
2358 }
2359}
2360
45e4fd26
MKL
2361static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2362 struct sk_buff *skb, u32 mtu)
2363{
2364 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2365}
2366
42ae66c8 2367void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2368 int oif, u32 mark, kuid_t uid)
81aded24
DM
2369{
2370 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2371 struct dst_entry *dst;
2372 struct flowi6 fl6;
2373
2374 memset(&fl6, 0, sizeof(fl6));
2375 fl6.flowi6_oif = oif;
1b3c61dc 2376 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2377 fl6.daddr = iph->daddr;
2378 fl6.saddr = iph->saddr;
6502ca52 2379 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2380 fl6.flowi6_uid = uid;
81aded24
DM
2381
2382 dst = ip6_route_output(net, NULL, &fl6);
2383 if (!dst->error)
45e4fd26 2384 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2385 dst_release(dst);
2386}
2387EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2388
2389void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2390{
33c162a9
MKL
2391 struct dst_entry *dst;
2392
81aded24 2393 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2394 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2395
2396 dst = __sk_dst_get(sk);
2397 if (!dst || !dst->obsolete ||
2398 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2399 return;
2400
2401 bh_lock_sock(sk);
2402 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2403 ip6_datagram_dst_update(sk, false);
2404 bh_unlock_sock(sk);
81aded24
DM
2405}
2406EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2407
7d6850f7
AK
2408void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2409 const struct flowi6 *fl6)
2410{
2411#ifdef CONFIG_IPV6_SUBTREES
2412 struct ipv6_pinfo *np = inet6_sk(sk);
2413#endif
2414
2415 ip6_dst_store(sk, dst,
2416 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2417 &sk->sk_v6_daddr : NULL,
2418#ifdef CONFIG_IPV6_SUBTREES
2419 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2420 &np->saddr :
2421#endif
2422 NULL);
2423}
2424
b55b76b2
DJ
2425/* Handle redirects
 *
 * Flow key used for redirect lookups: a regular flowi6 followed by the
 * address of the router that sent the redirect.  fl6 has to stay the first
 * member because __ip6_route_redirect() casts the struct flowi6 pointer it
 * receives back to a struct ip6rd_flowi pointer.
 */
2426struct ip6rd_flowi {
2427 struct flowi6 fl6;
2428 struct in6_addr gateway;
2429};
2430
/*
 * Table lookup callback used for redirects (passed to fib6_rule_lookup()
 * by ip6_route_redirect()).
 *
 * @fl6 must be the fl6 member of a struct ip6rd_flowi: it is cast back to
 * that type to obtain the address of the router that sent the redirect.
 *
 * Under rcu_read_lock() the routes of the matching fib6 node are scanned
 * for a live, unexpired gateway route on the flow's output interface whose
 * gateway is the redirecting router, either directly or through a cached
 * exception route.  If nothing matches, the search backtracks through
 * fib6_backtrack() and restarts.  A reject route ends the search with the
 * namespace's ip6_null_entry.
 *
 * Returns a held exception route / null entry, or a route created from the
 * selected FIB entry by ip6_create_rt_rcu().
 */
2431static struct rt6_info *__ip6_route_redirect(struct net *net,
2432 struct fib6_table *table,
2433 struct flowi6 *fl6,
b75cc8f9 2434 const struct sk_buff *skb,
b55b76b2
DJ
2435 int flags)
2436{
2437 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2438 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2439 struct fib6_info *rt;
b55b76b2
DJ
2440 struct fib6_node *fn;
2441
2442 /* Get the "current" route for this destination and
67c408cf 2443 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2444 *
2445 * RFC 4861 specifies that redirects should only be
2446 * accepted if they come from the nexthop to the target.
2447 * Due to the way the routes are chosen, this notion
2448 * is a bit fuzzy and one might need to check all possible
2449 * routes.
2450 */
2451
66f5d6ce 2452 rcu_read_lock();
6454743b 2453 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2454restart:
 /* NOTE(review): the iterator macro presumably walks the node's routes
  * through the local variable 'rt' - confirm against its definition
  * before renaming that local.
  */
66f5d6ce 2455 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2456 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2457 continue;
14895687 2458 if (fib6_check_expired(rt))
b55b76b2 2459 continue;
 /* a reject route stops the scan; handled after the loop */
93c2fb25 2460 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2461 break;
93c2fb25 2462 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2463 continue;
5e670d84 2464 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2465 continue;
2b760fcf
WW
2466 /* rt_cache's gateway might be different from its 'parent'
2467 * in the case of an ip redirect.
2468 * So we keep searching in the exception table if the gateway
2469 * is different.
2470 */
5e670d84 2471 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2472 rt_cache = rt6_find_cached_rt(rt,
2473 &fl6->daddr,
2474 &fl6->saddr);
2475 if (rt_cache &&
2476 ipv6_addr_equal(&rdfl->gateway,
2477 &rt_cache->rt6i_gateway)) {
23fb93a4 2478 ret = rt_cache;
2b760fcf
WW
2479 break;
2480 }
b55b76b2 2481 continue;
2b760fcf 2482 }
b55b76b2
DJ
2483 break;
2484 }
2485
 /* loop ran off the end: nothing usable in this node */
2486 if (!rt)
421842ed 2487 rt = net->ipv6.fib6_null_entry;
93c2fb25 2488 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2489 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2490 goto out;
2491 }
2492
 /* no match here: backtrack and rescan while a node is returned */
421842ed 2493 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2494 fn = fib6_backtrack(fn, &fl6->saddr);
2495 if (fn)
2496 goto restart;
b55b76b2 2497 }
a3c00e46 2498
b0a1ba59 2499out:
 /* take a reference on an existing route, or build one from 'rt' */
23fb93a4 2500 if (ret)
e873e4b9 2501 ip6_hold_safe(net, &ret, true);
23fb93a4
DA
2502 else
2503 ret = ip6_create_rt_rcu(rt);
b55b76b2 2504
66f5d6ce 2505 rcu_read_unlock();
b55b76b2 2506
b65f164d 2507 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2508 return ret;
b55b76b2
DJ
2509};
2510
2511static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2512 const struct flowi6 *fl6,
2513 const struct sk_buff *skb,
2514 const struct in6_addr *gateway)
b55b76b2
DJ
2515{
2516 int flags = RT6_LOOKUP_F_HAS_SADDR;
2517 struct ip6rd_flowi rdfl;
2518
2519 rdfl.fl6 = *fl6;
2520 rdfl.gateway = *gateway;
2521
b75cc8f9 2522 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2523 flags, __ip6_route_redirect);
2524}
2525
e2d118a1
LC
2526void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2527 kuid_t uid)
3a5ad2ee
DM
2528{
2529 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2530 struct dst_entry *dst;
2531 struct flowi6 fl6;
2532
2533 memset(&fl6, 0, sizeof(fl6));
e374c618 2534 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2535 fl6.flowi6_oif = oif;
2536 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2537 fl6.daddr = iph->daddr;
2538 fl6.saddr = iph->saddr;
6502ca52 2539 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2540 fl6.flowi6_uid = uid;
3a5ad2ee 2541
b75cc8f9 2542 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2543 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2544 dst_release(dst);
2545}
2546EXPORT_SYMBOL_GPL(ip6_redirect);
2547
c92a59ec
DJ
2548void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2549 u32 mark)
2550{
2551 const struct ipv6hdr *iph = ipv6_hdr(skb);
2552 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2553 struct dst_entry *dst;
2554 struct flowi6 fl6;
2555
2556 memset(&fl6, 0, sizeof(fl6));
e374c618 2557 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2558 fl6.flowi6_oif = oif;
2559 fl6.flowi6_mark = mark;
c92a59ec
DJ
2560 fl6.daddr = msg->dest;
2561 fl6.saddr = iph->daddr;
e2d118a1 2562 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2563
b75cc8f9 2564 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2565 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2566 dst_release(dst);
2567}
2568
3a5ad2ee
DM
2569void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2570{
e2d118a1
LC
2571 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2572 sk->sk_uid);
3a5ad2ee
DM
2573}
2574EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2575
0dbaee3b 2576static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2577{
0dbaee3b
DM
2578 struct net_device *dev = dst->dev;
2579 unsigned int mtu = dst_mtu(dst);
2580 struct net *net = dev_net(dev);
2581
1da177e4
LT
2582 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2583
5578689a
DL
2584 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2585 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2586
2587 /*
1ab1457c
YH
2588 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2589 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2590 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2591 * rely only on pmtu discovery"
2592 */
2593 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2594 mtu = IPV6_MAXPLEN;
2595 return mtu;
2596}
2597
ebb762f2 2598static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2599{
d33e4553 2600 struct inet6_dev *idev;
d4ead6b3 2601 unsigned int mtu;
4b32b5ad
MKL
2602
2603 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2604 if (mtu)
30f78d8e 2605 goto out;
618f9bc7
SK
2606
2607 mtu = IPV6_MIN_MTU;
d33e4553
DM
2608
2609 rcu_read_lock();
2610 idev = __in6_dev_get(dst->dev);
2611 if (idev)
2612 mtu = idev->cnf.mtu6;
2613 rcu_read_unlock();
2614
30f78d8e 2615out:
14972cbd
RP
2616 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2617
2618 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2619}
2620
901731b8
DA
2621/* MTU selection:
2622 * 1. mtu on route is locked - use it
2623 * 2. mtu from nexthop exception
2624 * 3. mtu from egress device
2625 *
2626 * based on ip6_dst_mtu_forward and exception logic of
2627 * rt6_find_cached_rt; called with rcu_read_lock
2628 */
2629u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2630 struct in6_addr *saddr)
2631{
2632 struct rt6_exception_bucket *bucket;
2633 struct rt6_exception *rt6_ex;
2634 struct in6_addr *src_key;
2635 struct inet6_dev *idev;
2636 u32 mtu = 0;
2637
2638 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2639 mtu = f6i->fib6_pmtu;
2640 if (mtu)
2641 goto out;
2642 }
2643
2644 src_key = NULL;
2645#ifdef CONFIG_IPV6_SUBTREES
2646 if (f6i->fib6_src.plen)
2647 src_key = saddr;
2648#endif
2649
2650 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2651 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2652 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2653 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2654
2655 if (likely(!mtu)) {
2656 struct net_device *dev = fib6_info_nh_dev(f6i);
2657
2658 mtu = IPV6_MIN_MTU;
2659 idev = __in6_dev_get(dev);
2660 if (idev && idev->cnf.mtu6 > mtu)
2661 mtu = idev->cnf.mtu6;
2662 }
2663
2664 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2665out:
2666 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2667}
2668
3b00944c 2669struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2670 struct flowi6 *fl6)
1da177e4 2671{
87a11578 2672 struct dst_entry *dst;
1da177e4
LT
2673 struct rt6_info *rt;
2674 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2675 struct net *net = dev_net(dev);
1da177e4 2676
38308473 2677 if (unlikely(!idev))
122bdf67 2678 return ERR_PTR(-ENODEV);
1da177e4 2679
ad706862 2680 rt = ip6_dst_alloc(net, dev, 0);
38308473 2681 if (unlikely(!rt)) {
1da177e4 2682 in6_dev_put(idev);
87a11578 2683 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2684 goto out;
2685 }
2686
8e2ec639 2687 rt->dst.flags |= DST_HOST;
588753f1 2688 rt->dst.input = ip6_input;
8e2ec639 2689 rt->dst.output = ip6_output;
550bab42 2690 rt->rt6i_gateway = fl6->daddr;
87a11578 2691 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2692 rt->rt6i_dst.plen = 128;
2693 rt->rt6i_idev = idev;
14edd87d 2694 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2695
4c981e28 2696 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2697 * do proper release of the net_device
2698 */
2699 rt6_uncached_list_add(rt);
81eb8447 2700 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2701
87a11578
DM
2702 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2703
1da177e4 2704out:
87a11578 2705 return dst;
1da177e4
LT
2706}
2707
569d3645 2708static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2709{
86393e52 2710 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2711 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2712 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2713 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2714 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2715 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2716 int entries;
7019b78e 2717
fc66f95c 2718 entries = dst_entries_get_fast(ops);
49a18d86 2719 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2720 entries <= rt_max_size)
1da177e4
LT
2721 goto out;
2722
6891a346 2723 net->ipv6.ip6_rt_gc_expire++;
14956643 2724 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2725 entries = dst_entries_get_slow(ops);
2726 if (entries < ops->gc_thresh)
7019b78e 2727 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2728out:
7019b78e 2729 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2730 return entries > rt_max_size;
1da177e4
LT
2731}
2732
8d1c802b 2733static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
d4ead6b3 2734 struct fib6_config *cfg)
e715b6d3 2735{
263243d6 2736 struct dst_metrics *p;
e715b6d3 2737
63159f29 2738 if (!cfg->fc_mx)
e715b6d3
FW
2739 return 0;
2740
263243d6
ED
2741 p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2742 if (unlikely(!p))
e715b6d3
FW
2743 return -ENOMEM;
2744
263243d6
ED
2745 refcount_set(&p->refcnt, 1);
2746 rt->fib6_metrics = p;
e715b6d3 2747
263243d6 2748 return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
e715b6d3 2749}
1da177e4 2750
8c14586f
DA
2751static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2752 struct fib6_config *cfg,
f4797b33
DA
2753 const struct in6_addr *gw_addr,
2754 u32 tbid, int flags)
8c14586f
DA
2755{
2756 struct flowi6 fl6 = {
2757 .flowi6_oif = cfg->fc_ifindex,
2758 .daddr = *gw_addr,
2759 .saddr = cfg->fc_prefsrc,
2760 };
2761 struct fib6_table *table;
2762 struct rt6_info *rt;
8c14586f 2763
f4797b33 2764 table = fib6_get_table(net, tbid);
8c14586f
DA
2765 if (!table)
2766 return NULL;
2767
2768 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2769 flags |= RT6_LOOKUP_F_HAS_SADDR;
2770
f4797b33 2771 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2772 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2773
2774 /* if table lookup failed, fall back to full lookup */
2775 if (rt == net->ipv6.ip6_null_entry) {
2776 ip6_rt_put(rt);
2777 rt = NULL;
2778 }
2779
2780 return rt;
2781}
2782
fc1e64e1
DA
2783static int ip6_route_check_nh_onlink(struct net *net,
2784 struct fib6_config *cfg,
9fbb704c 2785 const struct net_device *dev,
fc1e64e1
DA
2786 struct netlink_ext_ack *extack)
2787{
44750f84 2788 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2789 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2790 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2791 struct rt6_info *grt;
2792 int err;
2793
2794 err = 0;
2795 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2796 if (grt) {
58e354c0
DA
2797 if (!grt->dst.error &&
2798 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2799 NL_SET_ERR_MSG(extack,
2800 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2801 err = -EINVAL;
2802 }
2803
2804 ip6_rt_put(grt);
2805 }
2806
2807 return err;
2808}
2809
1edce99f
DA
2810static int ip6_route_check_nh(struct net *net,
2811 struct fib6_config *cfg,
2812 struct net_device **_dev,
2813 struct inet6_dev **idev)
2814{
2815 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2816 struct net_device *dev = _dev ? *_dev : NULL;
2817 struct rt6_info *grt = NULL;
2818 int err = -EHOSTUNREACH;
2819
2820 if (cfg->fc_table) {
f4797b33
DA
2821 int flags = RT6_LOOKUP_F_IFACE;
2822
2823 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2824 cfg->fc_table, flags);
1edce99f
DA
2825 if (grt) {
2826 if (grt->rt6i_flags & RTF_GATEWAY ||
2827 (dev && dev != grt->dst.dev)) {
2828 ip6_rt_put(grt);
2829 grt = NULL;
2830 }
2831 }
2832 }
2833
2834 if (!grt)
b75cc8f9 2835 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2836
2837 if (!grt)
2838 goto out;
2839
2840 if (dev) {
2841 if (dev != grt->dst.dev) {
2842 ip6_rt_put(grt);
2843 goto out;
2844 }
2845 } else {
2846 *_dev = dev = grt->dst.dev;
2847 *idev = grt->rt6i_idev;
2848 dev_hold(dev);
2849 in6_dev_hold(grt->rt6i_idev);
2850 }
2851
2852 if (!(grt->rt6i_flags & RTF_GATEWAY))
2853 err = 0;
2854
2855 ip6_rt_put(grt);
2856
2857out:
2858 return err;
2859}
2860
9fbb704c
DA
2861static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2862 struct net_device **_dev, struct inet6_dev **idev,
2863 struct netlink_ext_ack *extack)
2864{
2865 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2866 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2867 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2868 const struct net_device *dev = *_dev;
232378e8 2869 bool need_addr_check = !dev;
9fbb704c
DA
2870 int err = -EINVAL;
2871
2872 /* if gw_addr is local we will fail to detect this in case
2873 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2874 * will return already-added prefix route via interface that
2875 * prefix route was assigned to, which might be non-loopback.
2876 */
232378e8
DA
2877 if (dev &&
2878 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2879 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2880 goto out;
2881 }
2882
2883 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2884 /* IPv6 strictly inhibits using not link-local
2885 * addresses as nexthop address.
2886 * Otherwise, router will not able to send redirects.
2887 * It is very good, but in some (rare!) circumstances
2888 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2889 * some exceptions. --ANK
2890 * We allow IPv4-mapped nexthops to support RFC4798-type
2891 * addressing
2892 */
2893 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2894 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2895 goto out;
2896 }
2897
2898 if (cfg->fc_flags & RTNH_F_ONLINK)
2899 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2900 else
2901 err = ip6_route_check_nh(net, cfg, _dev, idev);
2902
2903 if (err)
2904 goto out;
2905 }
2906
2907 /* reload in case device was changed */
2908 dev = *_dev;
2909
2910 err = -EINVAL;
2911 if (!dev) {
2912 NL_SET_ERR_MSG(extack, "Egress device not specified");
2913 goto out;
2914 } else if (dev->flags & IFF_LOOPBACK) {
2915 NL_SET_ERR_MSG(extack,
2916 "Egress device can not be loopback device for this route");
2917 goto out;
2918 }
232378e8
DA
2919
2920 /* if we did not check gw_addr above, do so now that the
2921 * egress device has been resolved.
2922 */
2923 if (need_addr_check &&
2924 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2925 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2926 goto out;
2927 }
2928
9fbb704c
DA
2929 err = 0;
2930out:
2931 return err;
2932}
2933
8d1c802b 2934static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2935 gfp_t gfp_flags,
333c4301 2936 struct netlink_ext_ack *extack)
1da177e4 2937{
5578689a 2938 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 2939 struct fib6_info *rt = NULL;
1da177e4
LT
2940 struct net_device *dev = NULL;
2941 struct inet6_dev *idev = NULL;
c71099ac 2942 struct fib6_table *table;
1da177e4 2943 int addr_type;
8c5b83f0 2944 int err = -EINVAL;
1da177e4 2945
557c44be 2946 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2947 if (cfg->fc_flags & RTF_PCPU) {
2948 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2949 goto out;
d5d531cb 2950 }
557c44be 2951
2ea2352e
WW
2952 /* RTF_CACHE is an internal flag; can not be set by userspace */
2953 if (cfg->fc_flags & RTF_CACHE) {
2954 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2955 goto out;
2956 }
2957
e8478e80
DA
2958 if (cfg->fc_type > RTN_MAX) {
2959 NL_SET_ERR_MSG(extack, "Invalid route type");
2960 goto out;
2961 }
2962
d5d531cb
DA
2963 if (cfg->fc_dst_len > 128) {
2964 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2965 goto out;
2966 }
2967 if (cfg->fc_src_len > 128) {
2968 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2969 goto out;
d5d531cb 2970 }
1da177e4 2971#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2972 if (cfg->fc_src_len) {
2973 NL_SET_ERR_MSG(extack,
2974 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2975 goto out;
d5d531cb 2976 }
1da177e4 2977#endif
86872cb5 2978 if (cfg->fc_ifindex) {
1da177e4 2979 err = -ENODEV;
5578689a 2980 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2981 if (!dev)
2982 goto out;
2983 idev = in6_dev_get(dev);
2984 if (!idev)
2985 goto out;
2986 }
2987
86872cb5
TG
2988 if (cfg->fc_metric == 0)
2989 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2990
fc1e64e1
DA
2991 if (cfg->fc_flags & RTNH_F_ONLINK) {
2992 if (!dev) {
2993 NL_SET_ERR_MSG(extack,
2994 "Nexthop device required for onlink");
2995 err = -ENODEV;
2996 goto out;
2997 }
2998
2999 if (!(dev->flags & IFF_UP)) {
3000 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3001 err = -ENETDOWN;
3002 goto out;
3003 }
3004 }
3005
d71314b4 3006 err = -ENOBUFS;
38308473
DM
3007 if (cfg->fc_nlinfo.nlh &&
3008 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3009 table = fib6_get_table(net, cfg->fc_table);
38308473 3010 if (!table) {
f3213831 3011 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3012 table = fib6_new_table(net, cfg->fc_table);
3013 }
3014 } else {
3015 table = fib6_new_table(net, cfg->fc_table);
3016 }
38308473
DM
3017
3018 if (!table)
c71099ac 3019 goto out;
c71099ac 3020
93531c67
DA
3021 err = -ENOMEM;
3022 rt = fib6_info_alloc(gfp_flags);
3023 if (!rt)
1da177e4 3024 goto out;
93531c67
DA
3025
3026 if (cfg->fc_flags & RTF_ADDRCONF)
3027 rt->dst_nocount = true;
1da177e4 3028
d4ead6b3
DA
3029 err = ip6_convert_metrics(net, rt, cfg);
3030 if (err < 0)
1da177e4 3031 goto out;
1da177e4 3032
1716a961 3033 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3034 fib6_set_expires(rt, jiffies +
1716a961
G
3035 clock_t_to_jiffies(cfg->fc_expires));
3036 else
14895687 3037 fib6_clean_expires(rt);
1da177e4 3038
86872cb5
TG
3039 if (cfg->fc_protocol == RTPROT_UNSPEC)
3040 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3041 rt->fib6_protocol = cfg->fc_protocol;
86872cb5
TG
3042
3043 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 3044
19e42e45
RP
3045 if (cfg->fc_encap) {
3046 struct lwtunnel_state *lwtstate;
3047
30357d7d 3048 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 3049 cfg->fc_encap, AF_INET6, cfg,
9ae28727 3050 &lwtstate, extack);
19e42e45
RP
3051 if (err)
3052 goto out;
5e670d84 3053 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
3054 }
3055
93c2fb25
DA
3056 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3057 rt->fib6_dst.plen = cfg->fc_dst_len;
3058 if (rt->fib6_dst.plen == 128)
3b6761d1 3059 rt->dst_host = true;
e5fd387a 3060
1da177e4 3061#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3062 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3063 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4
LT
3064#endif
3065
93c2fb25 3066 rt->fib6_metric = cfg->fc_metric;
5e670d84 3067 rt->fib6_nh.nh_weight = 1;
1da177e4 3068
e8478e80 3069 rt->fib6_type = cfg->fc_type;
1da177e4
LT
3070
3071 /* We cannot add true routes via loopback here,
3072 they would result in kernel looping; promote them to reject routes
3073 */
86872cb5 3074 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3075 (dev && (dev->flags & IFF_LOOPBACK) &&
3076 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3077 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3078 /* hold loopback dev/idev if we haven't done so. */
5578689a 3079 if (dev != net->loopback_dev) {
1da177e4
LT
3080 if (dev) {
3081 dev_put(dev);
3082 in6_dev_put(idev);
3083 }
5578689a 3084 dev = net->loopback_dev;
1da177e4
LT
3085 dev_hold(dev);
3086 idev = in6_dev_get(dev);
3087 if (!idev) {
3088 err = -ENODEV;
3089 goto out;
3090 }
3091 }
93c2fb25 3092 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
1da177e4
LT
3093 goto install_route;
3094 }
3095
86872cb5 3096 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3097 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3098 if (err)
48ed7b26 3099 goto out;
1da177e4 3100
93531c67 3101 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
3102 }
3103
3104 err = -ENODEV;
38308473 3105 if (!dev)
1da177e4
LT
3106 goto out;
3107
428604fb
LB
3108 if (idev->cnf.disable_ipv6) {
3109 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3110 err = -EACCES;
3111 goto out;
3112 }
3113
955ec4cb
DA
3114 if (!(dev->flags & IFF_UP)) {
3115 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3116 err = -ENETDOWN;
3117 goto out;
3118 }
3119
c3968a85
DW
3120 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3121 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3122 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3123 err = -EINVAL;
3124 goto out;
3125 }
93c2fb25
DA
3126 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3127 rt->fib6_prefsrc.plen = 128;
c3968a85 3128 } else
93c2fb25 3129 rt->fib6_prefsrc.plen = 0;
c3968a85 3130
93c2fb25 3131 rt->fib6_flags = cfg->fc_flags;
1da177e4
LT
3132
3133install_route:
93c2fb25 3134 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
5609b80a 3135 !netif_carrier_ok(dev))
5e670d84
DA
3136 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3137 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3138 rt->fib6_nh.nh_dev = dev;
93c2fb25 3139 rt->fib6_table = table;
63152fc0 3140
c346dca1 3141 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3142
dcd1f572
DA
3143 if (idev)
3144 in6_dev_put(idev);
3145
8c5b83f0 3146 return rt;
6b9ea5a6
RP
3147out:
3148 if (dev)
3149 dev_put(dev);
3150 if (idev)
3151 in6_dev_put(idev);
6b9ea5a6 3152
93531c67 3153 fib6_info_release(rt);
8c5b83f0 3154 return ERR_PTR(err);
6b9ea5a6
RP
3155}
3156
acb54e3c 3157int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3158 struct netlink_ext_ack *extack)
6b9ea5a6 3159{
8d1c802b 3160 struct fib6_info *rt;
6b9ea5a6
RP
3161 int err;
3162
acb54e3c 3163 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3164 if (IS_ERR(rt))
3165 return PTR_ERR(rt);
6b9ea5a6 3166
d4ead6b3 3167 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3168 fib6_info_release(rt);
6b9ea5a6 3169
1da177e4
LT
3170 return err;
3171}
3172
8d1c802b 3173static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3174{
afb1d4b5 3175 struct net *net = info->nl_net;
c71099ac 3176 struct fib6_table *table;
afb1d4b5 3177 int err;
1da177e4 3178
421842ed 3179 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3180 err = -ENOENT;
3181 goto out;
3182 }
6c813a72 3183
93c2fb25 3184 table = rt->fib6_table;
66f5d6ce 3185 spin_lock_bh(&table->tb6_lock);
86872cb5 3186 err = fib6_del(rt, info);
66f5d6ce 3187 spin_unlock_bh(&table->tb6_lock);
1da177e4 3188
6825a26c 3189out:
93531c67 3190 fib6_info_release(rt);
1da177e4
LT
3191 return err;
3192}
3193
8d1c802b 3194int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3195{
afb1d4b5
DA
3196 struct nl_info info = { .nl_net = net };
3197
528c4ceb 3198 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3199}
3200
8d1c802b 3201static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3202{
3203 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3204 struct net *net = info->nl_net;
16a16cd3 3205 struct sk_buff *skb = NULL;
0ae81335 3206 struct fib6_table *table;
e3330039 3207 int err = -ENOENT;
0ae81335 3208
421842ed 3209 if (rt == net->ipv6.fib6_null_entry)
e3330039 3210 goto out_put;
93c2fb25 3211 table = rt->fib6_table;
66f5d6ce 3212 spin_lock_bh(&table->tb6_lock);
0ae81335 3213
93c2fb25 3214 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3215 struct fib6_info *sibling, *next_sibling;
0ae81335 3216
16a16cd3
DA
3217 /* prefer to send a single notification with all hops */
3218 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3219 if (skb) {
3220 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3221
d4ead6b3 3222 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3223 NULL, NULL, 0, RTM_DELROUTE,
3224 info->portid, seq, 0) < 0) {
3225 kfree_skb(skb);
3226 skb = NULL;
3227 } else
3228 info->skip_notify = 1;
3229 }
3230
0ae81335 3231 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3232 &rt->fib6_siblings,
3233 fib6_siblings) {
0ae81335
DA
3234 err = fib6_del(sibling, info);
3235 if (err)
e3330039 3236 goto out_unlock;
0ae81335
DA
3237 }
3238 }
3239
3240 err = fib6_del(rt, info);
e3330039 3241out_unlock:
66f5d6ce 3242 spin_unlock_bh(&table->tb6_lock);
e3330039 3243out_put:
93531c67 3244 fib6_info_release(rt);
16a16cd3
DA
3245
3246 if (skb) {
e3330039 3247 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3248 info->nlh, gfp_any());
3249 }
0ae81335
DA
3250 return err;
3251}
3252
23fb93a4
DA
3253static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3254{
3255 int rc = -ESRCH;
3256
3257 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3258 goto out;
3259
3260 if (cfg->fc_flags & RTF_GATEWAY &&
3261 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3262 goto out;
3263 if (dst_hold_safe(&rt->dst))
3264 rc = rt6_remove_exception_rt(rt);
3265out:
3266 return rc;
3267}
3268
333c4301
DA
3269static int ip6_route_del(struct fib6_config *cfg,
3270 struct netlink_ext_ack *extack)
1da177e4 3271{
8d1c802b 3272 struct rt6_info *rt_cache;
c71099ac 3273 struct fib6_table *table;
8d1c802b 3274 struct fib6_info *rt;
1da177e4 3275 struct fib6_node *fn;
1da177e4
LT
3276 int err = -ESRCH;
3277
5578689a 3278 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3279 if (!table) {
3280 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3281 return err;
d5d531cb 3282 }
c71099ac 3283
66f5d6ce 3284 rcu_read_lock();
1da177e4 3285
c71099ac 3286 fn = fib6_locate(&table->tb6_root,
86872cb5 3287 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3288 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3289 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3290
1da177e4 3291 if (fn) {
66f5d6ce 3292 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3293 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3294 int rc;
3295
2b760fcf
WW
3296 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3297 &cfg->fc_src);
23fb93a4
DA
3298 if (rt_cache) {
3299 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3300 if (rc != -ESRCH) {
3301 rcu_read_unlock();
23fb93a4 3302 return rc;
9e575010 3303 }
23fb93a4
DA
3304 }
3305 continue;
2b760fcf 3306 }
86872cb5 3307 if (cfg->fc_ifindex &&
5e670d84
DA
3308 (!rt->fib6_nh.nh_dev ||
3309 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3310 continue;
86872cb5 3311 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3312 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3313 continue;
93c2fb25 3314 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3315 continue;
93c2fb25 3316 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3317 continue;
e873e4b9
WW
3318 if (!fib6_info_hold_safe(rt))
3319 continue;
66f5d6ce 3320 rcu_read_unlock();
1da177e4 3321
0ae81335
DA
3322 /* if gateway was specified only delete the one hop */
3323 if (cfg->fc_flags & RTF_GATEWAY)
3324 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3325
3326 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3327 }
3328 }
66f5d6ce 3329 rcu_read_unlock();
1da177e4
LT
3330
3331 return err;
3332}
3333
6700c270 3334static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3335{
a6279458 3336 struct netevent_redirect netevent;
e8599ff4 3337 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3338 struct ndisc_options ndopts;
3339 struct inet6_dev *in6_dev;
3340 struct neighbour *neigh;
a68886a6 3341 struct fib6_info *from;
71bcdba0 3342 struct rd_msg *msg;
6e157b6a
DM
3343 int optlen, on_link;
3344 u8 *lladdr;
e8599ff4 3345
29a3cad5 3346 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3347 optlen -= sizeof(*msg);
e8599ff4
DM
3348
3349 if (optlen < 0) {
6e157b6a 3350 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3351 return;
3352 }
3353
71bcdba0 3354 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3355
71bcdba0 3356 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3357 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3358 return;
3359 }
3360
6e157b6a 3361 on_link = 0;
71bcdba0 3362 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3363 on_link = 1;
71bcdba0 3364 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3365 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3366 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3367 return;
3368 }
3369
3370 in6_dev = __in6_dev_get(skb->dev);
3371 if (!in6_dev)
3372 return;
3373 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3374 return;
3375
3376 /* RFC2461 8.1:
3377 * The IP source address of the Redirect MUST be the same as the current
3378 * first-hop router for the specified ICMP Destination Address.
3379 */
3380
f997c55c 3381 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3382 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3383 return;
3384 }
6e157b6a
DM
3385
3386 lladdr = NULL;
e8599ff4
DM
3387 if (ndopts.nd_opts_tgt_lladdr) {
3388 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3389 skb->dev);
3390 if (!lladdr) {
3391 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3392 return;
3393 }
3394 }
3395
6e157b6a 3396 rt = (struct rt6_info *) dst;
ec13ad1d 3397 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3398 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3399 return;
6e157b6a 3400 }
e8599ff4 3401
6e157b6a
DM
3402 /* Redirect received -> path was valid.
3403 * Look, redirects are sent only in response to data packets,
3404 * so that this nexthop apparently is reachable. --ANK
3405 */
0dec879f 3406 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3407
71bcdba0 3408 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3409 if (!neigh)
3410 return;
a6279458 3411
1da177e4
LT
3412 /*
3413 * We have finally decided to accept it.
3414 */
3415
f997c55c 3416 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3417 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3418 NEIGH_UPDATE_F_OVERRIDE|
3419 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3420 NEIGH_UPDATE_F_ISROUTER)),
3421 NDISC_REDIRECT, &ndopts);
1da177e4 3422
4d85cd0c 3423 rcu_read_lock();
a68886a6 3424 from = rcu_dereference(rt->from);
e873e4b9
WW
3425 /* This fib6_info_hold() is safe here because we hold reference to rt
3426 * and rt already holds reference to fib6_info.
3427 */
8a14e46f 3428 fib6_info_hold(from);
4d85cd0c 3429 rcu_read_unlock();
8a14e46f
DA
3430
3431 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3432 if (!nrt)
1da177e4
LT
3433 goto out;
3434
3435 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3436 if (on_link)
3437 nrt->rt6i_flags &= ~RTF_GATEWAY;
3438
4e3fd7a0 3439 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3440
2b760fcf
WW
3441 /* No need to remove rt from the exception table if rt is
3442 * a cached route because rt6_insert_exception() will
3443 * takes care of it
3444 */
8a14e46f 3445 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3446 dst_release_immediate(&nrt->dst);
3447 goto out;
3448 }
1da177e4 3449
d8d1f30b
CG
3450 netevent.old = &rt->dst;
3451 netevent.new = &nrt->dst;
71bcdba0 3452 netevent.daddr = &msg->dest;
60592833 3453 netevent.neigh = neigh;
8d71740c
TT
3454 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3455
1da177e4 3456out:
8a14e46f 3457 fib6_info_release(from);
e8599ff4 3458 neigh_release(neigh);
6e157b6a
DM
3459}
3460
70ceb4f5 3461#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3462static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3463 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3464 const struct in6_addr *gwaddr,
3465 struct net_device *dev)
70ceb4f5 3466{
830218c1
DA
3467 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3468 int ifindex = dev->ifindex;
70ceb4f5 3469 struct fib6_node *fn;
8d1c802b 3470 struct fib6_info *rt = NULL;
c71099ac
TG
3471 struct fib6_table *table;
3472
830218c1 3473 table = fib6_get_table(net, tb_id);
38308473 3474 if (!table)
c71099ac 3475 return NULL;
70ceb4f5 3476
66f5d6ce 3477 rcu_read_lock();
38fbeeee 3478 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3479 if (!fn)
3480 goto out;
3481
66f5d6ce 3482 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3483 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3484 continue;
93c2fb25 3485 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3486 continue;
5e670d84 3487 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3488 continue;
e873e4b9
WW
3489 if (!fib6_info_hold_safe(rt))
3490 continue;
70ceb4f5
YH
3491 break;
3492 }
3493out:
66f5d6ce 3494 rcu_read_unlock();
70ceb4f5
YH
3495 return rt;
3496}
3497
8d1c802b 3498static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3499 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3500 const struct in6_addr *gwaddr,
3501 struct net_device *dev,
95c96174 3502 unsigned int pref)
70ceb4f5 3503{
86872cb5 3504 struct fib6_config cfg = {
238fc7ea 3505 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3506 .fc_ifindex = dev->ifindex,
86872cb5
TG
3507 .fc_dst_len = prefixlen,
3508 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3509 RTF_UP | RTF_PREF(pref),
b91d5329 3510 .fc_protocol = RTPROT_RA,
e8478e80 3511 .fc_type = RTN_UNICAST,
15e47304 3512 .fc_nlinfo.portid = 0,
efa2cea0
DL
3513 .fc_nlinfo.nlh = NULL,
3514 .fc_nlinfo.nl_net = net,
86872cb5
TG
3515 };
3516
830218c1 3517 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3518 cfg.fc_dst = *prefix;
3519 cfg.fc_gateway = *gwaddr;
70ceb4f5 3520
e317da96
YH
3521 /* We should treat it as a default route if prefix length is 0. */
3522 if (!prefixlen)
86872cb5 3523 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3524
acb54e3c 3525 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3526
830218c1 3527 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3528}
3529#endif
3530
8d1c802b 3531struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3532 const struct in6_addr *addr,
3533 struct net_device *dev)
1ab1457c 3534{
830218c1 3535 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3536 struct fib6_info *rt;
c71099ac 3537 struct fib6_table *table;
1da177e4 3538
afb1d4b5 3539 table = fib6_get_table(net, tb_id);
38308473 3540 if (!table)
c71099ac 3541 return NULL;
1da177e4 3542
66f5d6ce
WW
3543 rcu_read_lock();
3544 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3545 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3546 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3547 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3548 break;
3549 }
e873e4b9
WW
3550 if (rt && !fib6_info_hold_safe(rt))
3551 rt = NULL;
66f5d6ce 3552 rcu_read_unlock();
1da177e4
LT
3553 return rt;
3554}
3555
8d1c802b 3556struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3557 const struct in6_addr *gwaddr,
ebacaaa0
YH
3558 struct net_device *dev,
3559 unsigned int pref)
1da177e4 3560{
86872cb5 3561 struct fib6_config cfg = {
ca254490 3562 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3563 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3564 .fc_ifindex = dev->ifindex,
3565 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3566 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3567 .fc_protocol = RTPROT_RA,
e8478e80 3568 .fc_type = RTN_UNICAST,
15e47304 3569 .fc_nlinfo.portid = 0,
5578689a 3570 .fc_nlinfo.nlh = NULL,
afb1d4b5 3571 .fc_nlinfo.nl_net = net,
86872cb5 3572 };
1da177e4 3573
4e3fd7a0 3574 cfg.fc_gateway = *gwaddr;
1da177e4 3575
acb54e3c 3576 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3577 struct fib6_table *table;
3578
3579 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3580 if (table)
3581 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3582 }
1da177e4 3583
afb1d4b5 3584 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3585}
3586
afb1d4b5
DA
3587static void __rt6_purge_dflt_routers(struct net *net,
3588 struct fib6_table *table)
1da177e4 3589{
8d1c802b 3590 struct fib6_info *rt;
1da177e4
LT
3591
3592restart:
66f5d6ce
WW
3593 rcu_read_lock();
3594 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3595 struct net_device *dev = fib6_info_nh_dev(rt);
3596 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3597
93c2fb25 3598 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3599 (!idev || idev->cnf.accept_ra != 2) &&
3600 fib6_info_hold_safe(rt)) {
93531c67
DA
3601 rcu_read_unlock();
3602 ip6_del_rt(net, rt);
1da177e4
LT
3603 goto restart;
3604 }
3605 }
66f5d6ce 3606 rcu_read_unlock();
830218c1
DA
3607
3608 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3609}
3610
3611void rt6_purge_dflt_routers(struct net *net)
3612{
3613 struct fib6_table *table;
3614 struct hlist_head *head;
3615 unsigned int h;
3616
3617 rcu_read_lock();
3618
3619 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3620 head = &net->ipv6.fib_table_hash[h];
3621 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3622 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3623 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3624 }
3625 }
3626
3627 rcu_read_unlock();
1da177e4
LT
3628}
3629
5578689a
DL
3630static void rtmsg_to_fib6_config(struct net *net,
3631 struct in6_rtmsg *rtmsg,
86872cb5
TG
3632 struct fib6_config *cfg)
3633{
3634 memset(cfg, 0, sizeof(*cfg));
3635
ca254490
DA
3636 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3637 : RT6_TABLE_MAIN;
86872cb5
TG
3638 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3639 cfg->fc_metric = rtmsg->rtmsg_metric;
3640 cfg->fc_expires = rtmsg->rtmsg_info;
3641 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3642 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3643 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3644 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3645
5578689a 3646 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3647
4e3fd7a0
AD
3648 cfg->fc_dst = rtmsg->rtmsg_dst;
3649 cfg->fc_src = rtmsg->rtmsg_src;
3650 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3651}
3652
5578689a 3653int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3654{
86872cb5 3655 struct fib6_config cfg;
1da177e4
LT
3656 struct in6_rtmsg rtmsg;
3657 int err;
3658
67ba4152 3659 switch (cmd) {
1da177e4
LT
3660 case SIOCADDRT: /* Add a route */
3661 case SIOCDELRT: /* Delete a route */
af31f412 3662 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3663 return -EPERM;
3664 err = copy_from_user(&rtmsg, arg,
3665 sizeof(struct in6_rtmsg));
3666 if (err)
3667 return -EFAULT;
86872cb5 3668
5578689a 3669 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3670
1da177e4
LT
3671 rtnl_lock();
3672 switch (cmd) {
3673 case SIOCADDRT:
acb54e3c 3674 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3675 break;
3676 case SIOCDELRT:
333c4301 3677 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3678 break;
3679 default:
3680 err = -EINVAL;
3681 }
3682 rtnl_unlock();
3683
3684 return err;
3ff50b79 3685 }
1da177e4
LT
3686
3687 return -EINVAL;
3688}
3689
3690/*
3691 * Drop the packet on the floor
3692 */
3693
d5fdd6ba 3694static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3695{
612f09e8 3696 int type;
adf30907 3697 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3698 switch (ipstats_mib_noroutes) {
3699 case IPSTATS_MIB_INNOROUTES:
0660e03f 3700 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3701 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3702 IP6_INC_STATS(dev_net(dst->dev),
3703 __in6_dev_get_safely(skb->dev),
3bd653c8 3704 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3705 break;
3706 }
3707 /* FALLTHROUGH */
3708 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3709 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3710 ipstats_mib_noroutes);
612f09e8
YH
3711 break;
3712 }
3ffe533c 3713 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3714 kfree_skb(skb);
3715 return 0;
3716}
3717
9ce8ade0
TG
3718static int ip6_pkt_discard(struct sk_buff *skb)
3719{
612f09e8 3720 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3721}
3722
ede2059d 3723static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3724{
adf30907 3725 skb->dev = skb_dst(skb)->dev;
612f09e8 3726 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3727}
3728
9ce8ade0
TG
3729static int ip6_pkt_prohibit(struct sk_buff *skb)
3730{
612f09e8 3731 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3732}
3733
ede2059d 3734static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3735{
adf30907 3736 skb->dev = skb_dst(skb)->dev;
612f09e8 3737 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3738}
3739
1da177e4
LT
3740/*
3741 * Allocate a dst for local (unicast / anycast) address.
3742 */
3743
360a9887
DA
3744struct fib6_info *addrconf_f6i_alloc(struct net *net,
3745 struct inet6_dev *idev,
3746 const struct in6_addr *addr,
3747 bool anycast, gfp_t gfp_flags)
1da177e4 3748{
ca254490 3749 u32 tb_id;
4832c30d 3750 struct net_device *dev = idev->dev;
360a9887 3751 struct fib6_info *f6i;
5f02ce24 3752
360a9887
DA
3753 f6i = fib6_info_alloc(gfp_flags);
3754 if (!f6i)
1da177e4
LT
3755 return ERR_PTR(-ENOMEM);
3756
360a9887 3757 f6i->dst_nocount = true;
360a9887
DA
3758 f6i->dst_host = true;
3759 f6i->fib6_protocol = RTPROT_KERNEL;
3760 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80 3761 if (anycast) {
360a9887
DA
3762 f6i->fib6_type = RTN_ANYCAST;
3763 f6i->fib6_flags |= RTF_ANYCAST;
e8478e80 3764 } else {
360a9887
DA
3765 f6i->fib6_type = RTN_LOCAL;
3766 f6i->fib6_flags |= RTF_LOCAL;
e8478e80 3767 }
1da177e4 3768
360a9887 3769 f6i->fib6_nh.nh_gw = *addr;
93531c67 3770 dev_hold(dev);
360a9887
DA
3771 f6i->fib6_nh.nh_dev = dev;
3772 f6i->fib6_dst.addr = *addr;
3773 f6i->fib6_dst.plen = 128;
ca254490 3774 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
360a9887 3775 f6i->fib6_table = fib6_get_table(net, tb_id);
1da177e4 3776
360a9887 3777 return f6i;
1da177e4
LT
3778}
3779
c3968a85
DW
/* remove deleted ip from prefsrc entries: walker argument */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this nexthop device; NULL matches any */
	struct net *net;	/* namespace whose null entry is skipped */
	struct in6_addr *addr;	/* address compared against each route's prefsrc */
};
3786
8d1c802b 3787static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3788{
3789 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3790 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3791 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3792
5e670d84 3793 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3794 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3795 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3796 spin_lock_bh(&rt6_exception_lock);
c3968a85 3797 /* remove prefsrc entry */
93c2fb25 3798 rt->fib6_prefsrc.plen = 0;
60006a48
WW
3799 /* need to update cache as well */
3800 rt6_exceptions_remove_prefsrc(rt);
3801 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3802 }
3803 return 0;
3804}
3805
3806void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3807{
3808 struct net *net = dev_net(ifp->idev->dev);
3809 struct arg_dev_net_ip adni = {
3810 .dev = ifp->idev->dev,
3811 .net = net,
3812 .addr = &ifp->addr,
3813 };
0c3584d5 3814 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3815}
3816
be7a010d 3817#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3818
3819/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3820static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3821{
3822 struct in6_addr *gateway = (struct in6_addr *)arg;
3823
93c2fb25 3824 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3825 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3826 return -1;
3827 }
b16cb459
WW
3828
3829 /* Further clean up cached routes in exception table.
3830 * This is needed because cached route may have a different
3831 * gateway than its 'parent' in the case of an ip redirect.
3832 */
3833 rt6_exceptions_clean_tohost(rt, gateway);
3834
be7a010d
DJ
3835 return 0;
3836}
3837
3838void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3839{
3840 fib6_clean_all(net, fib6_clean_tohost, gateway);
3841}
3842
2127d95a
IS
/* Argument for the FIB walkers that react to device state changes. */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned int nh_flags;	/* used by fib6_ifup(): flags to clear */
		unsigned long event;	/* used by fib6_ifdown(): NETDEV_* event */
	};
};
3850
8d1c802b 3851static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3852{
8d1c802b 3853 struct fib6_info *iter;
d7dedee1
IS
3854 struct fib6_node *fn;
3855
93c2fb25
DA
3856 fn = rcu_dereference_protected(rt->fib6_node,
3857 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3858 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3859 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3860 while (iter) {
93c2fb25 3861 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3862 rt6_qualify_for_ecmp(iter))
d7dedee1 3863 return iter;
8fb11a9a 3864 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3865 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3866 }
3867
3868 return NULL;
3869}
3870
8d1c802b 3871static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3872{
5e670d84
DA
3873 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3874 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3875 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3876 return true;
3877
3878 return false;
3879}
3880
8d1c802b 3881static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3882{
8d1c802b 3883 struct fib6_info *iter;
d7dedee1
IS
3884 int total = 0;
3885
3886 if (!rt6_is_dead(rt))
5e670d84 3887 total += rt->fib6_nh.nh_weight;
d7dedee1 3888
93c2fb25 3889 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3890 if (!rt6_is_dead(iter))
5e670d84 3891 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3892 }
3893
3894 return total;
3895}
3896
8d1c802b 3897static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3898{
3899 int upper_bound = -1;
3900
3901 if (!rt6_is_dead(rt)) {
5e670d84 3902 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3903 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3904 total) - 1;
3905 }
5e670d84 3906 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3907}
3908
8d1c802b 3909static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3910{
8d1c802b 3911 struct fib6_info *iter;
d7dedee1
IS
3912 int weight = 0;
3913
3914 rt6_upper_bound_set(rt, &weight, total);
3915
93c2fb25 3916 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3917 rt6_upper_bound_set(iter, &weight, total);
3918}
3919
8d1c802b 3920void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3921{
8d1c802b 3922 struct fib6_info *first;
d7dedee1
IS
3923 int total;
3924
3925 /* In case the entire multipath route was marked for flushing,
3926 * then there is no need to rebalance upon the removal of every
3927 * sibling route.
3928 */
93c2fb25 3929 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3930 return;
3931
3932 /* During lookup routes are evaluated in order, so we need to
3933 * make sure upper bounds are assigned from the first sibling
3934 * onwards.
3935 */
3936 first = rt6_multipath_first_sibling(rt);
3937 if (WARN_ON_ONCE(!first))
3938 return;
3939
3940 total = rt6_multipath_total_weight(first);
3941 rt6_multipath_upper_bound_set(first, total);
3942}
3943
8d1c802b 3944static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3945{
3946 const struct arg_netdev_event *arg = p_arg;
7aef6859 3947 struct net *net = dev_net(arg->dev);
2127d95a 3948
421842ed 3949 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3950 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3951 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3952 rt6_multipath_rebalance(rt);
1de178ed 3953 }
2127d95a
IS
3954
3955 return 0;
3956}
3957
3958void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3959{
3960 struct arg_netdev_event arg = {
3961 .dev = dev,
6802f3ad
IS
3962 {
3963 .nh_flags = nh_flags,
3964 },
2127d95a
IS
3965 };
3966
3967 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3968 arg.nh_flags |= RTNH_F_LINKDOWN;
3969
3970 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3971}
3972
8d1c802b 3973static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3974 const struct net_device *dev)
3975{
8d1c802b 3976 struct fib6_info *iter;
1de178ed 3977
5e670d84 3978 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3979 return true;
93c2fb25 3980 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3981 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3982 return true;
3983
3984 return false;
3985}
3986
8d1c802b 3987static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3988{
8d1c802b 3989 struct fib6_info *iter;
1de178ed
IS
3990
3991 rt->should_flush = 1;
93c2fb25 3992 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3993 iter->should_flush = 1;
3994}
3995
8d1c802b 3996static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3997 const struct net_device *down_dev)
3998{
8d1c802b 3999 struct fib6_info *iter;
1de178ed
IS
4000 unsigned int dead = 0;
4001
5e670d84
DA
4002 if (rt->fib6_nh.nh_dev == down_dev ||
4003 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 4004 dead++;
93c2fb25 4005 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
4006 if (iter->fib6_nh.nh_dev == down_dev ||
4007 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
4008 dead++;
4009
4010 return dead;
4011}
4012
8d1c802b 4013static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
4014 const struct net_device *dev,
4015 unsigned int nh_flags)
4016{
8d1c802b 4017 struct fib6_info *iter;
1de178ed 4018
5e670d84
DA
4019 if (rt->fib6_nh.nh_dev == dev)
4020 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 4021 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
4022 if (iter->fib6_nh.nh_dev == dev)
4023 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
4024}
4025
a1a22c12 4026/* called with write lock held for table with rt */
8d1c802b 4027static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4028{
4c981e28
IS
4029 const struct arg_netdev_event *arg = p_arg;
4030 const struct net_device *dev = arg->dev;
7aef6859 4031 struct net *net = dev_net(dev);
8ed67789 4032
421842ed 4033 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4034 return 0;
4035
4036 switch (arg->event) {
4037 case NETDEV_UNREGISTER:
5e670d84 4038 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 4039 case NETDEV_DOWN:
1de178ed 4040 if (rt->should_flush)
27c6fa73 4041 return -1;
93c2fb25 4042 if (!rt->fib6_nsiblings)
5e670d84 4043 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
4044 if (rt6_multipath_uses_dev(rt, dev)) {
4045 unsigned int count;
4046
4047 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4048 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4049 rt6_multipath_flush(rt);
4050 return -1;
4051 }
4052 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4053 RTNH_F_LINKDOWN);
7aef6859 4054 fib6_update_sernum(net, rt);
d7dedee1 4055 rt6_multipath_rebalance(rt);
1de178ed
IS
4056 }
4057 return -2;
27c6fa73 4058 case NETDEV_CHANGE:
5e670d84 4059 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 4060 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4061 break;
5e670d84 4062 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4063 rt6_multipath_rebalance(rt);
27c6fa73 4064 break;
2b241361 4065 }
c159d30c 4066
1da177e4
LT
4067 return 0;
4068}
4069
27c6fa73 4070void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4071{
4c981e28 4072 struct arg_netdev_event arg = {
8ed67789 4073 .dev = dev,
6802f3ad
IS
4074 {
4075 .event = event,
4076 },
8ed67789
DL
4077 };
4078
4c981e28
IS
4079 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4080}
4081
4082void rt6_disable_ip(struct net_device *dev, unsigned long event)
4083{
4084 rt6_sync_down_dev(dev, event);
4085 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4086 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4087}
4088
/* Argument handed to rt6_mtu_change_route() through fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
4093
8d1c802b 4094static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4095{
4096 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4097 struct inet6_dev *idev;
4098
4099 /* In IPv6 pmtu discovery is not optional,
4100 so that RTAX_MTU lock cannot disable it.
4101 We still use this lock to block changes
4102 caused by addrconf/ndisc.
4103 */
4104
4105 idev = __in6_dev_get(arg->dev);
38308473 4106 if (!idev)
1da177e4
LT
4107 return 0;
4108
4109 /* For administrative MTU increase, there is no way to discover
4110 IPv6 PMTU increase, so PMTU increase should be updated here.
4111 Since RFC 1981 doesn't include administrative MTU increase
4112 update PMTU increase is a MUST. (i.e. jumbo frame)
4113 */
5e670d84 4114 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4115 !fib6_metric_locked(rt, RTAX_MTU)) {
4116 u32 mtu = rt->fib6_pmtu;
4117
4118 if (mtu >= arg->mtu ||
4119 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4120 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4121
f5bbe7ee 4122 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4123 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4124 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4125 }
1da177e4
LT
4126 return 0;
4127}
4128
95c96174 4129void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4130{
c71099ac
TG
4131 struct rt6_mtu_change_arg arg = {
4132 .dev = dev,
4133 .mtu = mtu,
4134 };
1da177e4 4135
0c3584d5 4136 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4137}
4138
ef7c79ed 4139static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4140 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4141 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4142 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4143 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4144 [RTA_PRIORITY] = { .type = NLA_U32 },
4145 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4146 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4147 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4148 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4149 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4150 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4151 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4152 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4153 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4154 [RTA_IP_PROTO] = { .type = NLA_U8 },
4155 [RTA_SPORT] = { .type = NLA_U16 },
4156 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4157};
4158
4159static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4160 struct fib6_config *cfg,
4161 struct netlink_ext_ack *extack)
1da177e4 4162{
86872cb5
TG
4163 struct rtmsg *rtm;
4164 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4165 unsigned int pref;
86872cb5 4166 int err;
1da177e4 4167
fceb6435
JB
4168 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4169 NULL);
86872cb5
TG
4170 if (err < 0)
4171 goto errout;
1da177e4 4172
86872cb5
TG
4173 err = -EINVAL;
4174 rtm = nlmsg_data(nlh);
4175 memset(cfg, 0, sizeof(*cfg));
4176
4177 cfg->fc_table = rtm->rtm_table;
4178 cfg->fc_dst_len = rtm->rtm_dst_len;
4179 cfg->fc_src_len = rtm->rtm_src_len;
4180 cfg->fc_flags = RTF_UP;
4181 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4182 cfg->fc_type = rtm->rtm_type;
86872cb5 4183
ef2c7d7b
ND
4184 if (rtm->rtm_type == RTN_UNREACHABLE ||
4185 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4186 rtm->rtm_type == RTN_PROHIBIT ||
4187 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4188 cfg->fc_flags |= RTF_REJECT;
4189
ab79ad14
4190 if (rtm->rtm_type == RTN_LOCAL)
4191 cfg->fc_flags |= RTF_LOCAL;
4192
1f56a01f
MKL
4193 if (rtm->rtm_flags & RTM_F_CLONED)
4194 cfg->fc_flags |= RTF_CACHE;
4195
fc1e64e1
DA
4196 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4197
15e47304 4198 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4199 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4200 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4201
4202 if (tb[RTA_GATEWAY]) {
67b61f6c 4203 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4204 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4205 }
86872cb5
TG
4206
4207 if (tb[RTA_DST]) {
4208 int plen = (rtm->rtm_dst_len + 7) >> 3;
4209
4210 if (nla_len(tb[RTA_DST]) < plen)
4211 goto errout;
4212
4213 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4214 }
86872cb5
TG
4215
4216 if (tb[RTA_SRC]) {
4217 int plen = (rtm->rtm_src_len + 7) >> 3;
4218
4219 if (nla_len(tb[RTA_SRC]) < plen)
4220 goto errout;
4221
4222 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4223 }
86872cb5 4224
c3968a85 4225 if (tb[RTA_PREFSRC])
67b61f6c 4226 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4227
86872cb5
TG
4228 if (tb[RTA_OIF])
4229 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4230
4231 if (tb[RTA_PRIORITY])
4232 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4233
4234 if (tb[RTA_METRICS]) {
4235 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4236 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4237 }
86872cb5
TG
4238
4239 if (tb[RTA_TABLE])
4240 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4241
51ebd318
ND
4242 if (tb[RTA_MULTIPATH]) {
4243 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4244 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4245
4246 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4247 cfg->fc_mp_len, extack);
9ed59592
DA
4248 if (err < 0)
4249 goto errout;
51ebd318
ND
4250 }
4251
c78ba6d6
LR
4252 if (tb[RTA_PREF]) {
4253 pref = nla_get_u8(tb[RTA_PREF]);
4254 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4255 pref != ICMPV6_ROUTER_PREF_HIGH)
4256 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4257 cfg->fc_flags |= RTF_PREF(pref);
4258 }
4259
19e42e45
RP
4260 if (tb[RTA_ENCAP])
4261 cfg->fc_encap = tb[RTA_ENCAP];
4262
9ed59592 4263 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4264 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4265
c255bd68 4266 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4267 if (err < 0)
4268 goto errout;
4269 }
4270
32bc201e
XL
4271 if (tb[RTA_EXPIRES]) {
4272 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4273
4274 if (addrconf_finite_timeout(timeout)) {
4275 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4276 cfg->fc_flags |= RTF_EXPIRES;
4277 }
4278 }
4279
86872cb5
TG
4280 err = 0;
4281errout:
4282 return err;
1da177e4
LT
4283}
4284
6b9ea5a6 4285struct rt6_nh {
8d1c802b 4286 struct fib6_info *fib6_info;
6b9ea5a6 4287 struct fib6_config r_cfg;
6b9ea5a6
RP
4288 struct list_head next;
4289};
4290
4291static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4292{
4293 struct rt6_nh *nh;
4294
4295 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4296 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4297 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4298 nh->r_cfg.fc_ifindex);
4299 }
4300}
4301
d4ead6b3
DA
4302static int ip6_route_info_append(struct net *net,
4303 struct list_head *rt6_nh_list,
8d1c802b
DA
4304 struct fib6_info *rt,
4305 struct fib6_config *r_cfg)
6b9ea5a6
RP
4306{
4307 struct rt6_nh *nh;
6b9ea5a6
RP
4308 int err = -EEXIST;
4309
4310 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4311 /* check if fib6_info already exists */
4312 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4313 return err;
4314 }
4315
4316 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4317 if (!nh)
4318 return -ENOMEM;
8d1c802b 4319 nh->fib6_info = rt;
d4ead6b3 4320 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4321 if (err) {
4322 kfree(nh);
4323 return err;
4324 }
4325 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4326 list_add_tail(&nh->next, rt6_nh_list);
4327
4328 return 0;
4329}
4330
8d1c802b
DA
4331static void ip6_route_mpath_notify(struct fib6_info *rt,
4332 struct fib6_info *rt_last,
3b1137fe
DA
4333 struct nl_info *info,
4334 __u16 nlflags)
4335{
4336 /* if this is an APPEND route, then rt points to the first route
4337 * inserted and rt_last points to last route inserted. Userspace
4338 * wants a consistent dump of the route which starts at the first
4339 * nexthop. Since sibling routes are always added at the end of
4340 * the list, find the first sibling of the last route appended
4341 */
93c2fb25
DA
4342 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4343 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4344 struct fib6_info,
93c2fb25 4345 fib6_siblings);
3b1137fe
DA
4346 }
4347
4348 if (rt)
4349 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4350}
4351
333c4301
DA
4352static int ip6_route_multipath_add(struct fib6_config *cfg,
4353 struct netlink_ext_ack *extack)
51ebd318 4354{
8d1c802b 4355 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4356 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4357 struct fib6_config r_cfg;
4358 struct rtnexthop *rtnh;
8d1c802b 4359 struct fib6_info *rt;
6b9ea5a6
RP
4360 struct rt6_nh *err_nh;
4361 struct rt6_nh *nh, *nh_safe;
3b1137fe 4362 __u16 nlflags;
51ebd318
ND
4363 int remaining;
4364 int attrlen;
6b9ea5a6
RP
4365 int err = 1;
4366 int nhn = 0;
4367 int replace = (cfg->fc_nlinfo.nlh &&
4368 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4369 LIST_HEAD(rt6_nh_list);
51ebd318 4370
3b1137fe
DA
4371 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4372 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4373 nlflags |= NLM_F_APPEND;
4374
35f1b4e9 4375 remaining = cfg->fc_mp_len;
51ebd318 4376 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4377
6b9ea5a6 4378 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4379 * fib6_info structs per nexthop
6b9ea5a6 4380 */
51ebd318
ND
4381 while (rtnh_ok(rtnh, remaining)) {
4382 memcpy(&r_cfg, cfg, sizeof(*cfg));
4383 if (rtnh->rtnh_ifindex)
4384 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4385
4386 attrlen = rtnh_attrlen(rtnh);
4387 if (attrlen > 0) {
4388 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4389
4390 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4391 if (nla) {
67b61f6c 4392 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4393 r_cfg.fc_flags |= RTF_GATEWAY;
4394 }
19e42e45
RP
4395 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4396 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4397 if (nla)
4398 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4399 }
6b9ea5a6 4400
68e2ffde 4401 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4402 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4403 if (IS_ERR(rt)) {
4404 err = PTR_ERR(rt);
4405 rt = NULL;
6b9ea5a6 4406 goto cleanup;
8c5b83f0 4407 }
b5d2d75e
DA
4408 if (!rt6_qualify_for_ecmp(rt)) {
4409 err = -EINVAL;
4410 NL_SET_ERR_MSG(extack,
4411 "Device only routes can not be added for IPv6 using the multipath API.");
4412 fib6_info_release(rt);
4413 goto cleanup;
4414 }
6b9ea5a6 4415
5e670d84 4416 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4417
d4ead6b3
DA
4418 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4419 rt, &r_cfg);
51ebd318 4420 if (err) {
93531c67 4421 fib6_info_release(rt);
6b9ea5a6
RP
4422 goto cleanup;
4423 }
4424
4425 rtnh = rtnh_next(rtnh, &remaining);
4426 }
4427
3b1137fe
DA
4428 /* for add and replace send one notification with all nexthops.
4429 * Skip the notification in fib6_add_rt2node and send one with
4430 * the full route when done
4431 */
4432 info->skip_notify = 1;
4433
6b9ea5a6
RP
4434 err_nh = NULL;
4435 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4436 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4437 fib6_info_release(nh->fib6_info);
93531c67 4438
f7225172
DA
4439 if (!err) {
4440 /* save reference to last route successfully inserted */
4441 rt_last = nh->fib6_info;
4442
4443 /* save reference to first route for notification */
4444 if (!rt_notif)
4445 rt_notif = nh->fib6_info;
4446 }
3b1137fe 4447
8d1c802b
DA
4448 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4449 nh->fib6_info = NULL;
6b9ea5a6
RP
4450 if (err) {
4451 if (replace && nhn)
4452 ip6_print_replace_route_err(&rt6_nh_list);
4453 err_nh = nh;
4454 goto add_errout;
51ebd318 4455 }
6b9ea5a6 4456
1a72418b 4457 /* Because each route is added like a single route we remove
27596472
MK
4458 * these flags after the first nexthop: if there is a collision,
4459 * we have already failed to add the first nexthop:
4460 * fib6_add_rt2node() has rejected it; when replacing, old
4461 * nexthops have been replaced by first new, the rest should
4462 * be added to it.
1a72418b 4463 */
27596472
MK
4464 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4465 NLM_F_REPLACE);
6b9ea5a6
RP
4466 nhn++;
4467 }
4468
3b1137fe
DA
4469 /* success ... tell user about new route */
4470 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4471 goto cleanup;
4472
4473add_errout:
3b1137fe
DA
4474 /* send notification for routes that were added so that
4475 * the delete notifications sent by ip6_route_del are
4476 * coherent
4477 */
4478 if (rt_notif)
4479 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4480
6b9ea5a6
RP
4481 /* Delete routes that were already added */
4482 list_for_each_entry(nh, &rt6_nh_list, next) {
4483 if (err_nh == nh)
4484 break;
333c4301 4485 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4486 }
4487
4488cleanup:
4489 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4490 if (nh->fib6_info)
4491 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4492 list_del(&nh->next);
4493 kfree(nh);
4494 }
4495
4496 return err;
4497}
4498
333c4301
DA
4499static int ip6_route_multipath_del(struct fib6_config *cfg,
4500 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4501{
4502 struct fib6_config r_cfg;
4503 struct rtnexthop *rtnh;
4504 int remaining;
4505 int attrlen;
4506 int err = 1, last_err = 0;
4507
4508 remaining = cfg->fc_mp_len;
4509 rtnh = (struct rtnexthop *)cfg->fc_mp;
4510
4511 /* Parse a Multipath Entry */
4512 while (rtnh_ok(rtnh, remaining)) {
4513 memcpy(&r_cfg, cfg, sizeof(*cfg));
4514 if (rtnh->rtnh_ifindex)
4515 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4516
4517 attrlen = rtnh_attrlen(rtnh);
4518 if (attrlen > 0) {
4519 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4520
4521 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4522 if (nla) {
4523 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4524 r_cfg.fc_flags |= RTF_GATEWAY;
4525 }
4526 }
333c4301 4527 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4528 if (err)
4529 last_err = err;
4530
51ebd318
ND
4531 rtnh = rtnh_next(rtnh, &remaining);
4532 }
4533
4534 return last_err;
4535}
4536
c21ef3e3
DA
4537static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4538 struct netlink_ext_ack *extack)
1da177e4 4539{
86872cb5
TG
4540 struct fib6_config cfg;
4541 int err;
1da177e4 4542
333c4301 4543 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4544 if (err < 0)
4545 return err;
4546
51ebd318 4547 if (cfg.fc_mp)
333c4301 4548 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4549 else {
4550 cfg.fc_delete_all_nh = 1;
333c4301 4551 return ip6_route_del(&cfg, extack);
0ae81335 4552 }
1da177e4
LT
4553}
4554
c21ef3e3
DA
4555static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4556 struct netlink_ext_ack *extack)
1da177e4 4557{
86872cb5
TG
4558 struct fib6_config cfg;
4559 int err;
1da177e4 4560
333c4301 4561 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4562 if (err < 0)
4563 return err;
4564
51ebd318 4565 if (cfg.fc_mp)
333c4301 4566 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4567 else
acb54e3c 4568 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4569}
4570
8d1c802b 4571static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4572{
beb1afac
DA
4573 int nexthop_len = 0;
4574
93c2fb25 4575 if (rt->fib6_nsiblings) {
beb1afac
DA
4576 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4577 + NLA_ALIGN(sizeof(struct rtnexthop))
4578 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4579 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4580
93c2fb25 4581 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4582 }
4583
339bf98f
TG
4584 return NLMSG_ALIGN(sizeof(struct rtmsg))
4585 + nla_total_size(16) /* RTA_SRC */
4586 + nla_total_size(16) /* RTA_DST */
4587 + nla_total_size(16) /* RTA_GATEWAY */
4588 + nla_total_size(16) /* RTA_PREFSRC */
4589 + nla_total_size(4) /* RTA_TABLE */
4590 + nla_total_size(4) /* RTA_IIF */
4591 + nla_total_size(4) /* RTA_OIF */
4592 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4593 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4594 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4595 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4596 + nla_total_size(1) /* RTA_PREF */
5e670d84 4597 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4598 + nexthop_len;
4599}
4600
8d1c802b 4601static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4602 unsigned int *flags, bool skip_oif)
beb1afac 4603{
5e670d84 4604 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4605 *flags |= RTNH_F_DEAD;
4606
5e670d84 4607 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4608 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4609
4610 rcu_read_lock();
4611 if (fib6_ignore_linkdown(rt))
beb1afac 4612 *flags |= RTNH_F_DEAD;
dcd1f572 4613 rcu_read_unlock();
beb1afac
DA
4614 }
4615
93c2fb25 4616 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4617 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4618 goto nla_put_failure;
4619 }
4620
5e670d84
DA
4621 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4622 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4623 *flags |= RTNH_F_OFFLOAD;
4624
5be083ce 4625 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4626 if (!skip_oif && rt->fib6_nh.nh_dev &&
4627 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4628 goto nla_put_failure;
4629
5e670d84
DA
4630 if (rt->fib6_nh.nh_lwtstate &&
4631 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4632 goto nla_put_failure;
4633
4634 return 0;
4635
4636nla_put_failure:
4637 return -EMSGSIZE;
4638}
4639
5be083ce 4640/* add multipath next hop */
8d1c802b 4641static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4642{
5e670d84 4643 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4644 struct rtnexthop *rtnh;
4645 unsigned int flags = 0;
4646
4647 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4648 if (!rtnh)
4649 goto nla_put_failure;
4650
5e670d84
DA
4651 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4652 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4653
5be083ce 4654 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4655 goto nla_put_failure;
4656
4657 rtnh->rtnh_flags = flags;
4658
4659 /* length of rtnetlink header + attributes */
4660 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4661
4662 return 0;
4663
4664nla_put_failure:
4665 return -EMSGSIZE;
339bf98f
TG
4666}
4667
d4ead6b3 4668static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4669 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4670 struct in6_addr *dest, struct in6_addr *src,
15e47304 4671 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4672 unsigned int flags)
1da177e4
LT
4673{
4674 struct rtmsg *rtm;
2d7202bf 4675 struct nlmsghdr *nlh;
d4ead6b3
DA
4676 long expires = 0;
4677 u32 *pmetrics;
9e762a4a 4678 u32 table;
1da177e4 4679
15e47304 4680 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4681 if (!nlh)
26932566 4682 return -EMSGSIZE;
2d7202bf
TG
4683
4684 rtm = nlmsg_data(nlh);
1da177e4 4685 rtm->rtm_family = AF_INET6;
93c2fb25
DA
4686 rtm->rtm_dst_len = rt->fib6_dst.plen;
4687 rtm->rtm_src_len = rt->fib6_src.plen;
1da177e4 4688 rtm->rtm_tos = 0;
93c2fb25
DA
4689 if (rt->fib6_table)
4690 table = rt->fib6_table->tb6_id;
c71099ac 4691 else
9e762a4a
PM
4692 table = RT6_TABLE_UNSPEC;
4693 rtm->rtm_table = table;
c78679e8
DM
4694 if (nla_put_u32(skb, RTA_TABLE, table))
4695 goto nla_put_failure;
e8478e80
DA
4696
4697 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4698 rtm->rtm_flags = 0;
4699 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4700 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4701
93c2fb25 4702 if (rt->fib6_flags & RTF_CACHE)
1da177e4
LT
4703 rtm->rtm_flags |= RTM_F_CLONED;
4704
d4ead6b3
DA
4705 if (dest) {
4706 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4707 goto nla_put_failure;
1ab1457c 4708 rtm->rtm_dst_len = 128;
1da177e4 4709 } else if (rtm->rtm_dst_len)
93c2fb25 4710 if (nla_put_in6_addr(skb, RTA_DST, &rt->fib6_dst.addr))
c78679e8 4711 goto nla_put_failure;
1da177e4
LT
4712#ifdef CONFIG_IPV6_SUBTREES
4713 if (src) {
930345ea 4714 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4715 goto nla_put_failure;
1ab1457c 4716 rtm->rtm_src_len = 128;
c78679e8 4717 } else if (rtm->rtm_src_len &&
93c2fb25 4718 nla_put_in6_addr(skb, RTA_SRC, &rt->fib6_src.addr))
c78679e8 4719 goto nla_put_failure;
1da177e4 4720#endif
7bc570c8
YH
4721 if (iif) {
4722#ifdef CONFIG_IPV6_MROUTE
93c2fb25 4723 if (ipv6_addr_is_multicast(&rt->fib6_dst.addr)) {
fd61c6ba
DA
4724 int err = ip6mr_get_route(net, skb, rtm, portid);
4725
4726 if (err == 0)
4727 return 0;
4728 if (err < 0)
4729 goto nla_put_failure;
7bc570c8
YH
4730 } else
4731#endif
c78679e8
DM
4732 if (nla_put_u32(skb, RTA_IIF, iif))
4733 goto nla_put_failure;
d4ead6b3 4734 } else if (dest) {
1da177e4 4735 struct in6_addr saddr_buf;
d4ead6b3 4736 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4737 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4738 goto nla_put_failure;
1da177e4 4739 }
2d7202bf 4740
93c2fb25 4741 if (rt->fib6_prefsrc.plen) {
c3968a85 4742 struct in6_addr saddr_buf;
93c2fb25 4743 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4744 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4745 goto nla_put_failure;
c3968a85
DW
4746 }
4747
d4ead6b3
DA
4748 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4749 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4750 goto nla_put_failure;
4751
93c2fb25 4752 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4753 goto nla_put_failure;
8253947e 4754
beb1afac
DA
4755 /* For multipath routes, walk the siblings list and add
4756 * each as a nexthop within RTA_MULTIPATH.
4757 */
93c2fb25 4758 if (rt->fib6_nsiblings) {
8d1c802b 4759 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4760 struct nlattr *mp;
4761
4762 mp = nla_nest_start(skb, RTA_MULTIPATH);
4763 if (!mp)
4764 goto nla_put_failure;
4765
4766 if (rt6_add_nexthop(skb, rt) < 0)
4767 goto nla_put_failure;
4768
4769 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4770 &rt->fib6_siblings, fib6_siblings) {
beb1afac
DA
4771 if (rt6_add_nexthop(skb, sibling) < 0)
4772 goto nla_put_failure;
4773 }
4774
4775 nla_nest_end(skb, mp);
4776 } else {
5be083ce 4777 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4778 goto nla_put_failure;
4779 }
4780
93c2fb25 4781 if (rt->fib6_flags & RTF_EXPIRES) {
14895687
DA
4782 expires = dst ? dst->expires : rt->expires;
4783 expires -= jiffies;
4784 }
69cdf8f9 4785
d4ead6b3 4786 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4787 goto nla_put_failure;
2d7202bf 4788
93c2fb25 4789 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->fib6_flags)))
c78ba6d6
LR
4790 goto nla_put_failure;
4791
19e42e45 4792
053c095a
JB
4793 nlmsg_end(skb, nlh);
4794 return 0;
2d7202bf
TG
4795
4796nla_put_failure:
26932566
PM
4797 nlmsg_cancel(skb, nlh);
4798 return -EMSGSIZE;
1da177e4
LT
4799}
4800
8d1c802b 4801int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4802{
4803 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4804 struct net *net = arg->net;
4805
421842ed 4806 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4807 return 0;
1da177e4 4808
2d7202bf
TG
4809 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4810 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4811
4812 /* user wants prefix routes only */
4813 if (rtm->rtm_flags & RTM_F_PREFIX &&
93c2fb25 4814 !(rt->fib6_flags & RTF_PREFIX_RT)) {
f8cfe2ce
DA
4815 /* success since this is not a prefix route */
4816 return 1;
4817 }
4818 }
1da177e4 4819
d4ead6b3
DA
4820 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4821 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4822 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4823}
4824
c21ef3e3
DA
4825static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4826 struct netlink_ext_ack *extack)
1da177e4 4827{
3b1e0a65 4828 struct net *net = sock_net(in_skb->sk);
ab364a6f 4829 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4830 int err, iif = 0, oif = 0;
a68886a6 4831 struct fib6_info *from;
18c3a61c 4832 struct dst_entry *dst;
ab364a6f 4833 struct rt6_info *rt;
1da177e4 4834 struct sk_buff *skb;
ab364a6f 4835 struct rtmsg *rtm;
4c9483b2 4836 struct flowi6 fl6;
18c3a61c 4837 bool fibmatch;
1da177e4 4838
fceb6435 4839 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4840 extack);
ab364a6f
TG
4841 if (err < 0)
4842 goto errout;
1da177e4 4843
ab364a6f 4844 err = -EINVAL;
4c9483b2 4845 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4846 rtm = nlmsg_data(nlh);
4847 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4848 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4849
ab364a6f
TG
4850 if (tb[RTA_SRC]) {
4851 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4852 goto errout;
4853
4e3fd7a0 4854 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4855 }
4856
4857 if (tb[RTA_DST]) {
4858 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4859 goto errout;
4860
4e3fd7a0 4861 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4862 }
4863
4864 if (tb[RTA_IIF])
4865 iif = nla_get_u32(tb[RTA_IIF]);
4866
4867 if (tb[RTA_OIF])
72331bc0 4868 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4869
2e47b291
LC
4870 if (tb[RTA_MARK])
4871 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4872
622ec2c9
LC
4873 if (tb[RTA_UID])
4874 fl6.flowi6_uid = make_kuid(current_user_ns(),
4875 nla_get_u32(tb[RTA_UID]));
4876 else
4877 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4878
eacb9384
RP
4879 if (tb[RTA_SPORT])
4880 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4881
4882 if (tb[RTA_DPORT])
4883 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4884
4885 if (tb[RTA_IP_PROTO]) {
4886 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4887 &fl6.flowi6_proto, extack);
4888 if (err)
4889 goto errout;
4890 }
4891
1da177e4
LT
4892 if (iif) {
4893 struct net_device *dev;
72331bc0
SL
4894 int flags = 0;
4895
121622db
FW
4896 rcu_read_lock();
4897
4898 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4899 if (!dev) {
121622db 4900 rcu_read_unlock();
1da177e4 4901 err = -ENODEV;
ab364a6f 4902 goto errout;
1da177e4 4903 }
72331bc0
SL
4904
4905 fl6.flowi6_iif = iif;
4906
4907 if (!ipv6_addr_any(&fl6.saddr))
4908 flags |= RT6_LOOKUP_F_HAS_SADDR;
4909
b75cc8f9 4910 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4911
4912 rcu_read_unlock();
72331bc0
SL
4913 } else {
4914 fl6.flowi6_oif = oif;
4915
58acfd71 4916 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4917 }
4918
18c3a61c
RP
4919
4920 rt = container_of(dst, struct rt6_info, dst);
4921 if (rt->dst.error) {
4922 err = rt->dst.error;
4923 ip6_rt_put(rt);
4924 goto errout;
1da177e4
LT
4925 }
4926
9d6acb3b
WC
4927 if (rt == net->ipv6.ip6_null_entry) {
4928 err = rt->dst.error;
4929 ip6_rt_put(rt);
4930 goto errout;
4931 }
4932
ab364a6f 4933 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4934 if (!skb) {
94e187c0 4935 ip6_rt_put(rt);
ab364a6f
TG
4936 err = -ENOBUFS;
4937 goto errout;
4938 }
1da177e4 4939
d8d1f30b 4940 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4941
4942 rcu_read_lock();
4943 from = rcu_dereference(rt->from);
4944
18c3a61c 4945 if (fibmatch)
a68886a6 4946 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4947 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4948 nlh->nlmsg_seq, 0);
4949 else
a68886a6
DA
4950 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4951 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4952 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4953 0);
a68886a6
DA
4954 rcu_read_unlock();
4955
1da177e4 4956 if (err < 0) {
ab364a6f
TG
4957 kfree_skb(skb);
4958 goto errout;
1da177e4
LT
4959 }
4960
15e47304 4961 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4962errout:
1da177e4 4963 return err;
1da177e4
LT
4964}
4965
8d1c802b 4966void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4967 unsigned int nlm_flags)
1da177e4
LT
4968{
4969 struct sk_buff *skb;
5578689a 4970 struct net *net = info->nl_net;
528c4ceb
DL
4971 u32 seq;
4972 int err;
4973
4974 err = -ENOBUFS;
38308473 4975 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4976
19e42e45 4977 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4978 if (!skb)
21713ebc
TG
4979 goto errout;
4980
d4ead6b3
DA
4981 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
4982 event, info->portid, seq, nlm_flags);
26932566
PM
4983 if (err < 0) {
4984 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4985 WARN_ON(err == -EMSGSIZE);
4986 kfree_skb(skb);
4987 goto errout;
4988 }
15e47304 4989 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4990 info->nlh, gfp_any());
4991 return;
21713ebc
TG
4992errout:
4993 if (err < 0)
5578689a 4994 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4995}
4996
8ed67789 4997static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4998 unsigned long event, void *ptr)
8ed67789 4999{
351638e7 5000 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5001 struct net *net = dev_net(dev);
8ed67789 5002
242d3a49
WC
5003 if (!(dev->flags & IFF_LOOPBACK))
5004 return NOTIFY_OK;
5005
5006 if (event == NETDEV_REGISTER) {
421842ed 5007 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 5008 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5009 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5010#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5011 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5012 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5013 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5014 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5015#endif
76da0704
WC
5016 } else if (event == NETDEV_UNREGISTER &&
5017 dev->reg_state != NETREG_UNREGISTERED) {
5018 /* NETDEV_UNREGISTER could be fired for multiple times by
5019 * netdev_wait_allrefs(). Make sure we only call this once.
5020 */
12d94a80 5021 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5022#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5023 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5024 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5025#endif
5026 }
5027
5028 return NOTIFY_OK;
5029}
5030
1da177e4
LT
5031/*
5032 * /proc
5033 */
5034
5035#ifdef CONFIG_PROC_FS
1da177e4
LT
5036static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5037{
69ddb805 5038 struct net *net = (struct net *)seq->private;
1da177e4 5039 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5040 net->ipv6.rt6_stats->fib_nodes,
5041 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5042 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5043 net->ipv6.rt6_stats->fib_rt_entries,
5044 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5045 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5046 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5047
5048 return 0;
5049}
1da177e4
LT
5050#endif /* CONFIG_PROC_FS */
5051
5052#ifdef CONFIG_SYSCTL
5053
1da177e4 5054static
fe2c6338 5055int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5056 void __user *buffer, size_t *lenp, loff_t *ppos)
5057{
c486da34
LAG
5058 struct net *net;
5059 int delay;
5060 if (!write)
1da177e4 5061 return -EINVAL;
c486da34
LAG
5062
5063 net = (struct net *)ctl->extra1;
5064 delay = net->ipv6.sysctl.flush_delay;
5065 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 5066 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5067 return 0;
1da177e4
LT
5068}
5069
fe2c6338 5070struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5071 {
1da177e4 5072 .procname = "flush",
4990509f 5073 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5074 .maxlen = sizeof(int),
89c8b3a1 5075 .mode = 0200,
6d9f239a 5076 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5077 },
5078 {
1da177e4 5079 .procname = "gc_thresh",
9a7ec3a9 5080 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5081 .maxlen = sizeof(int),
5082 .mode = 0644,
6d9f239a 5083 .proc_handler = proc_dointvec,
1da177e4
LT
5084 },
5085 {
1da177e4 5086 .procname = "max_size",
4990509f 5087 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5088 .maxlen = sizeof(int),
5089 .mode = 0644,
6d9f239a 5090 .proc_handler = proc_dointvec,
1da177e4
LT
5091 },
5092 {
1da177e4 5093 .procname = "gc_min_interval",
4990509f 5094 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5095 .maxlen = sizeof(int),
5096 .mode = 0644,
6d9f239a 5097 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5098 },
5099 {
1da177e4 5100 .procname = "gc_timeout",
4990509f 5101 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5102 .maxlen = sizeof(int),
5103 .mode = 0644,
6d9f239a 5104 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5105 },
5106 {
1da177e4 5107 .procname = "gc_interval",
4990509f 5108 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5109 .maxlen = sizeof(int),
5110 .mode = 0644,
6d9f239a 5111 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5112 },
5113 {
1da177e4 5114 .procname = "gc_elasticity",
4990509f 5115 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5116 .maxlen = sizeof(int),
5117 .mode = 0644,
f3d3f616 5118 .proc_handler = proc_dointvec,
1da177e4
LT
5119 },
5120 {
1da177e4 5121 .procname = "mtu_expires",
4990509f 5122 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5123 .maxlen = sizeof(int),
5124 .mode = 0644,
6d9f239a 5125 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5126 },
5127 {
1da177e4 5128 .procname = "min_adv_mss",
4990509f 5129 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5130 .maxlen = sizeof(int),
5131 .mode = 0644,
f3d3f616 5132 .proc_handler = proc_dointvec,
1da177e4
LT
5133 },
5134 {
1da177e4 5135 .procname = "gc_min_interval_ms",
4990509f 5136 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5137 .maxlen = sizeof(int),
5138 .mode = 0644,
6d9f239a 5139 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5140 },
f8572d8f 5141 { }
1da177e4
LT
5142};
5143
2c8c1e72 5144struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5145{
5146 struct ctl_table *table;
5147
5148 table = kmemdup(ipv6_route_table_template,
5149 sizeof(ipv6_route_table_template),
5150 GFP_KERNEL);
5ee09105
YH
5151
5152 if (table) {
5153 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5154 table[0].extra1 = net;
86393e52 5155 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5156 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5157 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5158 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5159 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5160 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5161 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5162 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5163 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5164
5165 /* Don't export sysctls to unprivileged users */
5166 if (net->user_ns != &init_user_ns)
5167 table[0].procname = NULL;
5ee09105
YH
5168 }
5169
760f2d01
DL
5170 return table;
5171}
1da177e4
LT
5172#endif
5173
2c8c1e72 5174static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5175{
633d424b 5176 int ret = -ENOMEM;
8ed67789 5177
86393e52
AD
5178 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5179 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5180
fc66f95c
ED
5181 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5182 goto out_ip6_dst_ops;
5183
421842ed
DA
5184 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5185 sizeof(*net->ipv6.fib6_null_entry),
5186 GFP_KERNEL);
5187 if (!net->ipv6.fib6_null_entry)
5188 goto out_ip6_dst_entries;
5189
8ed67789
DL
5190 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5191 sizeof(*net->ipv6.ip6_null_entry),
5192 GFP_KERNEL);
5193 if (!net->ipv6.ip6_null_entry)
421842ed 5194 goto out_fib6_null_entry;
d8d1f30b 5195 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5196 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5197 ip6_template_metrics, true);
8ed67789
DL
5198
5199#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5200 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5201 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5202 sizeof(*net->ipv6.ip6_prohibit_entry),
5203 GFP_KERNEL);
68fffc67
PZ
5204 if (!net->ipv6.ip6_prohibit_entry)
5205 goto out_ip6_null_entry;
d8d1f30b 5206 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5207 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5208 ip6_template_metrics, true);
8ed67789
DL
5209
5210 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5211 sizeof(*net->ipv6.ip6_blk_hole_entry),
5212 GFP_KERNEL);
68fffc67
PZ
5213 if (!net->ipv6.ip6_blk_hole_entry)
5214 goto out_ip6_prohibit_entry;
d8d1f30b 5215 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5216 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5217 ip6_template_metrics, true);
8ed67789
DL
5218#endif
5219
b339a47c
PZ
5220 net->ipv6.sysctl.flush_delay = 0;
5221 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5222 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5223 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5224 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5225 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5226 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5227 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5228
6891a346
BT
5229 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5230
8ed67789
DL
5231 ret = 0;
5232out:
5233 return ret;
f2fc6a54 5234
68fffc67
PZ
5235#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5236out_ip6_prohibit_entry:
5237 kfree(net->ipv6.ip6_prohibit_entry);
5238out_ip6_null_entry:
5239 kfree(net->ipv6.ip6_null_entry);
5240#endif
421842ed
DA
5241out_fib6_null_entry:
5242 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5243out_ip6_dst_entries:
5244 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5245out_ip6_dst_ops:
f2fc6a54 5246 goto out;
cdb18761
DL
5247}
5248
2c8c1e72 5249static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5250{
421842ed 5251 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5252 kfree(net->ipv6.ip6_null_entry);
5253#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5254 kfree(net->ipv6.ip6_prohibit_entry);
5255 kfree(net->ipv6.ip6_blk_hole_entry);
5256#endif
41bb78b4 5257 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5258}
5259
d189634e
TG
5260static int __net_init ip6_route_net_init_late(struct net *net)
5261{
5262#ifdef CONFIG_PROC_FS
c3506372
CH
5263 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5264 sizeof(struct ipv6_route_iter));
3617d949
CH
5265 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5266 rt6_stats_seq_show, NULL);
d189634e
TG
5267#endif
5268 return 0;
5269}
5270
5271static void __net_exit ip6_route_net_exit_late(struct net *net)
5272{
5273#ifdef CONFIG_PROC_FS
ece31ffd
G
5274 remove_proc_entry("ipv6_route", net->proc_net);
5275 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5276#endif
5277}
5278
cdb18761
DL
5279static struct pernet_operations ip6_route_net_ops = {
5280 .init = ip6_route_net_init,
5281 .exit = ip6_route_net_exit,
5282};
5283
c3426b47
DM
5284static int __net_init ipv6_inetpeer_init(struct net *net)
5285{
5286 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5287
5288 if (!bp)
5289 return -ENOMEM;
5290 inet_peer_base_init(bp);
5291 net->ipv6.peers = bp;
5292 return 0;
5293}
5294
5295static void __net_exit ipv6_inetpeer_exit(struct net *net)
5296{
5297 struct inet_peer_base *bp = net->ipv6.peers;
5298
5299 net->ipv6.peers = NULL;
56a6b248 5300 inetpeer_invalidate_tree(bp);
c3426b47
DM
5301 kfree(bp);
5302}
5303
2b823f72 5304static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5305 .init = ipv6_inetpeer_init,
5306 .exit = ipv6_inetpeer_exit,
5307};
5308
d189634e
TG
5309static struct pernet_operations ip6_route_net_late_ops = {
5310 .init = ip6_route_net_init_late,
5311 .exit = ip6_route_net_exit_late,
5312};
5313
8ed67789
DL
5314static struct notifier_block ip6_route_dev_notifier = {
5315 .notifier_call = ip6_route_dev_notify,
242d3a49 5316 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5317};
5318
2f460933
WC
5319void __init ip6_route_init_special_entries(void)
5320{
5321 /* Registering of the loopback is done before this portion of code,
5322 * the loopback reference in rt6_info will not be taken, do it
5323 * manually for init_net */
421842ed 5324 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5325 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5326 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5327 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5328 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5329 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5330 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5331 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5332 #endif
5333}
5334
433d49c3 5335int __init ip6_route_init(void)
1da177e4 5336{
433d49c3 5337 int ret;
8d0b94af 5338 int cpu;
433d49c3 5339
9a7ec3a9
DL
5340 ret = -ENOMEM;
5341 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5342 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5343 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5344 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5345 goto out;
14e50e57 5346
fc66f95c 5347 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5348 if (ret)
bdb3289f 5349 goto out_kmem_cache;
bdb3289f 5350
c3426b47
DM
5351 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5352 if (ret)
e8803b6c 5353 goto out_dst_entries;
2a0c451a 5354
7e52b33b
DM
5355 ret = register_pernet_subsys(&ip6_route_net_ops);
5356 if (ret)
5357 goto out_register_inetpeer;
c3426b47 5358
5dc121e9
AE
5359 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5360
e8803b6c 5361 ret = fib6_init();
433d49c3 5362 if (ret)
8ed67789 5363 goto out_register_subsys;
433d49c3 5364
433d49c3
DL
5365 ret = xfrm6_init();
5366 if (ret)
e8803b6c 5367 goto out_fib6_init;
c35b7e72 5368
433d49c3
DL
5369 ret = fib6_rules_init();
5370 if (ret)
5371 goto xfrm6_init;
7e5449c2 5372
d189634e
TG
5373 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5374 if (ret)
5375 goto fib6_rules_init;
5376
16feebcf
FW
5377 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5378 inet6_rtm_newroute, NULL, 0);
5379 if (ret < 0)
5380 goto out_register_late_subsys;
5381
5382 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5383 inet6_rtm_delroute, NULL, 0);
5384 if (ret < 0)
5385 goto out_register_late_subsys;
5386
5387 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5388 inet6_rtm_getroute, NULL,
5389 RTNL_FLAG_DOIT_UNLOCKED);
5390 if (ret < 0)
d189634e 5391 goto out_register_late_subsys;
c127ea2c 5392
8ed67789 5393 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5394 if (ret)
d189634e 5395 goto out_register_late_subsys;
8ed67789 5396
8d0b94af
MKL
5397 for_each_possible_cpu(cpu) {
5398 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5399
5400 INIT_LIST_HEAD(&ul->head);
5401 spin_lock_init(&ul->lock);
5402 }
5403
433d49c3
DL
5404out:
5405 return ret;
5406
d189634e 5407out_register_late_subsys:
16feebcf 5408 rtnl_unregister_all(PF_INET6);
d189634e 5409 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5410fib6_rules_init:
433d49c3
DL
5411 fib6_rules_cleanup();
5412xfrm6_init:
433d49c3 5413 xfrm6_fini();
2a0c451a
TG
5414out_fib6_init:
5415 fib6_gc_cleanup();
8ed67789
DL
5416out_register_subsys:
5417 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5418out_register_inetpeer:
5419 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5420out_dst_entries:
5421 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5422out_kmem_cache:
f2fc6a54 5423 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5424 goto out;
1da177e4
LT
5425}
5426
5427void ip6_route_cleanup(void)
5428{
8ed67789 5429 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5430 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5431 fib6_rules_cleanup();
1da177e4 5432 xfrm6_fini();
1da177e4 5433 fib6_gc_cleanup();
c3426b47 5434 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5435 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5436 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5437 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5438}