]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/ipv6/route.c
Revert "ipv6: fix double refcount of fib6_metrics"
[mirror_ubuntu-jammy-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
eacb9384 66#include <net/ip.h>
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
30d444d3
DA
73static int ip6_rt_type_to_error(u8 fib6_type);
74
75#define CREATE_TRACE_POINTS
76#include <trace/events/fib6.h>
77EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
78#undef CREATE_TRACE_POINTS
79
/*
 * Result of scoring a route's next hop.  Every failure value is negative
 * so callers can test "score < 0"; see rt6_check_neigh() and find_match()
 * for how each value is produced and consumed.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* route is skipped by find_match() */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour entry is in NUD_FAILED */
	RT6_NUD_FAIL_DO_RR	= -1,	/* no neighbour entry: request round-robin */
	RT6_NUD_SUCCEED		= 1
};
86
1da177e4 87static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 88static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 89static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
90static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91static void ip6_dst_destroy(struct dst_entry *);
92static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
569d3645 94static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
95
96static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 97static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 98static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 99static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 100static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
101static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
102 struct sk_buff *skb, u32 mtu);
103static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
104 struct sk_buff *skb);
8d1c802b
DA
105static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
106static size_t rt6_nlmsg_size(struct fib6_info *rt);
d4ead6b3 107static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 108 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 109 struct in6_addr *dest, struct in6_addr *src,
16a16cd3
DA
110 int iif, int type, u32 portid, u32 seq,
111 unsigned int flags);
8d1c802b 112static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
113 struct in6_addr *daddr,
114 struct in6_addr *saddr);
1da177e4 115
70ceb4f5 116#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 117static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 118 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
119 const struct in6_addr *gwaddr,
120 struct net_device *dev,
95c96174 121 unsigned int pref);
8d1c802b 122static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 123 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
124 const struct in6_addr *gwaddr,
125 struct net_device *dev);
70ceb4f5
YH
126#endif
127
8d0b94af
MKL
128struct uncached_list {
129 spinlock_t lock;
130 struct list_head head;
131};
132
133static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
134
510c321b 135void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
136{
137 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
138
8d0b94af
MKL
139 rt->rt6i_uncached_list = ul;
140
141 spin_lock_bh(&ul->lock);
142 list_add_tail(&rt->rt6i_uncached, &ul->head);
143 spin_unlock_bh(&ul->lock);
144}
145
510c321b 146void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
147{
148 if (!list_empty(&rt->rt6i_uncached)) {
149 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 150 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
151
152 spin_lock_bh(&ul->lock);
153 list_del(&rt->rt6i_uncached);
81eb8447 154 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
155 spin_unlock_bh(&ul->lock);
156 }
157}
158
159static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
160{
161 struct net_device *loopback_dev = net->loopback_dev;
162 int cpu;
163
e332bc67
EB
164 if (dev == loopback_dev)
165 return;
166
8d0b94af
MKL
167 for_each_possible_cpu(cpu) {
168 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
169 struct rt6_info *rt;
170
171 spin_lock_bh(&ul->lock);
172 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
173 struct inet6_dev *rt_idev = rt->rt6i_idev;
174 struct net_device *rt_dev = rt->dst.dev;
175
e332bc67 176 if (rt_idev->dev == dev) {
8d0b94af
MKL
177 rt->rt6i_idev = in6_dev_get(loopback_dev);
178 in6_dev_put(rt_idev);
179 }
180
e332bc67 181 if (rt_dev == dev) {
8d0b94af
MKL
182 rt->dst.dev = loopback_dev;
183 dev_hold(rt->dst.dev);
184 dev_put(rt_dev);
185 }
186 }
187 spin_unlock_bh(&ul->lock);
188 }
189}
190
f8a1b43b 191static inline const void *choose_neigh_daddr(const struct in6_addr *p,
f894cbf8
DM
192 struct sk_buff *skb,
193 const void *daddr)
39232973 194{
a7563f34 195 if (!ipv6_addr_any(p))
39232973 196 return (const void *) p;
f894cbf8
DM
197 else if (skb)
198 return &ipv6_hdr(skb)->daddr;
39232973
DM
199 return daddr;
200}
201
f8a1b43b
DA
202struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw,
203 struct net_device *dev,
204 struct sk_buff *skb,
205 const void *daddr)
d3aaeb38 206{
39232973
DM
207 struct neighbour *n;
208
f8a1b43b
DA
209 daddr = choose_neigh_daddr(gw, skb, daddr);
210 n = __ipv6_neigh_lookup(dev, daddr);
f83c7790
DM
211 if (n)
212 return n;
f8a1b43b
DA
213 return neigh_create(&nd_tbl, daddr, dev);
214}
215
216static struct neighbour *ip6_dst_neigh_lookup(const struct dst_entry *dst,
217 struct sk_buff *skb,
218 const void *daddr)
219{
220 const struct rt6_info *rt = container_of(dst, struct rt6_info, dst);
221
222 return ip6_neigh_lookup(&rt->rt6i_gateway, dst->dev, skb, daddr);
f83c7790
DM
223}
224
63fca65d
JA
225static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
226{
227 struct net_device *dev = dst->dev;
228 struct rt6_info *rt = (struct rt6_info *)dst;
229
f8a1b43b 230 daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr);
63fca65d
JA
231 if (!daddr)
232 return;
233 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
234 return;
235 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
236 return;
237 __ipv6_confirm_neigh(dev, daddr);
238}
239
9a7ec3a9 240static struct dst_ops ip6_dst_ops_template = {
1da177e4 241 .family = AF_INET6,
1da177e4
LT
242 .gc = ip6_dst_gc,
243 .gc_thresh = 1024,
244 .check = ip6_dst_check,
0dbaee3b 245 .default_advmss = ip6_default_advmss,
ebb762f2 246 .mtu = ip6_mtu,
d4ead6b3 247 .cow_metrics = dst_cow_metrics_generic,
1da177e4
LT
248 .destroy = ip6_dst_destroy,
249 .ifdown = ip6_dst_ifdown,
250 .negative_advice = ip6_negative_advice,
251 .link_failure = ip6_link_failure,
252 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 253 .redirect = rt6_do_redirect,
9f8955cc 254 .local_out = __ip6_local_out,
f8a1b43b 255 .neigh_lookup = ip6_dst_neigh_lookup,
63fca65d 256 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
257};
258
ebb762f2 259static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 260{
618f9bc7
SK
261 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
262
263 return mtu ? : dst->dev->mtu;
ec831ea7
RD
264}
265
6700c270
DM
266static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
267 struct sk_buff *skb, u32 mtu)
14e50e57
DM
268{
269}
270
6700c270
DM
/* Redirects are deliberately ignored for blackhole dsts. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
	/* intentionally empty */
}
275
14e50e57
DM
276static struct dst_ops ip6_dst_blackhole_ops = {
277 .family = AF_INET6,
14e50e57
DM
278 .destroy = ip6_dst_destroy,
279 .check = ip6_dst_check,
ebb762f2 280 .mtu = ip6_blackhole_mtu,
214f45c9 281 .default_advmss = ip6_default_advmss,
14e50e57 282 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 283 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 284 .cow_metrics = dst_cow_metrics_generic,
f8a1b43b 285 .neigh_lookup = ip6_dst_neigh_lookup,
14e50e57
DM
286};
287
62fa8a84 288static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 289 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
290};
291
8d1c802b 292static const struct fib6_info fib6_null_entry_template = {
93c2fb25
DA
293 .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP),
294 .fib6_protocol = RTPROT_KERNEL,
295 .fib6_metric = ~(u32)0,
296 .fib6_ref = ATOMIC_INIT(1),
421842ed
DA
297 .fib6_type = RTN_UNREACHABLE,
298 .fib6_metrics = (struct dst_metrics *)&dst_default_metrics,
299};
300
fb0af4c7 301static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
302 .dst = {
303 .__refcnt = ATOMIC_INIT(1),
304 .__use = 1,
2c20cbd7 305 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 306 .error = -ENETUNREACH,
d8d1f30b
CG
307 .input = ip6_pkt_discard,
308 .output = ip6_pkt_discard_out,
1da177e4
LT
309 },
310 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
1da177e4
LT
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
325};
326
fb0af4c7 327static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
328 .dst = {
329 .__refcnt = ATOMIC_INIT(1),
330 .__use = 1,
2c20cbd7 331 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 332 .error = -EINVAL,
d8d1f30b 333 .input = dst_discard,
ede2059d 334 .output = dst_discard_out,
101367c2
TG
335 },
336 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
101367c2
TG
337};
338
339#endif
340
ebfa45f0
MKL
341static void rt6_info_init(struct rt6_info *rt)
342{
343 struct dst_entry *dst = &rt->dst;
344
345 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
ebfa45f0
MKL
346 INIT_LIST_HEAD(&rt->rt6i_uncached);
347}
348
1da177e4 349/* allocate dst with ip6_dst_ops */
93531c67
DA
350struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev,
351 int flags)
1da177e4 352{
97bab73f 353 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 354 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 355
81eb8447 356 if (rt) {
ebfa45f0 357 rt6_info_init(rt);
81eb8447
WW
358 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
359 }
8104891b 360
cf911662 361 return rt;
1da177e4 362}
9ab179d8 363EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 364
1da177e4
LT
365static void ip6_dst_destroy(struct dst_entry *dst)
366{
367 struct rt6_info *rt = (struct rt6_info *)dst;
a68886a6 368 struct fib6_info *from;
8d0b94af 369 struct inet6_dev *idev;
1da177e4 370
4b32b5ad 371 dst_destroy_metrics_generic(dst);
8d0b94af
MKL
372 rt6_uncached_list_del(rt);
373
374 idev = rt->rt6i_idev;
38308473 375 if (idev) {
1da177e4
LT
376 rt->rt6i_idev = NULL;
377 in6_dev_put(idev);
1ab1457c 378 }
1716a961 379
a68886a6
DA
380 rcu_read_lock();
381 from = rcu_dereference(rt->from);
382 rcu_assign_pointer(rt->from, NULL);
93531c67 383 fib6_info_release(from);
a68886a6 384 rcu_read_unlock();
b3419363
DM
385}
386
1da177e4
LT
387static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
388 int how)
389{
390 struct rt6_info *rt = (struct rt6_info *)dst;
391 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 392 struct net_device *loopback_dev =
c346dca1 393 dev_net(dev)->loopback_dev;
1da177e4 394
e5645f51
WW
395 if (idev && idev->dev != loopback_dev) {
396 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
397 if (loopback_idev) {
398 rt->rt6i_idev = loopback_idev;
399 in6_dev_put(idev);
97cac082 400 }
1da177e4
LT
401 }
402}
403
5973fb1e
MKL
404static bool __rt6_check_expired(const struct rt6_info *rt)
405{
406 if (rt->rt6i_flags & RTF_EXPIRES)
407 return time_after(jiffies, rt->dst.expires);
408 else
409 return false;
410}
411
a50feda5 412static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 413{
a68886a6
DA
414 struct fib6_info *from;
415
416 from = rcu_dereference(rt->from);
417
1716a961
G
418 if (rt->rt6i_flags & RTF_EXPIRES) {
419 if (time_after(jiffies, rt->dst.expires))
a50feda5 420 return true;
a68886a6 421 } else if (from) {
1e2ea8ad 422 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
a68886a6 423 fib6_check_expired(from);
1716a961 424 }
a50feda5 425 return false;
1da177e4
LT
426}
427
3b290a31
DA
428struct fib6_info *fib6_multipath_select(const struct net *net,
429 struct fib6_info *match,
430 struct flowi6 *fl6, int oif,
431 const struct sk_buff *skb,
432 int strict)
51ebd318 433{
8d1c802b 434 struct fib6_info *sibling, *next_sibling;
51ebd318 435
b673d6cc
JS
436 /* We might have already computed the hash for ICMPv6 errors. In such
437 * case it will always be non-zero. Otherwise now is the time to do it.
438 */
439 if (!fl6->mp_hash)
b4bac172 440 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 441
5e670d84 442 if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
3d709f69
IS
443 return match;
444
93c2fb25
DA
445 list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
446 fib6_siblings) {
5e670d84
DA
447 int nh_upper_bound;
448
449 nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
450 if (fl6->mp_hash > nh_upper_bound)
3d709f69
IS
451 continue;
452 if (rt6_score_route(sibling, oif, strict) < 0)
453 break;
454 match = sibling;
455 break;
456 }
457
51ebd318
ND
458 return match;
459}
460
1da177e4 461/*
66f5d6ce 462 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
463 */
464
8d1c802b
DA
465static inline struct fib6_info *rt6_device_match(struct net *net,
466 struct fib6_info *rt,
b71d1d42 467 const struct in6_addr *saddr,
1da177e4 468 int oif,
d420895e 469 int flags)
1da177e4 470{
8d1c802b 471 struct fib6_info *sprt;
1da177e4 472
5e670d84
DA
473 if (!oif && ipv6_addr_any(saddr) &&
474 !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
8067bb8c 475 return rt;
dd3abc4e 476
8fb11a9a 477 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
5e670d84 478 const struct net_device *dev = sprt->fib6_nh.nh_dev;
dd3abc4e 479
5e670d84 480 if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
481 continue;
482
dd3abc4e 483 if (oif) {
1da177e4
LT
484 if (dev->ifindex == oif)
485 return sprt;
dd3abc4e
YH
486 } else {
487 if (ipv6_chk_addr(net, saddr, dev,
488 flags & RT6_LOOKUP_F_IFACE))
489 return sprt;
1da177e4 490 }
dd3abc4e 491 }
1da177e4 492
eea68cd3
DA
493 if (oif && flags & RT6_LOOKUP_F_IFACE)
494 return net->ipv6.fib6_null_entry;
8067bb8c 495
421842ed 496 return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
1da177e4
LT
497}
498
27097255 499#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
500struct __rt6_probe_work {
501 struct work_struct work;
502 struct in6_addr target;
503 struct net_device *dev;
504};
505
506static void rt6_probe_deferred(struct work_struct *w)
507{
508 struct in6_addr mcaddr;
509 struct __rt6_probe_work *work =
510 container_of(w, struct __rt6_probe_work, work);
511
512 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 513 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 514 dev_put(work->dev);
662f5533 515 kfree(work);
c2f17e82
HFS
516}
517
8d1c802b 518static void rt6_probe(struct fib6_info *rt)
27097255 519{
990edb42 520 struct __rt6_probe_work *work;
5e670d84 521 const struct in6_addr *nh_gw;
f2c31e32 522 struct neighbour *neigh;
5e670d84
DA
523 struct net_device *dev;
524
27097255
YH
525 /*
526 * Okay, this does not seem to be appropriate
527 * for now, however, we need to check if it
528 * is really so; aka Router Reachability Probing.
529 *
530 * Router Reachability Probe MUST be rate-limited
531 * to no more than one per minute.
532 */
93c2fb25 533 if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
7ff74a59 534 return;
5e670d84
DA
535
536 nh_gw = &rt->fib6_nh.nh_gw;
537 dev = rt->fib6_nh.nh_dev;
2152caea 538 rcu_read_lock_bh();
5e670d84 539 neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
2152caea 540 if (neigh) {
dcd1f572
DA
541 struct inet6_dev *idev;
542
8d6c31bf
MKL
543 if (neigh->nud_state & NUD_VALID)
544 goto out;
545
dcd1f572 546 idev = __in6_dev_get(dev);
990edb42 547 work = NULL;
2152caea 548 write_lock(&neigh->lock);
990edb42
MKL
549 if (!(neigh->nud_state & NUD_VALID) &&
550 time_after(jiffies,
dcd1f572 551 neigh->updated + idev->cnf.rtr_probe_interval)) {
990edb42
MKL
552 work = kmalloc(sizeof(*work), GFP_ATOMIC);
553 if (work)
554 __neigh_set_probe_once(neigh);
c2f17e82 555 }
2152caea 556 write_unlock(&neigh->lock);
990edb42
MKL
557 } else {
558 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 559 }
990edb42
MKL
560
561 if (work) {
562 INIT_WORK(&work->work, rt6_probe_deferred);
5e670d84
DA
563 work->target = *nh_gw;
564 dev_hold(dev);
565 work->dev = dev;
990edb42
MKL
566 schedule_work(&work->work);
567 }
568
8d6c31bf 569out:
2152caea 570 rcu_read_unlock_bh();
27097255
YH
571}
572#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct fib6_info *rt)
{
	/* intentionally empty */
}
576#endif
577
1da177e4 578/*
554cfb7e 579 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 580 */
8d1c802b 581static inline int rt6_check_dev(struct fib6_info *rt, int oif)
554cfb7e 582{
5e670d84
DA
583 const struct net_device *dev = rt->fib6_nh.nh_dev;
584
161980f4 585 if (!oif || dev->ifindex == oif)
554cfb7e 586 return 2;
161980f4 587 return 0;
554cfb7e 588}
1da177e4 589
8d1c802b 590static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
1da177e4 591{
afc154e9 592 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
5e670d84 593 struct neighbour *neigh;
f2c31e32 594
93c2fb25
DA
595 if (rt->fib6_flags & RTF_NONEXTHOP ||
596 !(rt->fib6_flags & RTF_GATEWAY))
afc154e9 597 return RT6_NUD_SUCCEED;
145a3621
YH
598
599 rcu_read_lock_bh();
5e670d84
DA
600 neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
601 &rt->fib6_nh.nh_gw);
145a3621
YH
602 if (neigh) {
603 read_lock(&neigh->lock);
554cfb7e 604 if (neigh->nud_state & NUD_VALID)
afc154e9 605 ret = RT6_NUD_SUCCEED;
398bcbeb 606#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 607 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 608 ret = RT6_NUD_SUCCEED;
7e980569
JB
609 else
610 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 611#endif
145a3621 612 read_unlock(&neigh->lock);
afc154e9
HFS
613 } else {
614 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 615 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 616 }
145a3621
YH
617 rcu_read_unlock_bh();
618
a5a81f0b 619 return ret;
1da177e4
LT
620}
621
8d1c802b 622static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
1da177e4 623{
a5a81f0b 624 int m;
1ab1457c 625
4d0c5911 626 m = rt6_check_dev(rt, oif);
77d16f45 627 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 628 return RT6_NUD_FAIL_HARD;
ebacaaa0 629#ifdef CONFIG_IPV6_ROUTER_PREF
93c2fb25 630 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
ebacaaa0 631#endif
afc154e9
HFS
632 if (strict & RT6_LOOKUP_F_REACHABLE) {
633 int n = rt6_check_neigh(rt);
634 if (n < 0)
635 return n;
636 }
554cfb7e
YH
637 return m;
638}
639
dcd1f572
DA
640/* called with rc_read_lock held */
641static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
642{
643 const struct net_device *dev = fib6_info_nh_dev(f6i);
644 bool rc = false;
645
646 if (dev) {
647 const struct inet6_dev *idev = __in6_dev_get(dev);
648
649 rc = !!idev->cnf.ignore_routes_with_linkdown;
650 }
651
652 return rc;
653}
654
8d1c802b
DA
655static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
656 int *mpri, struct fib6_info *match,
afc154e9 657 bool *do_rr)
554cfb7e 658{
f11e6659 659 int m;
afc154e9 660 bool match_do_rr = false;
35103d11 661
5e670d84 662 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c
IS
663 goto out;
664
dcd1f572 665 if (fib6_ignore_linkdown(rt) &&
5e670d84 666 rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 667 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 668 goto out;
f11e6659 669
14895687 670 if (fib6_check_expired(rt))
f11e6659
DM
671 goto out;
672
673 m = rt6_score_route(rt, oif, strict);
7e980569 674 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
675 match_do_rr = true;
676 m = 0; /* lowest valid score */
7e980569 677 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 678 goto out;
afc154e9
HFS
679 }
680
681 if (strict & RT6_LOOKUP_F_REACHABLE)
682 rt6_probe(rt);
f11e6659 683
7e980569 684 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 685 if (m > *mpri) {
afc154e9 686 *do_rr = match_do_rr;
f11e6659
DM
687 *mpri = m;
688 match = rt;
f11e6659 689 }
f11e6659
DM
690out:
691 return match;
692}
693
8d1c802b
DA
694static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
695 struct fib6_info *leaf,
696 struct fib6_info *rr_head,
afc154e9
HFS
697 u32 metric, int oif, int strict,
698 bool *do_rr)
f11e6659 699{
8d1c802b 700 struct fib6_info *rt, *match, *cont;
554cfb7e 701 int mpri = -1;
1da177e4 702
f11e6659 703 match = NULL;
9fbdcfaf 704 cont = NULL;
8fb11a9a 705 for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 706 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
707 cont = rt;
708 break;
709 }
710
711 match = find_match(rt, oif, strict, &mpri, match, do_rr);
712 }
713
66f5d6ce 714 for (rt = leaf; rt && rt != rr_head;
8fb11a9a 715 rt = rcu_dereference(rt->fib6_next)) {
93c2fb25 716 if (rt->fib6_metric != metric) {
9fbdcfaf
SK
717 cont = rt;
718 break;
719 }
720
afc154e9 721 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
722 }
723
724 if (match || !cont)
725 return match;
726
8fb11a9a 727 for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
afc154e9 728 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 729
f11e6659
DM
730 return match;
731}
1da177e4 732
8d1c802b 733static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
8d1040e8 734 int oif, int strict)
f11e6659 735{
8d1c802b
DA
736 struct fib6_info *leaf = rcu_dereference(fn->leaf);
737 struct fib6_info *match, *rt0;
afc154e9 738 bool do_rr = false;
17ecf590 739 int key_plen;
1da177e4 740
421842ed
DA
741 if (!leaf || leaf == net->ipv6.fib6_null_entry)
742 return net->ipv6.fib6_null_entry;
8d1040e8 743
66f5d6ce 744 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 745 if (!rt0)
66f5d6ce 746 rt0 = leaf;
1da177e4 747
17ecf590
WW
748 /* Double check to make sure fn is not an intermediate node
749 * and fn->leaf does not points to its child's leaf
750 * (This might happen if all routes under fn are deleted from
751 * the tree and fib6_repair_tree() is called on the node.)
752 */
93c2fb25 753 key_plen = rt0->fib6_dst.plen;
17ecf590 754#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
755 if (rt0->fib6_src.plen)
756 key_plen = rt0->fib6_src.plen;
17ecf590
WW
757#endif
758 if (fn->fn_bit != key_plen)
421842ed 759 return net->ipv6.fib6_null_entry;
17ecf590 760
93c2fb25 761 match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
afc154e9 762 &do_rr);
1da177e4 763
afc154e9 764 if (do_rr) {
8fb11a9a 765 struct fib6_info *next = rcu_dereference(rt0->fib6_next);
f11e6659 766
554cfb7e 767 /* no entries matched; do round-robin */
93c2fb25 768 if (!next || next->fib6_metric != rt0->fib6_metric)
8d1040e8 769 next = leaf;
f11e6659 770
66f5d6ce 771 if (next != rt0) {
93c2fb25 772 spin_lock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 773 /* make sure next is not being deleted from the tree */
93c2fb25 774 if (next->fib6_node)
66f5d6ce 775 rcu_assign_pointer(fn->rr_ptr, next);
93c2fb25 776 spin_unlock_bh(&leaf->fib6_table->tb6_lock);
66f5d6ce 777 }
1da177e4 778 }
1da177e4 779
421842ed 780 return match ? match : net->ipv6.fib6_null_entry;
1da177e4
LT
781}
782
8d1c802b 783static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
8b9df265 784{
93c2fb25 785 return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
8b9df265
MKL
786}
787
70ceb4f5
YH
788#ifdef CONFIG_IPV6_ROUTE_INFO
789int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 790 const struct in6_addr *gwaddr)
70ceb4f5 791{
c346dca1 792 struct net *net = dev_net(dev);
70ceb4f5
YH
793 struct route_info *rinfo = (struct route_info *) opt;
794 struct in6_addr prefix_buf, *prefix;
795 unsigned int pref;
4bed72e4 796 unsigned long lifetime;
8d1c802b 797 struct fib6_info *rt;
70ceb4f5
YH
798
799 if (len < sizeof(struct route_info)) {
800 return -EINVAL;
801 }
802
803 /* Sanity check for prefix_len and length */
804 if (rinfo->length > 3) {
805 return -EINVAL;
806 } else if (rinfo->prefix_len > 128) {
807 return -EINVAL;
808 } else if (rinfo->prefix_len > 64) {
809 if (rinfo->length < 2) {
810 return -EINVAL;
811 }
812 } else if (rinfo->prefix_len > 0) {
813 if (rinfo->length < 1) {
814 return -EINVAL;
815 }
816 }
817
818 pref = rinfo->route_pref;
819 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 820 return -EINVAL;
70ceb4f5 821
4bed72e4 822 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
823
824 if (rinfo->length == 3)
825 prefix = (struct in6_addr *)rinfo->prefix;
826 else {
827 /* this function is safe */
828 ipv6_addr_prefix(&prefix_buf,
829 (struct in6_addr *)rinfo->prefix,
830 rinfo->prefix_len);
831 prefix = &prefix_buf;
832 }
833
f104a567 834 if (rinfo->prefix_len == 0)
afb1d4b5 835 rt = rt6_get_dflt_router(net, gwaddr, dev);
f104a567
DJ
836 else
837 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 838 gwaddr, dev);
70ceb4f5
YH
839
840 if (rt && !lifetime) {
afb1d4b5 841 ip6_del_rt(net, rt);
70ceb4f5
YH
842 rt = NULL;
843 }
844
845 if (!rt && lifetime)
830218c1
DA
846 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
847 dev, pref);
70ceb4f5 848 else if (rt)
93c2fb25
DA
849 rt->fib6_flags = RTF_ROUTEINFO |
850 (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
70ceb4f5
YH
851
852 if (rt) {
1716a961 853 if (!addrconf_finite_timeout(lifetime))
14895687 854 fib6_clean_expires(rt);
1716a961 855 else
14895687 856 fib6_set_expires(rt, jiffies + HZ * lifetime);
1716a961 857
93531c67 858 fib6_info_release(rt);
70ceb4f5
YH
859 }
860 return 0;
861}
862#endif
863
ae90d867
DA
864/*
865 * Misc support functions
866 */
867
868/* called with rcu_lock held */
8d1c802b 869static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
ae90d867 870{
5e670d84 871 struct net_device *dev = rt->fib6_nh.nh_dev;
ae90d867 872
93c2fb25 873 if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
ae90d867
DA
874 /* for copies of local routes, dst->dev needs to be the
875 * device if it is a master device, the master device if
876 * device is enslaved, and the loopback as the default
877 */
878 if (netif_is_l3_slave(dev) &&
93c2fb25 879 !rt6_need_strict(&rt->fib6_dst.addr))
ae90d867
DA
880 dev = l3mdev_master_dev_rcu(dev);
881 else if (!netif_is_l3_master(dev))
882 dev = dev_net(dev)->loopback_dev;
883 /* last case is netif_is_l3_master(dev) is true in which
884 * case we want dev returned to be dev
885 */
886 }
887
888 return dev;
889}
890
6edb3c96
DA
891static const int fib6_prop[RTN_MAX + 1] = {
892 [RTN_UNSPEC] = 0,
893 [RTN_UNICAST] = 0,
894 [RTN_LOCAL] = 0,
895 [RTN_BROADCAST] = 0,
896 [RTN_ANYCAST] = 0,
897 [RTN_MULTICAST] = 0,
898 [RTN_BLACKHOLE] = -EINVAL,
899 [RTN_UNREACHABLE] = -EHOSTUNREACH,
900 [RTN_PROHIBIT] = -EACCES,
901 [RTN_THROW] = -EAGAIN,
902 [RTN_NAT] = -EINVAL,
903 [RTN_XRESOLVE] = -EINVAL,
904};
905
906static int ip6_rt_type_to_error(u8 fib6_type)
907{
908 return fib6_prop[fib6_type];
909}
910
8d1c802b 911static unsigned short fib6_info_dst_flags(struct fib6_info *rt)
3b6761d1
DA
912{
913 unsigned short flags = 0;
914
915 if (rt->dst_nocount)
916 flags |= DST_NOCOUNT;
917 if (rt->dst_nopolicy)
918 flags |= DST_NOPOLICY;
919 if (rt->dst_host)
920 flags |= DST_HOST;
921
922 return flags;
923}
924
8d1c802b 925static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96
DA
926{
927 rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
928
929 switch (ort->fib6_type) {
930 case RTN_BLACKHOLE:
931 rt->dst.output = dst_discard_out;
932 rt->dst.input = dst_discard;
933 break;
934 case RTN_PROHIBIT:
935 rt->dst.output = ip6_pkt_prohibit_out;
936 rt->dst.input = ip6_pkt_prohibit;
937 break;
938 case RTN_THROW:
939 case RTN_UNREACHABLE:
940 default:
941 rt->dst.output = ip6_pkt_discard_out;
942 rt->dst.input = ip6_pkt_discard;
943 break;
944 }
945}
946
8d1c802b 947static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
6edb3c96 948{
93c2fb25 949 if (ort->fib6_flags & RTF_REJECT) {
6edb3c96
DA
950 ip6_rt_init_dst_reject(rt, ort);
951 return;
952 }
953
954 rt->dst.error = 0;
955 rt->dst.output = ip6_output;
956
d23c4b63 957 if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
6edb3c96 958 rt->dst.input = ip6_input;
93c2fb25 959 } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
6edb3c96
DA
960 rt->dst.input = ip6_mc_input;
961 } else {
962 rt->dst.input = ip6_forward;
963 }
964
965 if (ort->fib6_nh.nh_lwtstate) {
966 rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
967 lwtunnel_set_redirect(&rt->dst);
968 }
969
970 rt->dst.lastuse = jiffies;
971}
972
e873e4b9 973/* Caller must already hold reference to @from */
8d1c802b 974static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
ae90d867 975{
ae90d867 976 rt->rt6i_flags &= ~RTF_EXPIRES;
a68886a6 977 rcu_assign_pointer(rt->from, from);
d4ead6b3 978 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
86758605
WW
979 if (from->fib6_metrics != &dst_default_metrics) {
980 rt->dst._metrics |= DST_METRICS_REFCOUNTED;
981 refcount_inc(&from->fib6_metrics->refcnt);
982 }
ae90d867
DA
983}
984
e873e4b9 985/* Caller must already hold reference to @ort */
8d1c802b 986static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
ae90d867 987{
dcd1f572
DA
988 struct net_device *dev = fib6_info_nh_dev(ort);
989
6edb3c96
DA
990 ip6_rt_init_dst(rt, ort);
991
93c2fb25 992 rt->rt6i_dst = ort->fib6_dst;
dcd1f572 993 rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
5e670d84 994 rt->rt6i_gateway = ort->fib6_nh.nh_gw;
93c2fb25 995 rt->rt6i_flags = ort->fib6_flags;
ae90d867 996 rt6_set_from(rt, ort);
ae90d867 997#ifdef CONFIG_IPV6_SUBTREES
93c2fb25 998 rt->rt6i_src = ort->fib6_src;
ae90d867 999#endif
93c2fb25 1000 rt->rt6i_prefsrc = ort->fib6_prefsrc;
ae90d867
DA
1001}
1002
a3c00e46
MKL
1003static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
1004 struct in6_addr *saddr)
1005{
66f5d6ce 1006 struct fib6_node *pn, *sn;
a3c00e46
MKL
1007 while (1) {
1008 if (fn->fn_flags & RTN_TL_ROOT)
1009 return NULL;
66f5d6ce
WW
1010 pn = rcu_dereference(fn->parent);
1011 sn = FIB6_SUBTREE(pn);
1012 if (sn && sn != fn)
6454743b 1013 fn = fib6_node_lookup(sn, NULL, saddr);
a3c00e46
MKL
1014 else
1015 fn = pn;
1016 if (fn->fn_flags & RTN_RTINFO)
1017 return fn;
1018 }
1019}
c71099ac 1020
d3843fe5
WW
1021static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
1022 bool null_fallback)
1023{
1024 struct rt6_info *rt = *prt;
1025
1026 if (dst_hold_safe(&rt->dst))
1027 return true;
1028 if (null_fallback) {
1029 rt = net->ipv6.ip6_null_entry;
1030 dst_hold(&rt->dst);
1031 } else {
1032 rt = NULL;
1033 }
1034 *prt = rt;
1035 return false;
1036}
1037
dec9b0e2 1038/* called with rcu_lock held */
8d1c802b 1039static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
dec9b0e2 1040{
3b6761d1 1041 unsigned short flags = fib6_info_dst_flags(rt);
dec9b0e2
DA
1042 struct net_device *dev = rt->fib6_nh.nh_dev;
1043 struct rt6_info *nrt;
1044
e873e4b9
WW
1045 if (!fib6_info_hold_safe(rt))
1046 return NULL;
1047
93531c67 1048 nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
dec9b0e2
DA
1049 if (nrt)
1050 ip6_rt_copy_init(nrt, rt);
e873e4b9
WW
1051 else
1052 fib6_info_release(rt);
dec9b0e2
DA
1053
1054 return nrt;
1055}
1056
8ed67789
DL
1057static struct rt6_info *ip6_pol_route_lookup(struct net *net,
1058 struct fib6_table *table,
b75cc8f9
DA
1059 struct flowi6 *fl6,
1060 const struct sk_buff *skb,
1061 int flags)
1da177e4 1062{
8d1c802b 1063 struct fib6_info *f6i;
1da177e4 1064 struct fib6_node *fn;
23fb93a4 1065 struct rt6_info *rt;
1da177e4 1066
b6cdbc85
DA
1067 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1068 flags &= ~RT6_LOOKUP_F_IFACE;
1069
66f5d6ce 1070 rcu_read_lock();
6454743b 1071 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 1072restart:
23fb93a4
DA
1073 f6i = rcu_dereference(fn->leaf);
1074 if (!f6i) {
1075 f6i = net->ipv6.fib6_null_entry;
66f5d6ce 1076 } else {
23fb93a4 1077 f6i = rt6_device_match(net, f6i, &fl6->saddr,
66f5d6ce 1078 fl6->flowi6_oif, flags);
93c2fb25 1079 if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
3b290a31
DA
1080 f6i = fib6_multipath_select(net, f6i, fl6,
1081 fl6->flowi6_oif, skb,
1082 flags);
66f5d6ce 1083 }
23fb93a4 1084 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1085 fn = fib6_backtrack(fn, &fl6->saddr);
1086 if (fn)
1087 goto restart;
1088 }
2b760fcf 1089
d4bea421 1090 trace_fib6_table_lookup(net, f6i, table, fl6);
d3843fe5 1091
2b760fcf 1092 /* Search through exception table */
23fb93a4
DA
1093 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1094 if (rt) {
dec9b0e2
DA
1095 if (ip6_hold_safe(net, &rt, true))
1096 dst_use_noref(&rt->dst, jiffies);
23fb93a4 1097 } else if (f6i == net->ipv6.fib6_null_entry) {
dec9b0e2
DA
1098 rt = net->ipv6.ip6_null_entry;
1099 dst_hold(&rt->dst);
23fb93a4
DA
1100 } else {
1101 rt = ip6_create_rt_rcu(f6i);
1102 if (!rt) {
1103 rt = net->ipv6.ip6_null_entry;
1104 dst_hold(&rt->dst);
1105 }
dec9b0e2 1106 }
b811580d 1107
66f5d6ce 1108 rcu_read_unlock();
b811580d 1109
c71099ac 1110 return rt;
c71099ac
TG
1111}
1112
67ba4152 1113struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 1114 const struct sk_buff *skb, int flags)
ea6e574e 1115{
b75cc8f9 1116 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
1117}
1118EXPORT_SYMBOL_GPL(ip6_route_lookup);
1119
9acd9f3a 1120struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
1121 const struct in6_addr *saddr, int oif,
1122 const struct sk_buff *skb, int strict)
c71099ac 1123{
4c9483b2
DM
1124 struct flowi6 fl6 = {
1125 .flowi6_oif = oif,
1126 .daddr = *daddr,
c71099ac
TG
1127 };
1128 struct dst_entry *dst;
77d16f45 1129 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 1130
adaa70bb 1131 if (saddr) {
4c9483b2 1132 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
1133 flags |= RT6_LOOKUP_F_HAS_SADDR;
1134 }
1135
b75cc8f9 1136 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
1137 if (dst->error == 0)
1138 return (struct rt6_info *) dst;
1139
1140 dst_release(dst);
1141
1da177e4
LT
1142 return NULL;
1143}
7159039a
YH
1144EXPORT_SYMBOL(rt6_lookup);
1145
c71099ac 1146/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
1147 * It takes new route entry, the addition fails by any reason the
1148 * route is released.
1149 * Caller must hold dst before calling it.
1da177e4
LT
1150 */
1151
8d1c802b 1152static int __ip6_ins_rt(struct fib6_info *rt, struct nl_info *info,
333c4301 1153 struct netlink_ext_ack *extack)
1da177e4
LT
1154{
1155 int err;
c71099ac 1156 struct fib6_table *table;
1da177e4 1157
93c2fb25 1158 table = rt->fib6_table;
66f5d6ce 1159 spin_lock_bh(&table->tb6_lock);
d4ead6b3 1160 err = fib6_add(&table->tb6_root, rt, info, extack);
66f5d6ce 1161 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1162
1163 return err;
1164}
1165
8d1c802b 1166int ip6_ins_rt(struct net *net, struct fib6_info *rt)
40e22e8f 1167{
afb1d4b5 1168 struct nl_info info = { .nl_net = net, };
e715b6d3 1169
d4ead6b3 1170 return __ip6_ins_rt(rt, &info, NULL);
40e22e8f
TG
1171}
1172
8d1c802b 1173static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
8b9df265
MKL
1174 const struct in6_addr *daddr,
1175 const struct in6_addr *saddr)
1da177e4 1176{
4832c30d 1177 struct net_device *dev;
1da177e4
LT
1178 struct rt6_info *rt;
1179
1180 /*
1181 * Clone the route.
1182 */
1183
e873e4b9
WW
1184 if (!fib6_info_hold_safe(ort))
1185 return NULL;
1186
4832c30d 1187 dev = ip6_rt_get_dev_rcu(ort);
93531c67 1188 rt = ip6_dst_alloc(dev_net(dev), dev, 0);
e873e4b9
WW
1189 if (!rt) {
1190 fib6_info_release(ort);
83a09abd 1191 return NULL;
e873e4b9 1192 }
83a09abd
MKL
1193
1194 ip6_rt_copy_init(rt, ort);
1195 rt->rt6i_flags |= RTF_CACHE;
83a09abd
MKL
1196 rt->dst.flags |= DST_HOST;
1197 rt->rt6i_dst.addr = *daddr;
1198 rt->rt6i_dst.plen = 128;
1da177e4 1199
83a09abd 1200 if (!rt6_is_gw_or_nonexthop(ort)) {
93c2fb25
DA
1201 if (ort->fib6_dst.plen != 128 &&
1202 ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
83a09abd 1203 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1204#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1205 if (rt->rt6i_src.plen && saddr) {
1206 rt->rt6i_src.addr = *saddr;
1207 rt->rt6i_src.plen = 128;
8b9df265 1208 }
83a09abd 1209#endif
95a9a5ba 1210 }
1da177e4 1211
95a9a5ba
YH
1212 return rt;
1213}
1da177e4 1214
8d1c802b 1215static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
d52d3997 1216{
3b6761d1 1217 unsigned short flags = fib6_info_dst_flags(rt);
4832c30d 1218 struct net_device *dev;
d52d3997
MKL
1219 struct rt6_info *pcpu_rt;
1220
e873e4b9
WW
1221 if (!fib6_info_hold_safe(rt))
1222 return NULL;
1223
4832c30d
DA
1224 rcu_read_lock();
1225 dev = ip6_rt_get_dev_rcu(rt);
93531c67 1226 pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
4832c30d 1227 rcu_read_unlock();
e873e4b9
WW
1228 if (!pcpu_rt) {
1229 fib6_info_release(rt);
d52d3997 1230 return NULL;
e873e4b9 1231 }
d52d3997 1232 ip6_rt_copy_init(pcpu_rt, rt);
d52d3997
MKL
1233 pcpu_rt->rt6i_flags |= RTF_PCPU;
1234 return pcpu_rt;
1235}
1236
66f5d6ce 1237/* It should be called with rcu_read_lock() acquired */
8d1c802b 1238static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
d52d3997 1239{
a73e4195 1240 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1241
1242 p = this_cpu_ptr(rt->rt6i_pcpu);
1243 pcpu_rt = *p;
1244
d4ead6b3
DA
1245 if (pcpu_rt)
1246 ip6_hold_safe(NULL, &pcpu_rt, false);
d3843fe5 1247
a73e4195
MKL
1248 return pcpu_rt;
1249}
1250
afb1d4b5 1251static struct rt6_info *rt6_make_pcpu_route(struct net *net,
8d1c802b 1252 struct fib6_info *rt)
a73e4195
MKL
1253{
1254 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1255
1256 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1257 if (!pcpu_rt) {
9c7370a1
MKL
1258 dst_hold(&net->ipv6.ip6_null_entry->dst);
1259 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1260 }
1261
a94b9367
WW
1262 dst_hold(&pcpu_rt->dst);
1263 p = this_cpu_ptr(rt->rt6i_pcpu);
1264 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1265 BUG_ON(prev);
a94b9367 1266
d52d3997
MKL
1267 return pcpu_rt;
1268}
1269
35732d01
WW
1270/* exception hash table implementation
1271 */
1272static DEFINE_SPINLOCK(rt6_exception_lock);
1273
1274/* Remove rt6_ex from hash table and free the memory
1275 * Caller must hold rt6_exception_lock
1276 */
1277static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1278 struct rt6_exception *rt6_ex)
1279{
b2427e67 1280 struct net *net;
81eb8447 1281
35732d01
WW
1282 if (!bucket || !rt6_ex)
1283 return;
b2427e67
CIK
1284
1285 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01 1286 hlist_del_rcu(&rt6_ex->hlist);
77634cc6 1287 dst_release(&rt6_ex->rt6i->dst);
35732d01
WW
1288 kfree_rcu(rt6_ex, rcu);
1289 WARN_ON_ONCE(!bucket->depth);
1290 bucket->depth--;
81eb8447 1291 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1292}
1293
1294/* Remove oldest rt6_ex in bucket and free the memory
1295 * Caller must hold rt6_exception_lock
1296 */
1297static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1298{
1299 struct rt6_exception *rt6_ex, *oldest = NULL;
1300
1301 if (!bucket)
1302 return;
1303
1304 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1305 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1306 oldest = rt6_ex;
1307 }
1308 rt6_remove_exception(bucket, oldest);
1309}
1310
1311static u32 rt6_exception_hash(const struct in6_addr *dst,
1312 const struct in6_addr *src)
1313{
1314 static u32 seed __read_mostly;
1315 u32 val;
1316
1317 net_get_random_once(&seed, sizeof(seed));
1318 val = jhash(dst, sizeof(*dst), seed);
1319
1320#ifdef CONFIG_IPV6_SUBTREES
1321 if (src)
1322 val = jhash(src, sizeof(*src), val);
1323#endif
1324 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1325}
1326
1327/* Helper function to find the cached rt in the hash table
1328 * and update bucket pointer to point to the bucket for this
1329 * (daddr, saddr) pair
1330 * Caller must hold rt6_exception_lock
1331 */
1332static struct rt6_exception *
1333__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1334 const struct in6_addr *daddr,
1335 const struct in6_addr *saddr)
1336{
1337 struct rt6_exception *rt6_ex;
1338 u32 hval;
1339
1340 if (!(*bucket) || !daddr)
1341 return NULL;
1342
1343 hval = rt6_exception_hash(daddr, saddr);
1344 *bucket += hval;
1345
1346 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1347 struct rt6_info *rt6 = rt6_ex->rt6i;
1348 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1349
1350#ifdef CONFIG_IPV6_SUBTREES
1351 if (matched && saddr)
1352 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1353#endif
1354 if (matched)
1355 return rt6_ex;
1356 }
1357 return NULL;
1358}
1359
1360/* Helper function to find the cached rt in the hash table
1361 * and update bucket pointer to point to the bucket for this
1362 * (daddr, saddr) pair
1363 * Caller must hold rcu_read_lock()
1364 */
1365static struct rt6_exception *
1366__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1367 const struct in6_addr *daddr,
1368 const struct in6_addr *saddr)
1369{
1370 struct rt6_exception *rt6_ex;
1371 u32 hval;
1372
1373 WARN_ON_ONCE(!rcu_read_lock_held());
1374
1375 if (!(*bucket) || !daddr)
1376 return NULL;
1377
1378 hval = rt6_exception_hash(daddr, saddr);
1379 *bucket += hval;
1380
1381 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1382 struct rt6_info *rt6 = rt6_ex->rt6i;
1383 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1384
1385#ifdef CONFIG_IPV6_SUBTREES
1386 if (matched && saddr)
1387 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1388#endif
1389 if (matched)
1390 return rt6_ex;
1391 }
1392 return NULL;
1393}
1394
8d1c802b 1395static unsigned int fib6_mtu(const struct fib6_info *rt)
d4ead6b3
DA
1396{
1397 unsigned int mtu;
1398
dcd1f572
DA
1399 if (rt->fib6_pmtu) {
1400 mtu = rt->fib6_pmtu;
1401 } else {
1402 struct net_device *dev = fib6_info_nh_dev(rt);
1403 struct inet6_dev *idev;
1404
1405 rcu_read_lock();
1406 idev = __in6_dev_get(dev);
1407 mtu = idev->cnf.mtu6;
1408 rcu_read_unlock();
1409 }
1410
d4ead6b3
DA
1411 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1412
1413 return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
1414}
1415
35732d01 1416static int rt6_insert_exception(struct rt6_info *nrt,
8d1c802b 1417 struct fib6_info *ort)
35732d01 1418{
5e670d84 1419 struct net *net = dev_net(nrt->dst.dev);
35732d01
WW
1420 struct rt6_exception_bucket *bucket;
1421 struct in6_addr *src_key = NULL;
1422 struct rt6_exception *rt6_ex;
1423 int err = 0;
1424
35732d01
WW
1425 spin_lock_bh(&rt6_exception_lock);
1426
1427 if (ort->exception_bucket_flushed) {
1428 err = -EINVAL;
1429 goto out;
1430 }
1431
1432 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1433 lockdep_is_held(&rt6_exception_lock));
1434 if (!bucket) {
1435 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1436 GFP_ATOMIC);
1437 if (!bucket) {
1438 err = -ENOMEM;
1439 goto out;
1440 }
1441 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1442 }
1443
1444#ifdef CONFIG_IPV6_SUBTREES
1445 /* rt6i_src.plen != 0 indicates ort is in subtree
1446 * and exception table is indexed by a hash of
1447 * both rt6i_dst and rt6i_src.
1448 * Otherwise, the exception table is indexed by
1449 * a hash of only rt6i_dst.
1450 */
93c2fb25 1451 if (ort->fib6_src.plen)
35732d01
WW
1452 src_key = &nrt->rt6i_src.addr;
1453#endif
60006a48
WW
1454
1455 /* Update rt6i_prefsrc as it could be changed
1456 * in rt6_remove_prefsrc()
1457 */
93c2fb25 1458 nrt->rt6i_prefsrc = ort->fib6_prefsrc;
f5bbe7ee
WW
1459 /* rt6_mtu_change() might lower mtu on ort.
1460 * Only insert this exception route if its mtu
1461 * is less than ort's mtu value.
1462 */
d4ead6b3 1463 if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
f5bbe7ee
WW
1464 err = -EINVAL;
1465 goto out;
1466 }
60006a48 1467
35732d01
WW
1468 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1469 src_key);
1470 if (rt6_ex)
1471 rt6_remove_exception(bucket, rt6_ex);
1472
1473 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1474 if (!rt6_ex) {
1475 err = -ENOMEM;
1476 goto out;
1477 }
1478 rt6_ex->rt6i = nrt;
1479 rt6_ex->stamp = jiffies;
35732d01
WW
1480 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1481 bucket->depth++;
81eb8447 1482 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1483
1484 if (bucket->depth > FIB6_MAX_DEPTH)
1485 rt6_exception_remove_oldest(bucket);
1486
1487out:
1488 spin_unlock_bh(&rt6_exception_lock);
1489
1490 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1491 if (!err) {
93c2fb25 1492 spin_lock_bh(&ort->fib6_table->tb6_lock);
7aef6859 1493 fib6_update_sernum(net, ort);
93c2fb25 1494 spin_unlock_bh(&ort->fib6_table->tb6_lock);
b886d5f2
PA
1495 fib6_force_start_gc(net);
1496 }
35732d01
WW
1497
1498 return err;
1499}
1500
8d1c802b 1501void rt6_flush_exceptions(struct fib6_info *rt)
35732d01
WW
1502{
1503 struct rt6_exception_bucket *bucket;
1504 struct rt6_exception *rt6_ex;
1505 struct hlist_node *tmp;
1506 int i;
1507
1508 spin_lock_bh(&rt6_exception_lock);
1509 /* Prevent rt6_insert_exception() to recreate the bucket list */
1510 rt->exception_bucket_flushed = 1;
1511
1512 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1513 lockdep_is_held(&rt6_exception_lock));
1514 if (!bucket)
1515 goto out;
1516
1517 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1518 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1519 rt6_remove_exception(bucket, rt6_ex);
1520 WARN_ON_ONCE(bucket->depth);
1521 bucket++;
1522 }
1523
1524out:
1525 spin_unlock_bh(&rt6_exception_lock);
1526}
1527
1528/* Find cached rt in the hash table inside passed in rt
1529 * Caller has to hold rcu_read_lock()
1530 */
8d1c802b 1531static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
35732d01
WW
1532 struct in6_addr *daddr,
1533 struct in6_addr *saddr)
1534{
1535 struct rt6_exception_bucket *bucket;
1536 struct in6_addr *src_key = NULL;
1537 struct rt6_exception *rt6_ex;
1538 struct rt6_info *res = NULL;
1539
1540 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1541
1542#ifdef CONFIG_IPV6_SUBTREES
1543 /* rt6i_src.plen != 0 indicates rt is in subtree
1544 * and exception table is indexed by a hash of
1545 * both rt6i_dst and rt6i_src.
1546 * Otherwise, the exception table is indexed by
1547 * a hash of only rt6i_dst.
1548 */
93c2fb25 1549 if (rt->fib6_src.plen)
35732d01
WW
1550 src_key = saddr;
1551#endif
1552 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1553
1554 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1555 res = rt6_ex->rt6i;
1556
1557 return res;
1558}
1559
1560/* Remove the passed in cached rt from the hash table that contains it */
23fb93a4 1561static int rt6_remove_exception_rt(struct rt6_info *rt)
35732d01 1562{
35732d01
WW
1563 struct rt6_exception_bucket *bucket;
1564 struct in6_addr *src_key = NULL;
1565 struct rt6_exception *rt6_ex;
8a14e46f 1566 struct fib6_info *from;
35732d01
WW
1567 int err;
1568
091311de 1569 from = rcu_dereference(rt->from);
35732d01 1570 if (!from ||
442d713b 1571 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1572 return -EINVAL;
1573
1574 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1575 return -ENOENT;
1576
1577 spin_lock_bh(&rt6_exception_lock);
1578 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1579 lockdep_is_held(&rt6_exception_lock));
1580#ifdef CONFIG_IPV6_SUBTREES
1581 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1582 * and exception table is indexed by a hash of
1583 * both rt6i_dst and rt6i_src.
1584 * Otherwise, the exception table is indexed by
1585 * a hash of only rt6i_dst.
1586 */
93c2fb25 1587 if (from->fib6_src.plen)
35732d01
WW
1588 src_key = &rt->rt6i_src.addr;
1589#endif
1590 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1591 &rt->rt6i_dst.addr,
1592 src_key);
1593 if (rt6_ex) {
1594 rt6_remove_exception(bucket, rt6_ex);
1595 err = 0;
1596 } else {
1597 err = -ENOENT;
1598 }
1599
1600 spin_unlock_bh(&rt6_exception_lock);
1601 return err;
1602}
1603
1604/* Find rt6_ex which contains the passed in rt cache and
1605 * refresh its stamp
1606 */
1607static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1608{
35732d01 1609 struct rt6_exception_bucket *bucket;
8d1c802b 1610 struct fib6_info *from = rt->from;
35732d01
WW
1611 struct in6_addr *src_key = NULL;
1612 struct rt6_exception *rt6_ex;
1613
1614 if (!from ||
442d713b 1615 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1616 return;
1617
1618 rcu_read_lock();
1619 bucket = rcu_dereference(from->rt6i_exception_bucket);
1620
1621#ifdef CONFIG_IPV6_SUBTREES
1622 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1623 * and exception table is indexed by a hash of
1624 * both rt6i_dst and rt6i_src.
1625 * Otherwise, the exception table is indexed by
1626 * a hash of only rt6i_dst.
1627 */
93c2fb25 1628 if (from->fib6_src.plen)
35732d01
WW
1629 src_key = &rt->rt6i_src.addr;
1630#endif
1631 rt6_ex = __rt6_find_exception_rcu(&bucket,
1632 &rt->rt6i_dst.addr,
1633 src_key);
1634 if (rt6_ex)
1635 rt6_ex->stamp = jiffies;
1636
1637 rcu_read_unlock();
1638}
1639
8d1c802b 1640static void rt6_exceptions_remove_prefsrc(struct fib6_info *rt)
60006a48
WW
1641{
1642 struct rt6_exception_bucket *bucket;
1643 struct rt6_exception *rt6_ex;
1644 int i;
1645
1646 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1647 lockdep_is_held(&rt6_exception_lock));
1648
1649 if (bucket) {
1650 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1651 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1652 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1653 }
1654 bucket++;
1655 }
1656 }
1657}
1658
e9fa1495
SB
1659static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1660 struct rt6_info *rt, int mtu)
1661{
1662 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1663 * lowest MTU in the path: always allow updating the route PMTU to
1664 * reflect PMTU decreases.
1665 *
1666 * If the new MTU is higher, and the route PMTU is equal to the local
1667 * MTU, this means the old MTU is the lowest in the path, so allow
1668 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1669 * handle this.
1670 */
1671
1672 if (dst_mtu(&rt->dst) >= mtu)
1673 return true;
1674
1675 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1676 return true;
1677
1678 return false;
1679}
1680
1681static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
8d1c802b 1682 struct fib6_info *rt, int mtu)
f5bbe7ee
WW
1683{
1684 struct rt6_exception_bucket *bucket;
1685 struct rt6_exception *rt6_ex;
1686 int i;
1687
1688 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1689 lockdep_is_held(&rt6_exception_lock));
1690
e9fa1495
SB
1691 if (!bucket)
1692 return;
1693
1694 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1695 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1696 struct rt6_info *entry = rt6_ex->rt6i;
1697
1698 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
d4ead6b3 1699 * route), the metrics of its rt->from have already
e9fa1495
SB
1700 * been updated.
1701 */
d4ead6b3 1702 if (dst_metric_raw(&entry->dst, RTAX_MTU) &&
e9fa1495 1703 rt6_mtu_change_route_allowed(idev, entry, mtu))
d4ead6b3 1704 dst_metric_set(&entry->dst, RTAX_MTU, mtu);
f5bbe7ee 1705 }
e9fa1495 1706 bucket++;
f5bbe7ee
WW
1707 }
1708}
1709
b16cb459
WW
1710#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1711
8d1c802b 1712static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
b16cb459
WW
1713 struct in6_addr *gateway)
1714{
1715 struct rt6_exception_bucket *bucket;
1716 struct rt6_exception *rt6_ex;
1717 struct hlist_node *tmp;
1718 int i;
1719
1720 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1721 return;
1722
1723 spin_lock_bh(&rt6_exception_lock);
1724 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1725 lockdep_is_held(&rt6_exception_lock));
1726
1727 if (bucket) {
1728 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1729 hlist_for_each_entry_safe(rt6_ex, tmp,
1730 &bucket->chain, hlist) {
1731 struct rt6_info *entry = rt6_ex->rt6i;
1732
1733 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1734 RTF_CACHE_GATEWAY &&
1735 ipv6_addr_equal(gateway,
1736 &entry->rt6i_gateway)) {
1737 rt6_remove_exception(bucket, rt6_ex);
1738 }
1739 }
1740 bucket++;
1741 }
1742 }
1743
1744 spin_unlock_bh(&rt6_exception_lock);
1745}
1746
c757faa8
WW
1747static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1748 struct rt6_exception *rt6_ex,
1749 struct fib6_gc_args *gc_args,
1750 unsigned long now)
1751{
1752 struct rt6_info *rt = rt6_ex->rt6i;
1753
1859bac0
PA
1754 /* we are pruning and obsoleting aged-out and non gateway exceptions
1755 * even if others have still references to them, so that on next
1756 * dst_check() such references can be dropped.
1757 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1758 * expired, independently from their aging, as per RFC 8201 section 4
1759 */
31afeb42
WW
1760 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1761 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1762 RT6_TRACE("aging clone %p\n", rt);
1763 rt6_remove_exception(bucket, rt6_ex);
1764 return;
1765 }
1766 } else if (time_after(jiffies, rt->dst.expires)) {
1767 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1768 rt6_remove_exception(bucket, rt6_ex);
1769 return;
31afeb42
WW
1770 }
1771
1772 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1773 struct neighbour *neigh;
1774 __u8 neigh_flags = 0;
1775
1bfa26ff
ED
1776 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1777 if (neigh)
c757faa8 1778 neigh_flags = neigh->flags;
1bfa26ff 1779
c757faa8
WW
1780 if (!(neigh_flags & NTF_ROUTER)) {
1781 RT6_TRACE("purging route %p via non-router but gateway\n",
1782 rt);
1783 rt6_remove_exception(bucket, rt6_ex);
1784 return;
1785 }
1786 }
31afeb42 1787
c757faa8
WW
1788 gc_args->more++;
1789}
1790
8d1c802b 1791void rt6_age_exceptions(struct fib6_info *rt,
c757faa8
WW
1792 struct fib6_gc_args *gc_args,
1793 unsigned long now)
1794{
1795 struct rt6_exception_bucket *bucket;
1796 struct rt6_exception *rt6_ex;
1797 struct hlist_node *tmp;
1798 int i;
1799
1800 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1801 return;
1802
1bfa26ff
ED
1803 rcu_read_lock_bh();
1804 spin_lock(&rt6_exception_lock);
c757faa8
WW
1805 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1806 lockdep_is_held(&rt6_exception_lock));
1807
1808 if (bucket) {
1809 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1810 hlist_for_each_entry_safe(rt6_ex, tmp,
1811 &bucket->chain, hlist) {
1812 rt6_age_examine_exception(bucket, rt6_ex,
1813 gc_args, now);
1814 }
1815 bucket++;
1816 }
1817 }
1bfa26ff
ED
1818 spin_unlock(&rt6_exception_lock);
1819 rcu_read_unlock_bh();
c757faa8
WW
1820}
1821
1d053da9
DA
1822/* must be called with rcu lock held */
1823struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
1824 int oif, struct flowi6 *fl6, int strict)
1da177e4 1825{
367efcb9 1826 struct fib6_node *fn, *saved_fn;
8d1c802b 1827 struct fib6_info *f6i;
1da177e4 1828
6454743b 1829 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1830 saved_fn = fn;
1da177e4 1831
ca254490
DA
1832 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1833 oif = 0;
1834
a3c00e46 1835redo_rt6_select:
23fb93a4 1836 f6i = rt6_select(net, fn, oif, strict);
23fb93a4 1837 if (f6i == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
1838 fn = fib6_backtrack(fn, &fl6->saddr);
1839 if (fn)
1840 goto redo_rt6_select;
367efcb9
MKL
1841 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1842 /* also consider unreachable route */
1843 strict &= ~RT6_LOOKUP_F_REACHABLE;
1844 fn = saved_fn;
1845 goto redo_rt6_select;
367efcb9 1846 }
a3c00e46
MKL
1847 }
1848
d4bea421 1849 trace_fib6_table_lookup(net, f6i, table, fl6);
fb9de91e 1850
1d053da9
DA
1851 return f6i;
1852}
1853
1854struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1855 int oif, struct flowi6 *fl6,
1856 const struct sk_buff *skb, int flags)
1857{
1858 struct fib6_info *f6i;
1859 struct rt6_info *rt;
1860 int strict = 0;
1861
1862 strict |= flags & RT6_LOOKUP_F_IFACE;
1863 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
1864 if (net->ipv6.devconf_all->forwarding == 0)
1865 strict |= RT6_LOOKUP_F_REACHABLE;
1866
1867 rcu_read_lock();
1868
1869 f6i = fib6_table_lookup(net, table, oif, fl6, strict);
1870 if (f6i->fib6_nsiblings)
1871 f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
1872
23fb93a4 1873 if (f6i == net->ipv6.fib6_null_entry) {
421842ed 1874 rt = net->ipv6.ip6_null_entry;
66f5d6ce 1875 rcu_read_unlock();
d3843fe5 1876 dst_hold(&rt->dst);
d3843fe5 1877 return rt;
23fb93a4
DA
1878 }
1879
1880 /*Search through exception table */
1881 rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
1882 if (rt) {
d4ead6b3 1883 if (ip6_hold_safe(net, &rt, true))
d3843fe5 1884 dst_use_noref(&rt->dst, jiffies);
d4ead6b3 1885
66f5d6ce 1886 rcu_read_unlock();
d52d3997 1887 return rt;
3da59bd9 1888 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
93c2fb25 1889 !(f6i->fib6_flags & RTF_GATEWAY))) {
3da59bd9
MKL
1890 /* Create a RTF_CACHE clone which will not be
1891 * owned by the fib6 tree. It is for the special case where
1892 * the daddr in the skb during the neighbor look-up is different
1893 * from the fl6->daddr used to look-up route here.
1894 */
3da59bd9
MKL
1895 struct rt6_info *uncached_rt;
1896
23fb93a4 1897 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
d52d3997 1898
4d85cd0c 1899 rcu_read_unlock();
c71099ac 1900
1cfb71ee
WW
1901 if (uncached_rt) {
1902 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1903 * No need for another dst_hold()
1904 */
8d0b94af 1905 rt6_uncached_list_add(uncached_rt);
81eb8447 1906 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1907 } else {
3da59bd9 1908 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1909 dst_hold(&uncached_rt->dst);
1910 }
b811580d 1911
3da59bd9 1912 return uncached_rt;
d52d3997
MKL
1913 } else {
1914 /* Get a percpu copy */
1915
1916 struct rt6_info *pcpu_rt;
1917
951f788a 1918 local_bh_disable();
23fb93a4 1919 pcpu_rt = rt6_get_pcpu_route(f6i);
d52d3997 1920
93531c67
DA
1921 if (!pcpu_rt)
1922 pcpu_rt = rt6_make_pcpu_route(net, f6i);
1923
951f788a
ED
1924 local_bh_enable();
1925 rcu_read_unlock();
d4bea421 1926
d52d3997
MKL
1927 return pcpu_rt;
1928 }
1da177e4 1929}
9ff74384 1930EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1931
b75cc8f9
DA
1932static struct rt6_info *ip6_pol_route_input(struct net *net,
1933 struct fib6_table *table,
1934 struct flowi6 *fl6,
1935 const struct sk_buff *skb,
1936 int flags)
4acad72d 1937{
b75cc8f9 1938 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1939}
1940
d409b847
MB
1941struct dst_entry *ip6_route_input_lookup(struct net *net,
1942 struct net_device *dev,
b75cc8f9
DA
1943 struct flowi6 *fl6,
1944 const struct sk_buff *skb,
1945 int flags)
72331bc0
SL
1946{
1947 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1948 flags |= RT6_LOOKUP_F_IFACE;
1949
b75cc8f9 1950 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1951}
d409b847 1952EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1953
23aebdac 1954static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1955 struct flow_keys *keys,
1956 struct flow_keys *flkeys)
23aebdac
JS
1957{
1958 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1959 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1960 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1961 const struct ipv6hdr *inner_iph;
1962 const struct icmp6hdr *icmph;
1963 struct ipv6hdr _inner_iph;
cea67a2d 1964 struct icmp6hdr _icmph;
23aebdac
JS
1965
1966 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1967 goto out;
1968
cea67a2d
ED
1969 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1970 sizeof(_icmph), &_icmph);
1971 if (!icmph)
1972 goto out;
1973
23aebdac
JS
1974 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1975 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1976 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1977 icmph->icmp6_type != ICMPV6_PARAMPROB)
1978 goto out;
1979
1980 inner_iph = skb_header_pointer(skb,
1981 skb_transport_offset(skb) + sizeof(*icmph),
1982 sizeof(_inner_iph), &_inner_iph);
1983 if (!inner_iph)
1984 goto out;
1985
1986 key_iph = inner_iph;
5e5d6fed 1987 _flkeys = NULL;
23aebdac 1988out:
5e5d6fed
RP
1989 if (_flkeys) {
1990 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1991 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1992 keys->tags.flow_label = _flkeys->tags.flow_label;
1993 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1994 } else {
1995 keys->addrs.v6addrs.src = key_iph->saddr;
1996 keys->addrs.v6addrs.dst = key_iph->daddr;
fa1be7e0 1997 keys->tags.flow_label = ip6_flowlabel(key_iph);
5e5d6fed
RP
1998 keys->basic.ip_proto = key_iph->nexthdr;
1999 }
23aebdac
JS
2000}
2001
2002/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
2003u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
2004 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
2005{
2006 struct flow_keys hash_keys;
9a2a537a 2007 u32 mhash;
23aebdac 2008
bbfa047a 2009 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
2010 case 0:
2011 memset(&hash_keys, 0, sizeof(hash_keys));
2012 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2013 if (skb) {
2014 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
2015 } else {
2016 hash_keys.addrs.v6addrs.src = fl6->saddr;
2017 hash_keys.addrs.v6addrs.dst = fl6->daddr;
fa1be7e0 2018 hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
b4bac172
DA
2019 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2020 }
2021 break;
2022 case 1:
2023 if (skb) {
2024 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
2025 struct flow_keys keys;
2026
2027 /* short-circuit if we already have L4 hash present */
2028 if (skb->l4_hash)
2029 return skb_get_hash_raw(skb) >> 1;
2030
2031 memset(&hash_keys, 0, sizeof(hash_keys));
2032
2033 if (!flkeys) {
2034 skb_flow_dissect_flow_keys(skb, &keys, flag);
2035 flkeys = &keys;
2036 }
2037 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2038 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
2039 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
2040 hash_keys.ports.src = flkeys->ports.src;
2041 hash_keys.ports.dst = flkeys->ports.dst;
2042 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
2043 } else {
2044 memset(&hash_keys, 0, sizeof(hash_keys));
2045 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
2046 hash_keys.addrs.v6addrs.src = fl6->saddr;
2047 hash_keys.addrs.v6addrs.dst = fl6->daddr;
2048 hash_keys.ports.src = fl6->fl6_sport;
2049 hash_keys.ports.dst = fl6->fl6_dport;
2050 hash_keys.basic.ip_proto = fl6->flowi6_proto;
2051 }
2052 break;
23aebdac 2053 }
9a2a537a 2054 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 2055
9a2a537a 2056 return mhash >> 1;
23aebdac
JS
2057}
2058
c71099ac
TG
2059void ip6_route_input(struct sk_buff *skb)
2060{
b71d1d42 2061 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 2062 struct net *net = dev_net(skb->dev);
adaa70bb 2063 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 2064 struct ip_tunnel_info *tun_info;
4c9483b2 2065 struct flowi6 fl6 = {
e0d56fdd 2066 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
2067 .daddr = iph->daddr,
2068 .saddr = iph->saddr,
6502ca52 2069 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
2070 .flowi6_mark = skb->mark,
2071 .flowi6_proto = iph->nexthdr,
c71099ac 2072 };
5e5d6fed 2073 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 2074
904af04d 2075 tun_info = skb_tunnel_info(skb);
46fa062a 2076 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 2077 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
2078
2079 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
2080 flkeys = &_flkeys;
2081
23aebdac 2082 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 2083 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 2084 skb_dst_drop(skb);
b75cc8f9
DA
2085 skb_dst_set(skb,
2086 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
2087}
2088
b75cc8f9
DA
2089static struct rt6_info *ip6_pol_route_output(struct net *net,
2090 struct fib6_table *table,
2091 struct flowi6 *fl6,
2092 const struct sk_buff *skb,
2093 int flags)
1da177e4 2094{
b75cc8f9 2095 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
2096}
2097
6f21c96a
PA
2098struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
2099 struct flowi6 *fl6, int flags)
c71099ac 2100{
d46a9d67 2101 bool any_src;
c71099ac 2102
4c1feac5
DA
2103 if (rt6_need_strict(&fl6->daddr)) {
2104 struct dst_entry *dst;
2105
2106 dst = l3mdev_link_scope_lookup(net, fl6);
2107 if (dst)
2108 return dst;
2109 }
ca254490 2110
1fb9489b 2111 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 2112
d46a9d67 2113 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 2114 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 2115 (fl6->flowi6_oif && any_src))
77d16f45 2116 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 2117
d46a9d67 2118 if (!any_src)
adaa70bb 2119 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
2120 else if (sk)
2121 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 2122
b75cc8f9 2123 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 2124}
6f21c96a 2125EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2126
2774c131 2127struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2128{
5c1e6aa3 2129 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2130 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2131 struct dst_entry *new = NULL;
2132
1dbe3252 2133 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2134 DST_OBSOLETE_DEAD, 0);
14e50e57 2135 if (rt) {
0a1f5962 2136 rt6_info_init(rt);
81eb8447 2137 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2138
0a1f5962 2139 new = &rt->dst;
14e50e57 2140 new->__use = 1;
352e512c 2141 new->input = dst_discard;
ede2059d 2142 new->output = dst_discard_out;
14e50e57 2143
0a1f5962 2144 dst_copy_metrics(new, &ort->dst);
14e50e57 2145
1dbe3252 2146 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2147 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2148 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2149
2150 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2151#ifdef CONFIG_IPV6_SUBTREES
2152 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2153#endif
14e50e57
DM
2154 }
2155
69ead7af
DM
2156 dst_release(dst_orig);
2157 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2158}
14e50e57 2159
1da177e4
LT
2160/*
2161 * Destination cache support functions
2162 */
2163
8d1c802b 2164static bool fib6_check(struct fib6_info *f6i, u32 cookie)
4b32b5ad 2165{
93531c67
DA
2166 u32 rt_cookie = 0;
2167
8ae86971 2168 if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie)
93531c67
DA
2169 return false;
2170
2171 if (fib6_check_expired(f6i))
2172 return false;
2173
2174 return true;
4b32b5ad
MKL
2175}
2176
a68886a6
DA
2177static struct dst_entry *rt6_check(struct rt6_info *rt,
2178 struct fib6_info *from,
2179 u32 cookie)
3da59bd9 2180{
36143645 2181 u32 rt_cookie = 0;
c5cff856 2182
a68886a6 2183 if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) ||
93531c67 2184 rt_cookie != cookie)
3da59bd9
MKL
2185 return NULL;
2186
2187 if (rt6_check_expired(rt))
2188 return NULL;
2189
2190 return &rt->dst;
2191}
2192
a68886a6
DA
2193static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt,
2194 struct fib6_info *from,
2195 u32 cookie)
3da59bd9 2196{
5973fb1e
MKL
2197 if (!__rt6_check_expired(rt) &&
2198 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
a68886a6 2199 fib6_check(from, cookie))
3da59bd9
MKL
2200 return &rt->dst;
2201 else
2202 return NULL;
2203}
2204
1da177e4
LT
2205static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2206{
a87b7dc9 2207 struct dst_entry *dst_ret;
a68886a6 2208 struct fib6_info *from;
1da177e4
LT
2209 struct rt6_info *rt;
2210
a87b7dc9
DA
2211 rt = container_of(dst, struct rt6_info, dst);
2212
2213 rcu_read_lock();
1da177e4 2214
6f3118b5
ND
2215 /* All IPV6 dsts are created with ->obsolete set to the value
2216 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2217 * into this function always.
2218 */
e3bc10bd 2219
a68886a6 2220 from = rcu_dereference(rt->from);
4b32b5ad 2221
a68886a6
DA
2222 if (from && (rt->rt6i_flags & RTF_PCPU ||
2223 unlikely(!list_empty(&rt->rt6i_uncached))))
2224 dst_ret = rt6_dst_from_check(rt, from, cookie);
3da59bd9 2225 else
a68886a6 2226 dst_ret = rt6_check(rt, from, cookie);
a87b7dc9
DA
2227
2228 rcu_read_unlock();
2229
2230 return dst_ret;
1da177e4
LT
2231}
2232
2233static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2234{
2235 struct rt6_info *rt = (struct rt6_info *) dst;
2236
2237 if (rt) {
54c1a859 2238 if (rt->rt6i_flags & RTF_CACHE) {
c3c14da0 2239 rcu_read_lock();
54c1a859 2240 if (rt6_check_expired(rt)) {
93531c67 2241 rt6_remove_exception_rt(rt);
54c1a859
YH
2242 dst = NULL;
2243 }
c3c14da0 2244 rcu_read_unlock();
54c1a859 2245 } else {
1da177e4 2246 dst_release(dst);
54c1a859
YH
2247 dst = NULL;
2248 }
1da177e4 2249 }
54c1a859 2250 return dst;
1da177e4
LT
2251}
2252
2253static void ip6_link_failure(struct sk_buff *skb)
2254{
2255 struct rt6_info *rt;
2256
3ffe533c 2257 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2258
adf30907 2259 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2260 if (rt) {
8a14e46f 2261 rcu_read_lock();
1eb4f758 2262 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0 2263 if (dst_hold_safe(&rt->dst))
93531c67 2264 rt6_remove_exception_rt(rt);
c5cff856 2265 } else {
a68886a6 2266 struct fib6_info *from;
c5cff856
WW
2267 struct fib6_node *fn;
2268
a68886a6
DA
2269 from = rcu_dereference(rt->from);
2270 if (from) {
2271 fn = rcu_dereference(from->fib6_node);
2272 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2273 fn->fn_sernum = -1;
2274 }
1eb4f758 2275 }
8a14e46f 2276 rcu_read_unlock();
1da177e4
LT
2277 }
2278}
2279
6a3e030f
DA
2280static void rt6_update_expires(struct rt6_info *rt0, int timeout)
2281{
a68886a6
DA
2282 if (!(rt0->rt6i_flags & RTF_EXPIRES)) {
2283 struct fib6_info *from;
2284
2285 rcu_read_lock();
2286 from = rcu_dereference(rt0->from);
2287 if (from)
2288 rt0->dst.expires = from->expires;
2289 rcu_read_unlock();
2290 }
6a3e030f
DA
2291
2292 dst_set_expires(&rt0->dst, timeout);
2293 rt0->rt6i_flags |= RTF_EXPIRES;
2294}
2295
45e4fd26
MKL
2296static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2297{
2298 struct net *net = dev_net(rt->dst.dev);
2299
d4ead6b3 2300 dst_metric_set(&rt->dst, RTAX_MTU, mtu);
45e4fd26 2301 rt->rt6i_flags |= RTF_MODIFIED;
45e4fd26
MKL
2302 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2303}
2304
0d3f6d29
MKL
2305static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2306{
a68886a6
DA
2307 bool from_set;
2308
2309 rcu_read_lock();
2310 from_set = !!rcu_dereference(rt->from);
2311 rcu_read_unlock();
2312
0d3f6d29 2313 return !(rt->rt6i_flags & RTF_CACHE) &&
a68886a6 2314 (rt->rt6i_flags & RTF_PCPU || from_set);
0d3f6d29
MKL
2315}
2316
45e4fd26
MKL
2317static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2318 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2319{
0dec879f 2320 const struct in6_addr *daddr, *saddr;
67ba4152 2321 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2322
19bda36c
XL
2323 if (dst_metric_locked(dst, RTAX_MTU))
2324 return;
2325
0dec879f
JA
2326 if (iph) {
2327 daddr = &iph->daddr;
2328 saddr = &iph->saddr;
2329 } else if (sk) {
2330 daddr = &sk->sk_v6_daddr;
2331 saddr = &inet6_sk(sk)->saddr;
2332 } else {
2333 daddr = NULL;
2334 saddr = NULL;
2335 }
2336 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2337 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2338 if (mtu >= dst_mtu(dst))
2339 return;
9d289715 2340
0d3f6d29 2341 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2342 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2343 /* update rt6_ex->stamp for cache */
2344 if (rt6->rt6i_flags & RTF_CACHE)
2345 rt6_update_exception_stamp_rt(rt6);
0dec879f 2346 } else if (daddr) {
a68886a6 2347 struct fib6_info *from;
45e4fd26
MKL
2348 struct rt6_info *nrt6;
2349
4d85cd0c 2350 rcu_read_lock();
a68886a6
DA
2351 from = rcu_dereference(rt6->from);
2352 nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
45e4fd26
MKL
2353 if (nrt6) {
2354 rt6_do_update_pmtu(nrt6, mtu);
a68886a6 2355 if (rt6_insert_exception(nrt6, from))
2b760fcf 2356 dst_release_immediate(&nrt6->dst);
45e4fd26 2357 }
a68886a6 2358 rcu_read_unlock();
1da177e4
LT
2359 }
2360}
2361
45e4fd26
MKL
2362static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2363 struct sk_buff *skb, u32 mtu)
2364{
2365 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2366}
2367
42ae66c8 2368void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2369 int oif, u32 mark, kuid_t uid)
81aded24
DM
2370{
2371 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2372 struct dst_entry *dst;
2373 struct flowi6 fl6;
2374
2375 memset(&fl6, 0, sizeof(fl6));
2376 fl6.flowi6_oif = oif;
1b3c61dc 2377 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2378 fl6.daddr = iph->daddr;
2379 fl6.saddr = iph->saddr;
6502ca52 2380 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2381 fl6.flowi6_uid = uid;
81aded24
DM
2382
2383 dst = ip6_route_output(net, NULL, &fl6);
2384 if (!dst->error)
45e4fd26 2385 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2386 dst_release(dst);
2387}
2388EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2389
2390void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2391{
33c162a9
MKL
2392 struct dst_entry *dst;
2393
81aded24 2394 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2395 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2396
2397 dst = __sk_dst_get(sk);
2398 if (!dst || !dst->obsolete ||
2399 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2400 return;
2401
2402 bh_lock_sock(sk);
2403 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2404 ip6_datagram_dst_update(sk, false);
2405 bh_unlock_sock(sk);
81aded24
DM
2406}
2407EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2408
7d6850f7
AK
2409void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2410 const struct flowi6 *fl6)
2411{
2412#ifdef CONFIG_IPV6_SUBTREES
2413 struct ipv6_pinfo *np = inet6_sk(sk);
2414#endif
2415
2416 ip6_dst_store(sk, dst,
2417 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2418 &sk->sk_v6_daddr : NULL,
2419#ifdef CONFIG_IPV6_SUBTREES
2420 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2421 &np->saddr :
2422#endif
2423 NULL);
2424}
2425
b55b76b2
DJ
2426/* Handle redirects */
2427struct ip6rd_flowi {
2428 struct flowi6 fl6;
2429 struct in6_addr gateway;
2430};
2431
2432static struct rt6_info *__ip6_route_redirect(struct net *net,
2433 struct fib6_table *table,
2434 struct flowi6 *fl6,
b75cc8f9 2435 const struct sk_buff *skb,
b55b76b2
DJ
2436 int flags)
2437{
2438 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
23fb93a4 2439 struct rt6_info *ret = NULL, *rt_cache;
8d1c802b 2440 struct fib6_info *rt;
b55b76b2
DJ
2441 struct fib6_node *fn;
2442
2443 /* Get the "current" route for this destination and
67c408cf 2444 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2445 *
2446 * RFC 4861 specifies that redirects should only be
2447 * accepted if they come from the nexthop to the target.
2448 * Due to the way the routes are chosen, this notion
2449 * is a bit fuzzy and one might need to check all possible
2450 * routes.
2451 */
2452
66f5d6ce 2453 rcu_read_lock();
6454743b 2454 fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
b55b76b2 2455restart:
66f5d6ce 2456 for_each_fib6_node_rt_rcu(fn) {
5e670d84 2457 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
8067bb8c 2458 continue;
14895687 2459 if (fib6_check_expired(rt))
b55b76b2 2460 continue;
93c2fb25 2461 if (rt->fib6_flags & RTF_REJECT)
b55b76b2 2462 break;
93c2fb25 2463 if (!(rt->fib6_flags & RTF_GATEWAY))
b55b76b2 2464 continue;
5e670d84 2465 if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
b55b76b2 2466 continue;
2b760fcf
WW
2467 /* rt_cache's gateway might be different from its 'parent'
2468 * in the case of an ip redirect.
2469 * So we keep searching in the exception table if the gateway
2470 * is different.
2471 */
5e670d84 2472 if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
2b760fcf
WW
2473 rt_cache = rt6_find_cached_rt(rt,
2474 &fl6->daddr,
2475 &fl6->saddr);
2476 if (rt_cache &&
2477 ipv6_addr_equal(&rdfl->gateway,
2478 &rt_cache->rt6i_gateway)) {
23fb93a4 2479 ret = rt_cache;
2b760fcf
WW
2480 break;
2481 }
b55b76b2 2482 continue;
2b760fcf 2483 }
b55b76b2
DJ
2484 break;
2485 }
2486
2487 if (!rt)
421842ed 2488 rt = net->ipv6.fib6_null_entry;
93c2fb25 2489 else if (rt->fib6_flags & RTF_REJECT) {
23fb93a4 2490 ret = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2491 goto out;
2492 }
2493
421842ed 2494 if (rt == net->ipv6.fib6_null_entry) {
a3c00e46
MKL
2495 fn = fib6_backtrack(fn, &fl6->saddr);
2496 if (fn)
2497 goto restart;
b55b76b2 2498 }
a3c00e46 2499
b0a1ba59 2500out:
23fb93a4 2501 if (ret)
e873e4b9 2502 ip6_hold_safe(net, &ret, true);
23fb93a4
DA
2503 else
2504 ret = ip6_create_rt_rcu(rt);
b55b76b2 2505
66f5d6ce 2506 rcu_read_unlock();
b55b76b2 2507
b65f164d 2508 trace_fib6_table_lookup(net, rt, table, fl6);
23fb93a4 2509 return ret;
b55b76b2
DJ
2510};
2511
2512static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2513 const struct flowi6 *fl6,
2514 const struct sk_buff *skb,
2515 const struct in6_addr *gateway)
b55b76b2
DJ
2516{
2517 int flags = RT6_LOOKUP_F_HAS_SADDR;
2518 struct ip6rd_flowi rdfl;
2519
2520 rdfl.fl6 = *fl6;
2521 rdfl.gateway = *gateway;
2522
b75cc8f9 2523 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2524 flags, __ip6_route_redirect);
2525}
2526
e2d118a1
LC
2527void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2528 kuid_t uid)
3a5ad2ee
DM
2529{
2530 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2531 struct dst_entry *dst;
2532 struct flowi6 fl6;
2533
2534 memset(&fl6, 0, sizeof(fl6));
e374c618 2535 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2536 fl6.flowi6_oif = oif;
2537 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2538 fl6.daddr = iph->daddr;
2539 fl6.saddr = iph->saddr;
6502ca52 2540 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2541 fl6.flowi6_uid = uid;
3a5ad2ee 2542
b75cc8f9 2543 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2544 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2545 dst_release(dst);
2546}
2547EXPORT_SYMBOL_GPL(ip6_redirect);
2548
c92a59ec
DJ
2549void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2550 u32 mark)
2551{
2552 const struct ipv6hdr *iph = ipv6_hdr(skb);
2553 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2554 struct dst_entry *dst;
2555 struct flowi6 fl6;
2556
2557 memset(&fl6, 0, sizeof(fl6));
e374c618 2558 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2559 fl6.flowi6_oif = oif;
2560 fl6.flowi6_mark = mark;
c92a59ec
DJ
2561 fl6.daddr = msg->dest;
2562 fl6.saddr = iph->daddr;
e2d118a1 2563 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2564
b75cc8f9 2565 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2566 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2567 dst_release(dst);
2568}
2569
3a5ad2ee
DM
2570void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2571{
e2d118a1
LC
2572 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2573 sk->sk_uid);
3a5ad2ee
DM
2574}
2575EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2576
0dbaee3b 2577static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2578{
0dbaee3b
DM
2579 struct net_device *dev = dst->dev;
2580 unsigned int mtu = dst_mtu(dst);
2581 struct net *net = dev_net(dev);
2582
1da177e4
LT
2583 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2584
5578689a
DL
2585 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2586 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2587
2588 /*
1ab1457c
YH
2589 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2590 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2591 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2592 * rely only on pmtu discovery"
2593 */
2594 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2595 mtu = IPV6_MAXPLEN;
2596 return mtu;
2597}
2598
ebb762f2 2599static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2600{
d33e4553 2601 struct inet6_dev *idev;
d4ead6b3 2602 unsigned int mtu;
4b32b5ad
MKL
2603
2604 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2605 if (mtu)
30f78d8e 2606 goto out;
618f9bc7
SK
2607
2608 mtu = IPV6_MIN_MTU;
d33e4553
DM
2609
2610 rcu_read_lock();
2611 idev = __in6_dev_get(dst->dev);
2612 if (idev)
2613 mtu = idev->cnf.mtu6;
2614 rcu_read_unlock();
2615
30f78d8e 2616out:
14972cbd
RP
2617 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2618
2619 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2620}
2621
901731b8
DA
2622/* MTU selection:
2623 * 1. mtu on route is locked - use it
2624 * 2. mtu from nexthop exception
2625 * 3. mtu from egress device
2626 *
2627 * based on ip6_dst_mtu_forward and exception logic of
2628 * rt6_find_cached_rt; called with rcu_read_lock
2629 */
2630u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
2631 struct in6_addr *saddr)
2632{
2633 struct rt6_exception_bucket *bucket;
2634 struct rt6_exception *rt6_ex;
2635 struct in6_addr *src_key;
2636 struct inet6_dev *idev;
2637 u32 mtu = 0;
2638
2639 if (unlikely(fib6_metric_locked(f6i, RTAX_MTU))) {
2640 mtu = f6i->fib6_pmtu;
2641 if (mtu)
2642 goto out;
2643 }
2644
2645 src_key = NULL;
2646#ifdef CONFIG_IPV6_SUBTREES
2647 if (f6i->fib6_src.plen)
2648 src_key = saddr;
2649#endif
2650
2651 bucket = rcu_dereference(f6i->rt6i_exception_bucket);
2652 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
2653 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
2654 mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
2655
2656 if (likely(!mtu)) {
2657 struct net_device *dev = fib6_info_nh_dev(f6i);
2658
2659 mtu = IPV6_MIN_MTU;
2660 idev = __in6_dev_get(dev);
2661 if (idev && idev->cnf.mtu6 > mtu)
2662 mtu = idev->cnf.mtu6;
2663 }
2664
2665 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2666out:
2667 return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
2668}
2669
3b00944c 2670struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2671 struct flowi6 *fl6)
1da177e4 2672{
87a11578 2673 struct dst_entry *dst;
1da177e4
LT
2674 struct rt6_info *rt;
2675 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2676 struct net *net = dev_net(dev);
1da177e4 2677
38308473 2678 if (unlikely(!idev))
122bdf67 2679 return ERR_PTR(-ENODEV);
1da177e4 2680
ad706862 2681 rt = ip6_dst_alloc(net, dev, 0);
38308473 2682 if (unlikely(!rt)) {
1da177e4 2683 in6_dev_put(idev);
87a11578 2684 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2685 goto out;
2686 }
2687
8e2ec639 2688 rt->dst.flags |= DST_HOST;
588753f1 2689 rt->dst.input = ip6_input;
8e2ec639 2690 rt->dst.output = ip6_output;
550bab42 2691 rt->rt6i_gateway = fl6->daddr;
87a11578 2692 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2693 rt->rt6i_dst.plen = 128;
2694 rt->rt6i_idev = idev;
14edd87d 2695 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2696
4c981e28 2697 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2698 * do proper release of the net_device
2699 */
2700 rt6_uncached_list_add(rt);
81eb8447 2701 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2702
87a11578
DM
2703 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2704
1da177e4 2705out:
87a11578 2706 return dst;
1da177e4
LT
2707}
2708
569d3645 2709static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2710{
86393e52 2711 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2712 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2713 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2714 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2715 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2716 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2717 int entries;
7019b78e 2718
fc66f95c 2719 entries = dst_entries_get_fast(ops);
49a18d86 2720 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2721 entries <= rt_max_size)
1da177e4
LT
2722 goto out;
2723
6891a346 2724 net->ipv6.ip6_rt_gc_expire++;
14956643 2725 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2726 entries = dst_entries_get_slow(ops);
2727 if (entries < ops->gc_thresh)
7019b78e 2728 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2729out:
7019b78e 2730 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2731 return entries > rt_max_size;
1da177e4
LT
2732}
2733
8d1c802b 2734static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
d4ead6b3 2735 struct fib6_config *cfg)
e715b6d3 2736{
263243d6 2737 struct dst_metrics *p;
e715b6d3 2738
63159f29 2739 if (!cfg->fc_mx)
e715b6d3
FW
2740 return 0;
2741
263243d6
ED
2742 p = kzalloc(sizeof(*rt->fib6_metrics), GFP_KERNEL);
2743 if (unlikely(!p))
e715b6d3
FW
2744 return -ENOMEM;
2745
263243d6
ED
2746 refcount_set(&p->refcnt, 1);
2747 rt->fib6_metrics = p;
e715b6d3 2748
263243d6 2749 return ip_metrics_convert(net, cfg->fc_mx, cfg->fc_mx_len, p->metrics);
e715b6d3 2750}
1da177e4 2751
8c14586f
DA
2752static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2753 struct fib6_config *cfg,
f4797b33
DA
2754 const struct in6_addr *gw_addr,
2755 u32 tbid, int flags)
8c14586f
DA
2756{
2757 struct flowi6 fl6 = {
2758 .flowi6_oif = cfg->fc_ifindex,
2759 .daddr = *gw_addr,
2760 .saddr = cfg->fc_prefsrc,
2761 };
2762 struct fib6_table *table;
2763 struct rt6_info *rt;
8c14586f 2764
f4797b33 2765 table = fib6_get_table(net, tbid);
8c14586f
DA
2766 if (!table)
2767 return NULL;
2768
2769 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2770 flags |= RT6_LOOKUP_F_HAS_SADDR;
2771
f4797b33 2772 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2773 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2774
2775 /* if table lookup failed, fall back to full lookup */
2776 if (rt == net->ipv6.ip6_null_entry) {
2777 ip6_rt_put(rt);
2778 rt = NULL;
2779 }
2780
2781 return rt;
2782}
2783
fc1e64e1
DA
2784static int ip6_route_check_nh_onlink(struct net *net,
2785 struct fib6_config *cfg,
9fbb704c 2786 const struct net_device *dev,
fc1e64e1
DA
2787 struct netlink_ext_ack *extack)
2788{
44750f84 2789 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2790 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2791 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2792 struct rt6_info *grt;
2793 int err;
2794
2795 err = 0;
2796 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2797 if (grt) {
58e354c0
DA
2798 if (!grt->dst.error &&
2799 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2800 NL_SET_ERR_MSG(extack,
2801 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2802 err = -EINVAL;
2803 }
2804
2805 ip6_rt_put(grt);
2806 }
2807
2808 return err;
2809}
2810
1edce99f
DA
2811static int ip6_route_check_nh(struct net *net,
2812 struct fib6_config *cfg,
2813 struct net_device **_dev,
2814 struct inet6_dev **idev)
2815{
2816 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2817 struct net_device *dev = _dev ? *_dev : NULL;
2818 struct rt6_info *grt = NULL;
2819 int err = -EHOSTUNREACH;
2820
2821 if (cfg->fc_table) {
f4797b33
DA
2822 int flags = RT6_LOOKUP_F_IFACE;
2823
2824 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2825 cfg->fc_table, flags);
1edce99f
DA
2826 if (grt) {
2827 if (grt->rt6i_flags & RTF_GATEWAY ||
2828 (dev && dev != grt->dst.dev)) {
2829 ip6_rt_put(grt);
2830 grt = NULL;
2831 }
2832 }
2833 }
2834
2835 if (!grt)
b75cc8f9 2836 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2837
2838 if (!grt)
2839 goto out;
2840
2841 if (dev) {
2842 if (dev != grt->dst.dev) {
2843 ip6_rt_put(grt);
2844 goto out;
2845 }
2846 } else {
2847 *_dev = dev = grt->dst.dev;
2848 *idev = grt->rt6i_idev;
2849 dev_hold(dev);
2850 in6_dev_hold(grt->rt6i_idev);
2851 }
2852
2853 if (!(grt->rt6i_flags & RTF_GATEWAY))
2854 err = 0;
2855
2856 ip6_rt_put(grt);
2857
2858out:
2859 return err;
2860}
2861
9fbb704c
DA
2862static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2863 struct net_device **_dev, struct inet6_dev **idev,
2864 struct netlink_ext_ack *extack)
2865{
2866 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2867 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2868 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2869 const struct net_device *dev = *_dev;
232378e8 2870 bool need_addr_check = !dev;
9fbb704c
DA
2871 int err = -EINVAL;
2872
2873 /* if gw_addr is local we will fail to detect this in case
2874 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2875 * will return already-added prefix route via interface that
2876 * prefix route was assigned to, which might be non-loopback.
2877 */
232378e8
DA
2878 if (dev &&
2879 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2880 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2881 goto out;
2882 }
2883
2884 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2885 /* IPv6 strictly inhibits using not link-local
2886 * addresses as nexthop address.
2887 * Otherwise, router will not able to send redirects.
2888 * It is very good, but in some (rare!) circumstances
2889 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2890 * some exceptions. --ANK
2891 * We allow IPv4-mapped nexthops to support RFC4798-type
2892 * addressing
2893 */
2894 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2895 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2896 goto out;
2897 }
2898
2899 if (cfg->fc_flags & RTNH_F_ONLINK)
2900 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2901 else
2902 err = ip6_route_check_nh(net, cfg, _dev, idev);
2903
2904 if (err)
2905 goto out;
2906 }
2907
2908 /* reload in case device was changed */
2909 dev = *_dev;
2910
2911 err = -EINVAL;
2912 if (!dev) {
2913 NL_SET_ERR_MSG(extack, "Egress device not specified");
2914 goto out;
2915 } else if (dev->flags & IFF_LOOPBACK) {
2916 NL_SET_ERR_MSG(extack,
2917 "Egress device can not be loopback device for this route");
2918 goto out;
2919 }
232378e8
DA
2920
2921 /* if we did not check gw_addr above, do so now that the
2922 * egress device has been resolved.
2923 */
2924 if (need_addr_check &&
2925 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2926 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2927 goto out;
2928 }
2929
9fbb704c
DA
2930 err = 0;
2931out:
2932 return err;
2933}
2934
8d1c802b 2935static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
acb54e3c 2936 gfp_t gfp_flags,
333c4301 2937 struct netlink_ext_ack *extack)
1da177e4 2938{
5578689a 2939 struct net *net = cfg->fc_nlinfo.nl_net;
8d1c802b 2940 struct fib6_info *rt = NULL;
1da177e4
LT
2941 struct net_device *dev = NULL;
2942 struct inet6_dev *idev = NULL;
c71099ac 2943 struct fib6_table *table;
1da177e4 2944 int addr_type;
8c5b83f0 2945 int err = -EINVAL;
1da177e4 2946
557c44be 2947 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2948 if (cfg->fc_flags & RTF_PCPU) {
2949 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2950 goto out;
d5d531cb 2951 }
557c44be 2952
2ea2352e
WW
2953 /* RTF_CACHE is an internal flag; can not be set by userspace */
2954 if (cfg->fc_flags & RTF_CACHE) {
2955 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2956 goto out;
2957 }
2958
e8478e80
DA
2959 if (cfg->fc_type > RTN_MAX) {
2960 NL_SET_ERR_MSG(extack, "Invalid route type");
2961 goto out;
2962 }
2963
d5d531cb
DA
2964 if (cfg->fc_dst_len > 128) {
2965 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2966 goto out;
2967 }
2968 if (cfg->fc_src_len > 128) {
2969 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2970 goto out;
d5d531cb 2971 }
1da177e4 2972#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2973 if (cfg->fc_src_len) {
2974 NL_SET_ERR_MSG(extack,
2975 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2976 goto out;
d5d531cb 2977 }
1da177e4 2978#endif
86872cb5 2979 if (cfg->fc_ifindex) {
1da177e4 2980 err = -ENODEV;
5578689a 2981 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2982 if (!dev)
2983 goto out;
2984 idev = in6_dev_get(dev);
2985 if (!idev)
2986 goto out;
2987 }
2988
86872cb5
TG
2989 if (cfg->fc_metric == 0)
2990 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2991
fc1e64e1
DA
2992 if (cfg->fc_flags & RTNH_F_ONLINK) {
2993 if (!dev) {
2994 NL_SET_ERR_MSG(extack,
2995 "Nexthop device required for onlink");
2996 err = -ENODEV;
2997 goto out;
2998 }
2999
3000 if (!(dev->flags & IFF_UP)) {
3001 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3002 err = -ENETDOWN;
3003 goto out;
3004 }
3005 }
3006
d71314b4 3007 err = -ENOBUFS;
38308473
DM
3008 if (cfg->fc_nlinfo.nlh &&
3009 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 3010 table = fib6_get_table(net, cfg->fc_table);
38308473 3011 if (!table) {
f3213831 3012 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
3013 table = fib6_new_table(net, cfg->fc_table);
3014 }
3015 } else {
3016 table = fib6_new_table(net, cfg->fc_table);
3017 }
38308473
DM
3018
3019 if (!table)
c71099ac 3020 goto out;
c71099ac 3021
93531c67
DA
3022 err = -ENOMEM;
3023 rt = fib6_info_alloc(gfp_flags);
3024 if (!rt)
1da177e4 3025 goto out;
93531c67
DA
3026
3027 if (cfg->fc_flags & RTF_ADDRCONF)
3028 rt->dst_nocount = true;
1da177e4 3029
d4ead6b3
DA
3030 err = ip6_convert_metrics(net, rt, cfg);
3031 if (err < 0)
1da177e4 3032 goto out;
1da177e4 3033
1716a961 3034 if (cfg->fc_flags & RTF_EXPIRES)
14895687 3035 fib6_set_expires(rt, jiffies +
1716a961
G
3036 clock_t_to_jiffies(cfg->fc_expires));
3037 else
14895687 3038 fib6_clean_expires(rt);
1da177e4 3039
86872cb5
TG
3040 if (cfg->fc_protocol == RTPROT_UNSPEC)
3041 cfg->fc_protocol = RTPROT_BOOT;
93c2fb25 3042 rt->fib6_protocol = cfg->fc_protocol;
86872cb5
TG
3043
3044 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4 3045
19e42e45
RP
3046 if (cfg->fc_encap) {
3047 struct lwtunnel_state *lwtstate;
3048
30357d7d 3049 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 3050 cfg->fc_encap, AF_INET6, cfg,
9ae28727 3051 &lwtstate, extack);
19e42e45
RP
3052 if (err)
3053 goto out;
5e670d84 3054 rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
19e42e45
RP
3055 }
3056
93c2fb25
DA
3057 ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
3058 rt->fib6_dst.plen = cfg->fc_dst_len;
3059 if (rt->fib6_dst.plen == 128)
3b6761d1 3060 rt->dst_host = true;
e5fd387a 3061
1da177e4 3062#ifdef CONFIG_IPV6_SUBTREES
93c2fb25
DA
3063 ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
3064 rt->fib6_src.plen = cfg->fc_src_len;
1da177e4
LT
3065#endif
3066
93c2fb25 3067 rt->fib6_metric = cfg->fc_metric;
5e670d84 3068 rt->fib6_nh.nh_weight = 1;
1da177e4 3069
e8478e80 3070 rt->fib6_type = cfg->fc_type;
1da177e4
LT
3071
3072 /* We cannot add true routes via loopback here,
3073 they would result in kernel looping; promote them to reject routes
3074 */
86872cb5 3075 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
3076 (dev && (dev->flags & IFF_LOOPBACK) &&
3077 !(addr_type & IPV6_ADDR_LOOPBACK) &&
3078 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 3079 /* hold loopback dev/idev if we haven't done so. */
5578689a 3080 if (dev != net->loopback_dev) {
1da177e4
LT
3081 if (dev) {
3082 dev_put(dev);
3083 in6_dev_put(idev);
3084 }
5578689a 3085 dev = net->loopback_dev;
1da177e4
LT
3086 dev_hold(dev);
3087 idev = in6_dev_get(dev);
3088 if (!idev) {
3089 err = -ENODEV;
3090 goto out;
3091 }
3092 }
93c2fb25 3093 rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
1da177e4
LT
3094 goto install_route;
3095 }
3096
86872cb5 3097 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
3098 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
3099 if (err)
48ed7b26 3100 goto out;
1da177e4 3101
93531c67 3102 rt->fib6_nh.nh_gw = cfg->fc_gateway;
1da177e4
LT
3103 }
3104
3105 err = -ENODEV;
38308473 3106 if (!dev)
1da177e4
LT
3107 goto out;
3108
428604fb
LB
3109 if (idev->cnf.disable_ipv6) {
3110 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
3111 err = -EACCES;
3112 goto out;
3113 }
3114
955ec4cb
DA
3115 if (!(dev->flags & IFF_UP)) {
3116 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
3117 err = -ENETDOWN;
3118 goto out;
3119 }
3120
c3968a85
DW
3121 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
3122 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 3123 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
3124 err = -EINVAL;
3125 goto out;
3126 }
93c2fb25
DA
3127 rt->fib6_prefsrc.addr = cfg->fc_prefsrc;
3128 rt->fib6_prefsrc.plen = 128;
c3968a85 3129 } else
93c2fb25 3130 rt->fib6_prefsrc.plen = 0;
c3968a85 3131
93c2fb25 3132 rt->fib6_flags = cfg->fc_flags;
1da177e4
LT
3133
3134install_route:
93c2fb25 3135 if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
5609b80a 3136 !netif_carrier_ok(dev))
5e670d84
DA
3137 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
3138 rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
93531c67 3139 rt->fib6_nh.nh_dev = dev;
93c2fb25 3140 rt->fib6_table = table;
63152fc0 3141
c346dca1 3142 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 3143
dcd1f572
DA
3144 if (idev)
3145 in6_dev_put(idev);
3146
8c5b83f0 3147 return rt;
6b9ea5a6
RP
3148out:
3149 if (dev)
3150 dev_put(dev);
3151 if (idev)
3152 in6_dev_put(idev);
6b9ea5a6 3153
93531c67 3154 fib6_info_release(rt);
8c5b83f0 3155 return ERR_PTR(err);
6b9ea5a6
RP
3156}
3157
acb54e3c 3158int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
333c4301 3159 struct netlink_ext_ack *extack)
6b9ea5a6 3160{
8d1c802b 3161 struct fib6_info *rt;
6b9ea5a6
RP
3162 int err;
3163
acb54e3c 3164 rt = ip6_route_info_create(cfg, gfp_flags, extack);
d4ead6b3
DA
3165 if (IS_ERR(rt))
3166 return PTR_ERR(rt);
6b9ea5a6 3167
d4ead6b3 3168 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, extack);
93531c67 3169 fib6_info_release(rt);
6b9ea5a6 3170
1da177e4
LT
3171 return err;
3172}
3173
8d1c802b 3174static int __ip6_del_rt(struct fib6_info *rt, struct nl_info *info)
1da177e4 3175{
afb1d4b5 3176 struct net *net = info->nl_net;
c71099ac 3177 struct fib6_table *table;
afb1d4b5 3178 int err;
1da177e4 3179
421842ed 3180 if (rt == net->ipv6.fib6_null_entry) {
6825a26c
G
3181 err = -ENOENT;
3182 goto out;
3183 }
6c813a72 3184
93c2fb25 3185 table = rt->fib6_table;
66f5d6ce 3186 spin_lock_bh(&table->tb6_lock);
86872cb5 3187 err = fib6_del(rt, info);
66f5d6ce 3188 spin_unlock_bh(&table->tb6_lock);
1da177e4 3189
6825a26c 3190out:
93531c67 3191 fib6_info_release(rt);
1da177e4
LT
3192 return err;
3193}
3194
8d1c802b 3195int ip6_del_rt(struct net *net, struct fib6_info *rt)
e0a1ad73 3196{
afb1d4b5
DA
3197 struct nl_info info = { .nl_net = net };
3198
528c4ceb 3199 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3200}
3201
8d1c802b 3202static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
0ae81335
DA
3203{
3204 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3205 struct net *net = info->nl_net;
16a16cd3 3206 struct sk_buff *skb = NULL;
0ae81335 3207 struct fib6_table *table;
e3330039 3208 int err = -ENOENT;
0ae81335 3209
421842ed 3210 if (rt == net->ipv6.fib6_null_entry)
e3330039 3211 goto out_put;
93c2fb25 3212 table = rt->fib6_table;
66f5d6ce 3213 spin_lock_bh(&table->tb6_lock);
0ae81335 3214
93c2fb25 3215 if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
8d1c802b 3216 struct fib6_info *sibling, *next_sibling;
0ae81335 3217
16a16cd3
DA
3218 /* prefer to send a single notification with all hops */
3219 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3220 if (skb) {
3221 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3222
d4ead6b3 3223 if (rt6_fill_node(net, skb, rt, NULL,
16a16cd3
DA
3224 NULL, NULL, 0, RTM_DELROUTE,
3225 info->portid, seq, 0) < 0) {
3226 kfree_skb(skb);
3227 skb = NULL;
3228 } else
3229 info->skip_notify = 1;
3230 }
3231
0ae81335 3232 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25
DA
3233 &rt->fib6_siblings,
3234 fib6_siblings) {
0ae81335
DA
3235 err = fib6_del(sibling, info);
3236 if (err)
e3330039 3237 goto out_unlock;
0ae81335
DA
3238 }
3239 }
3240
3241 err = fib6_del(rt, info);
e3330039 3242out_unlock:
66f5d6ce 3243 spin_unlock_bh(&table->tb6_lock);
e3330039 3244out_put:
93531c67 3245 fib6_info_release(rt);
16a16cd3
DA
3246
3247 if (skb) {
e3330039 3248 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3249 info->nlh, gfp_any());
3250 }
0ae81335
DA
3251 return err;
3252}
3253
23fb93a4
DA
3254static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
3255{
3256 int rc = -ESRCH;
3257
3258 if (cfg->fc_ifindex && rt->dst.dev->ifindex != cfg->fc_ifindex)
3259 goto out;
3260
3261 if (cfg->fc_flags & RTF_GATEWAY &&
3262 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
3263 goto out;
3264 if (dst_hold_safe(&rt->dst))
3265 rc = rt6_remove_exception_rt(rt);
3266out:
3267 return rc;
3268}
3269
333c4301
DA
3270static int ip6_route_del(struct fib6_config *cfg,
3271 struct netlink_ext_ack *extack)
1da177e4 3272{
8d1c802b 3273 struct rt6_info *rt_cache;
c71099ac 3274 struct fib6_table *table;
8d1c802b 3275 struct fib6_info *rt;
1da177e4 3276 struct fib6_node *fn;
1da177e4
LT
3277 int err = -ESRCH;
3278
5578689a 3279 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3280 if (!table) {
3281 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3282 return err;
d5d531cb 3283 }
c71099ac 3284
66f5d6ce 3285 rcu_read_lock();
1da177e4 3286
c71099ac 3287 fn = fib6_locate(&table->tb6_root,
86872cb5 3288 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3289 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3290 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3291
1da177e4 3292 if (fn) {
66f5d6ce 3293 for_each_fib6_node_rt_rcu(fn) {
2b760fcf 3294 if (cfg->fc_flags & RTF_CACHE) {
23fb93a4
DA
3295 int rc;
3296
2b760fcf
WW
3297 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3298 &cfg->fc_src);
23fb93a4
DA
3299 if (rt_cache) {
3300 rc = ip6_del_cached_rt(rt_cache, cfg);
9e575010
ED
3301 if (rc != -ESRCH) {
3302 rcu_read_unlock();
23fb93a4 3303 return rc;
9e575010 3304 }
23fb93a4
DA
3305 }
3306 continue;
2b760fcf 3307 }
86872cb5 3308 if (cfg->fc_ifindex &&
5e670d84
DA
3309 (!rt->fib6_nh.nh_dev ||
3310 rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
1da177e4 3311 continue;
86872cb5 3312 if (cfg->fc_flags & RTF_GATEWAY &&
5e670d84 3313 !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
1da177e4 3314 continue;
93c2fb25 3315 if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
1da177e4 3316 continue;
93c2fb25 3317 if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
c2ed1880 3318 continue;
e873e4b9
WW
3319 if (!fib6_info_hold_safe(rt))
3320 continue;
66f5d6ce 3321 rcu_read_unlock();
1da177e4 3322
0ae81335
DA
3323 /* if gateway was specified only delete the one hop */
3324 if (cfg->fc_flags & RTF_GATEWAY)
3325 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3326
3327 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3328 }
3329 }
66f5d6ce 3330 rcu_read_unlock();
1da177e4
LT
3331
3332 return err;
3333}
3334
6700c270 3335static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3336{
a6279458 3337 struct netevent_redirect netevent;
e8599ff4 3338 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3339 struct ndisc_options ndopts;
3340 struct inet6_dev *in6_dev;
3341 struct neighbour *neigh;
a68886a6 3342 struct fib6_info *from;
71bcdba0 3343 struct rd_msg *msg;
6e157b6a
DM
3344 int optlen, on_link;
3345 u8 *lladdr;
e8599ff4 3346
29a3cad5 3347 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3348 optlen -= sizeof(*msg);
e8599ff4
DM
3349
3350 if (optlen < 0) {
6e157b6a 3351 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3352 return;
3353 }
3354
71bcdba0 3355 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3356
71bcdba0 3357 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3358 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3359 return;
3360 }
3361
6e157b6a 3362 on_link = 0;
71bcdba0 3363 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3364 on_link = 1;
71bcdba0 3365 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3366 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3367 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3368 return;
3369 }
3370
3371 in6_dev = __in6_dev_get(skb->dev);
3372 if (!in6_dev)
3373 return;
3374 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3375 return;
3376
3377 /* RFC2461 8.1:
3378 * The IP source address of the Redirect MUST be the same as the current
3379 * first-hop router for the specified ICMP Destination Address.
3380 */
3381
f997c55c 3382 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3383 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3384 return;
3385 }
6e157b6a
DM
3386
3387 lladdr = NULL;
e8599ff4
DM
3388 if (ndopts.nd_opts_tgt_lladdr) {
3389 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3390 skb->dev);
3391 if (!lladdr) {
3392 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3393 return;
3394 }
3395 }
3396
6e157b6a 3397 rt = (struct rt6_info *) dst;
ec13ad1d 3398 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3399 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3400 return;
6e157b6a 3401 }
e8599ff4 3402
6e157b6a
DM
3403 /* Redirect received -> path was valid.
3404 * Look, redirects are sent only in response to data packets,
3405 * so that this nexthop apparently is reachable. --ANK
3406 */
0dec879f 3407 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3408
71bcdba0 3409 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3410 if (!neigh)
3411 return;
a6279458 3412
1da177e4
LT
3413 /*
3414 * We have finally decided to accept it.
3415 */
3416
f997c55c 3417 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3418 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3419 NEIGH_UPDATE_F_OVERRIDE|
3420 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3421 NEIGH_UPDATE_F_ISROUTER)),
3422 NDISC_REDIRECT, &ndopts);
1da177e4 3423
4d85cd0c 3424 rcu_read_lock();
a68886a6 3425 from = rcu_dereference(rt->from);
e873e4b9
WW
3426 /* This fib6_info_hold() is safe here because we hold reference to rt
3427 * and rt already holds reference to fib6_info.
3428 */
8a14e46f 3429 fib6_info_hold(from);
4d85cd0c 3430 rcu_read_unlock();
8a14e46f
DA
3431
3432 nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
38308473 3433 if (!nrt)
1da177e4
LT
3434 goto out;
3435
3436 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3437 if (on_link)
3438 nrt->rt6i_flags &= ~RTF_GATEWAY;
3439
4e3fd7a0 3440 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3441
2b760fcf
WW
3442 /* No need to remove rt from the exception table if rt is
3443 * a cached route because rt6_insert_exception() will
3444 * takes care of it
3445 */
8a14e46f 3446 if (rt6_insert_exception(nrt, from)) {
2b760fcf
WW
3447 dst_release_immediate(&nrt->dst);
3448 goto out;
3449 }
1da177e4 3450
d8d1f30b
CG
3451 netevent.old = &rt->dst;
3452 netevent.new = &nrt->dst;
71bcdba0 3453 netevent.daddr = &msg->dest;
60592833 3454 netevent.neigh = neigh;
8d71740c
TT
3455 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3456
1da177e4 3457out:
8a14e46f 3458 fib6_info_release(from);
e8599ff4 3459 neigh_release(neigh);
6e157b6a
DM
3460}
3461
70ceb4f5 3462#ifdef CONFIG_IPV6_ROUTE_INFO
8d1c802b 3463static struct fib6_info *rt6_get_route_info(struct net *net,
b71d1d42 3464 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3465 const struct in6_addr *gwaddr,
3466 struct net_device *dev)
70ceb4f5 3467{
830218c1
DA
3468 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3469 int ifindex = dev->ifindex;
70ceb4f5 3470 struct fib6_node *fn;
8d1c802b 3471 struct fib6_info *rt = NULL;
c71099ac
TG
3472 struct fib6_table *table;
3473
830218c1 3474 table = fib6_get_table(net, tb_id);
38308473 3475 if (!table)
c71099ac 3476 return NULL;
70ceb4f5 3477
66f5d6ce 3478 rcu_read_lock();
38fbeeee 3479 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3480 if (!fn)
3481 goto out;
3482
66f5d6ce 3483 for_each_fib6_node_rt_rcu(fn) {
5e670d84 3484 if (rt->fib6_nh.nh_dev->ifindex != ifindex)
70ceb4f5 3485 continue;
93c2fb25 3486 if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
70ceb4f5 3487 continue;
5e670d84 3488 if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
70ceb4f5 3489 continue;
e873e4b9
WW
3490 if (!fib6_info_hold_safe(rt))
3491 continue;
70ceb4f5
YH
3492 break;
3493 }
3494out:
66f5d6ce 3495 rcu_read_unlock();
70ceb4f5
YH
3496 return rt;
3497}
3498
8d1c802b 3499static struct fib6_info *rt6_add_route_info(struct net *net,
b71d1d42 3500 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3501 const struct in6_addr *gwaddr,
3502 struct net_device *dev,
95c96174 3503 unsigned int pref)
70ceb4f5 3504{
86872cb5 3505 struct fib6_config cfg = {
238fc7ea 3506 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3507 .fc_ifindex = dev->ifindex,
86872cb5
TG
3508 .fc_dst_len = prefixlen,
3509 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3510 RTF_UP | RTF_PREF(pref),
b91d5329 3511 .fc_protocol = RTPROT_RA,
e8478e80 3512 .fc_type = RTN_UNICAST,
15e47304 3513 .fc_nlinfo.portid = 0,
efa2cea0
DL
3514 .fc_nlinfo.nlh = NULL,
3515 .fc_nlinfo.nl_net = net,
86872cb5
TG
3516 };
3517
830218c1 3518 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3519 cfg.fc_dst = *prefix;
3520 cfg.fc_gateway = *gwaddr;
70ceb4f5 3521
e317da96
YH
3522 /* We should treat it as a default route if prefix length is 0. */
3523 if (!prefixlen)
86872cb5 3524 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3525
acb54e3c 3526 ip6_route_add(&cfg, GFP_ATOMIC, NULL);
70ceb4f5 3527
830218c1 3528 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3529}
3530#endif
3531
8d1c802b 3532struct fib6_info *rt6_get_dflt_router(struct net *net,
afb1d4b5
DA
3533 const struct in6_addr *addr,
3534 struct net_device *dev)
1ab1457c 3535{
830218c1 3536 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
8d1c802b 3537 struct fib6_info *rt;
c71099ac 3538 struct fib6_table *table;
1da177e4 3539
afb1d4b5 3540 table = fib6_get_table(net, tb_id);
38308473 3541 if (!table)
c71099ac 3542 return NULL;
1da177e4 3543
66f5d6ce
WW
3544 rcu_read_lock();
3545 for_each_fib6_node_rt_rcu(&table->tb6_root) {
5e670d84 3546 if (dev == rt->fib6_nh.nh_dev &&
93c2fb25 3547 ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
5e670d84 3548 ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
1da177e4
LT
3549 break;
3550 }
e873e4b9
WW
3551 if (rt && !fib6_info_hold_safe(rt))
3552 rt = NULL;
66f5d6ce 3553 rcu_read_unlock();
1da177e4
LT
3554 return rt;
3555}
3556
8d1c802b 3557struct fib6_info *rt6_add_dflt_router(struct net *net,
afb1d4b5 3558 const struct in6_addr *gwaddr,
ebacaaa0
YH
3559 struct net_device *dev,
3560 unsigned int pref)
1da177e4 3561{
86872cb5 3562 struct fib6_config cfg = {
ca254490 3563 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3564 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3565 .fc_ifindex = dev->ifindex,
3566 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3567 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3568 .fc_protocol = RTPROT_RA,
e8478e80 3569 .fc_type = RTN_UNICAST,
15e47304 3570 .fc_nlinfo.portid = 0,
5578689a 3571 .fc_nlinfo.nlh = NULL,
afb1d4b5 3572 .fc_nlinfo.nl_net = net,
86872cb5 3573 };
1da177e4 3574
4e3fd7a0 3575 cfg.fc_gateway = *gwaddr;
1da177e4 3576
acb54e3c 3577 if (!ip6_route_add(&cfg, GFP_ATOMIC, NULL)) {
830218c1
DA
3578 struct fib6_table *table;
3579
3580 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3581 if (table)
3582 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3583 }
1da177e4 3584
afb1d4b5 3585 return rt6_get_dflt_router(net, gwaddr, dev);
1da177e4
LT
3586}
3587
afb1d4b5
DA
3588static void __rt6_purge_dflt_routers(struct net *net,
3589 struct fib6_table *table)
1da177e4 3590{
8d1c802b 3591 struct fib6_info *rt;
1da177e4
LT
3592
3593restart:
66f5d6ce
WW
3594 rcu_read_lock();
3595 for_each_fib6_node_rt_rcu(&table->tb6_root) {
dcd1f572
DA
3596 struct net_device *dev = fib6_info_nh_dev(rt);
3597 struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
3598
93c2fb25 3599 if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
e873e4b9
WW
3600 (!idev || idev->cnf.accept_ra != 2) &&
3601 fib6_info_hold_safe(rt)) {
93531c67
DA
3602 rcu_read_unlock();
3603 ip6_del_rt(net, rt);
1da177e4
LT
3604 goto restart;
3605 }
3606 }
66f5d6ce 3607 rcu_read_unlock();
830218c1
DA
3608
3609 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3610}
3611
3612void rt6_purge_dflt_routers(struct net *net)
3613{
3614 struct fib6_table *table;
3615 struct hlist_head *head;
3616 unsigned int h;
3617
3618 rcu_read_lock();
3619
3620 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3621 head = &net->ipv6.fib_table_hash[h];
3622 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3623 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
afb1d4b5 3624 __rt6_purge_dflt_routers(net, table);
830218c1
DA
3625 }
3626 }
3627
3628 rcu_read_unlock();
1da177e4
LT
3629}
3630
5578689a
DL
3631static void rtmsg_to_fib6_config(struct net *net,
3632 struct in6_rtmsg *rtmsg,
86872cb5
TG
3633 struct fib6_config *cfg)
3634{
3635 memset(cfg, 0, sizeof(*cfg));
3636
ca254490
DA
3637 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3638 : RT6_TABLE_MAIN;
86872cb5
TG
3639 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3640 cfg->fc_metric = rtmsg->rtmsg_metric;
3641 cfg->fc_expires = rtmsg->rtmsg_info;
3642 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3643 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3644 cfg->fc_flags = rtmsg->rtmsg_flags;
e8478e80 3645 cfg->fc_type = rtmsg->rtmsg_type;
86872cb5 3646
5578689a 3647 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3648
4e3fd7a0
AD
3649 cfg->fc_dst = rtmsg->rtmsg_dst;
3650 cfg->fc_src = rtmsg->rtmsg_src;
3651 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3652}
3653
5578689a 3654int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3655{
86872cb5 3656 struct fib6_config cfg;
1da177e4
LT
3657 struct in6_rtmsg rtmsg;
3658 int err;
3659
67ba4152 3660 switch (cmd) {
1da177e4
LT
3661 case SIOCADDRT: /* Add a route */
3662 case SIOCDELRT: /* Delete a route */
af31f412 3663 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3664 return -EPERM;
3665 err = copy_from_user(&rtmsg, arg,
3666 sizeof(struct in6_rtmsg));
3667 if (err)
3668 return -EFAULT;
86872cb5 3669
5578689a 3670 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3671
1da177e4
LT
3672 rtnl_lock();
3673 switch (cmd) {
3674 case SIOCADDRT:
acb54e3c 3675 err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
1da177e4
LT
3676 break;
3677 case SIOCDELRT:
333c4301 3678 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3679 break;
3680 default:
3681 err = -EINVAL;
3682 }
3683 rtnl_unlock();
3684
3685 return err;
3ff50b79 3686 }
1da177e4
LT
3687
3688 return -EINVAL;
3689}
3690
3691/*
3692 * Drop the packet on the floor
3693 */
3694
d5fdd6ba 3695static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3696{
612f09e8 3697 int type;
adf30907 3698 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3699 switch (ipstats_mib_noroutes) {
3700 case IPSTATS_MIB_INNOROUTES:
0660e03f 3701 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3702 if (type == IPV6_ADDR_ANY) {
bdb7cc64
SS
3703 IP6_INC_STATS(dev_net(dst->dev),
3704 __in6_dev_get_safely(skb->dev),
3bd653c8 3705 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3706 break;
3707 }
3708 /* FALLTHROUGH */
3709 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3710 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3711 ipstats_mib_noroutes);
612f09e8
YH
3712 break;
3713 }
3ffe533c 3714 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3715 kfree_skb(skb);
3716 return 0;
3717}
3718
9ce8ade0
TG
3719static int ip6_pkt_discard(struct sk_buff *skb)
3720{
612f09e8 3721 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3722}
3723
ede2059d 3724static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3725{
adf30907 3726 skb->dev = skb_dst(skb)->dev;
612f09e8 3727 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3728}
3729
9ce8ade0
TG
3730static int ip6_pkt_prohibit(struct sk_buff *skb)
3731{
612f09e8 3732 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3733}
3734
ede2059d 3735static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3736{
adf30907 3737 skb->dev = skb_dst(skb)->dev;
612f09e8 3738 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3739}
3740
1da177e4
LT
3741/*
3742 * Allocate a dst for local (unicast / anycast) address.
3743 */
3744
360a9887
DA
3745struct fib6_info *addrconf_f6i_alloc(struct net *net,
3746 struct inet6_dev *idev,
3747 const struct in6_addr *addr,
3748 bool anycast, gfp_t gfp_flags)
1da177e4 3749{
ca254490 3750 u32 tb_id;
4832c30d 3751 struct net_device *dev = idev->dev;
360a9887 3752 struct fib6_info *f6i;
5f02ce24 3753
360a9887
DA
3754 f6i = fib6_info_alloc(gfp_flags);
3755 if (!f6i)
1da177e4
LT
3756 return ERR_PTR(-ENOMEM);
3757
360a9887 3758 f6i->dst_nocount = true;
360a9887
DA
3759 f6i->dst_host = true;
3760 f6i->fib6_protocol = RTPROT_KERNEL;
3761 f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
e8478e80 3762 if (anycast) {
360a9887
DA
3763 f6i->fib6_type = RTN_ANYCAST;
3764 f6i->fib6_flags |= RTF_ANYCAST;
e8478e80 3765 } else {
360a9887
DA
3766 f6i->fib6_type = RTN_LOCAL;
3767 f6i->fib6_flags |= RTF_LOCAL;
e8478e80 3768 }
1da177e4 3769
360a9887 3770 f6i->fib6_nh.nh_gw = *addr;
93531c67 3771 dev_hold(dev);
360a9887
DA
3772 f6i->fib6_nh.nh_dev = dev;
3773 f6i->fib6_dst.addr = *addr;
3774 f6i->fib6_dst.plen = 128;
ca254490 3775 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
360a9887 3776 f6i->fib6_table = fib6_get_table(net, tb_id);
1da177e4 3777
360a9887 3778 return f6i;
1da177e4
LT
3779}
3780
/*
 * remove deleted ip from prefsrc entries
 *
 * Argument bundle passed through fib6_clean_all() to
 * fib6_remove_prefsrc().
 */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device to match; NULL matches any */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* preferred source address to drop */
};
3787
8d1c802b 3788static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
c3968a85
DW
3789{
3790 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3791 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3792 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3793
5e670d84 3794 if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
421842ed 3795 rt != net->ipv6.fib6_null_entry &&
93c2fb25 3796 ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
60006a48 3797 spin_lock_bh(&rt6_exception_lock);
c3968a85 3798 /* remove prefsrc entry */
93c2fb25 3799 rt->fib6_prefsrc.plen = 0;
60006a48
WW
3800 /* need to update cache as well */
3801 rt6_exceptions_remove_prefsrc(rt);
3802 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3803 }
3804 return 0;
3805}
3806
3807void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3808{
3809 struct net *net = dev_net(ifp->idev->dev);
3810 struct arg_dev_net_ip adni = {
3811 .dev = ifp->idev->dev,
3812 .net = net,
3813 .addr = &ifp->addr,
3814 };
0c3584d5 3815 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3816}
3817
be7a010d 3818#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3819
3820/* Remove routers and update dst entries when gateway turn into host. */
8d1c802b 3821static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
be7a010d
DJ
3822{
3823 struct in6_addr *gateway = (struct in6_addr *)arg;
3824
93c2fb25 3825 if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
5e670d84 3826 ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
be7a010d
DJ
3827 return -1;
3828 }
b16cb459
WW
3829
3830 /* Further clean up cached routes in exception table.
3831 * This is needed because cached route may have a different
3832 * gateway than its 'parent' in the case of an ip redirect.
3833 */
3834 rt6_exceptions_clean_tohost(rt, gateway);
3835
be7a010d
DJ
3836 return 0;
3837}
3838
3839void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3840{
3841 fib6_clean_all(net, fib6_clean_tohost, gateway);
3842}
3843
/*
 * Argument bundle for the per-route netdev event callbacks.  The
 * anonymous union must stay the second member: rt6_sync_up()
 * initialises it positionally.
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event refers to */
	union {
		unsigned int nh_flags;	/* nexthop flags, read by fib6_ifup() */
		unsigned long event;	/* event code, read by fib6_ifdown() */
	};
};
3851
8d1c802b 3852static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
d7dedee1 3853{
8d1c802b 3854 struct fib6_info *iter;
d7dedee1
IS
3855 struct fib6_node *fn;
3856
93c2fb25
DA
3857 fn = rcu_dereference_protected(rt->fib6_node,
3858 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3859 iter = rcu_dereference_protected(fn->leaf,
93c2fb25 3860 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1 3861 while (iter) {
93c2fb25 3862 if (iter->fib6_metric == rt->fib6_metric &&
33bd5ac5 3863 rt6_qualify_for_ecmp(iter))
d7dedee1 3864 return iter;
8fb11a9a 3865 iter = rcu_dereference_protected(iter->fib6_next,
93c2fb25 3866 lockdep_is_held(&rt->fib6_table->tb6_lock));
d7dedee1
IS
3867 }
3868
3869 return NULL;
3870}
3871
8d1c802b 3872static bool rt6_is_dead(const struct fib6_info *rt)
d7dedee1 3873{
5e670d84
DA
3874 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
3875 (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
dcd1f572 3876 fib6_ignore_linkdown(rt)))
d7dedee1
IS
3877 return true;
3878
3879 return false;
3880}
3881
8d1c802b 3882static int rt6_multipath_total_weight(const struct fib6_info *rt)
d7dedee1 3883{
8d1c802b 3884 struct fib6_info *iter;
d7dedee1
IS
3885 int total = 0;
3886
3887 if (!rt6_is_dead(rt))
5e670d84 3888 total += rt->fib6_nh.nh_weight;
d7dedee1 3889
93c2fb25 3890 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
d7dedee1 3891 if (!rt6_is_dead(iter))
5e670d84 3892 total += iter->fib6_nh.nh_weight;
d7dedee1
IS
3893 }
3894
3895 return total;
3896}
3897
8d1c802b 3898static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
d7dedee1
IS
3899{
3900 int upper_bound = -1;
3901
3902 if (!rt6_is_dead(rt)) {
5e670d84 3903 *weight += rt->fib6_nh.nh_weight;
d7dedee1
IS
3904 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3905 total) - 1;
3906 }
5e670d84 3907 atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
d7dedee1
IS
3908}
3909
8d1c802b 3910static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
d7dedee1 3911{
8d1c802b 3912 struct fib6_info *iter;
d7dedee1
IS
3913 int weight = 0;
3914
3915 rt6_upper_bound_set(rt, &weight, total);
3916
93c2fb25 3917 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
d7dedee1
IS
3918 rt6_upper_bound_set(iter, &weight, total);
3919}
3920
8d1c802b 3921void rt6_multipath_rebalance(struct fib6_info *rt)
d7dedee1 3922{
8d1c802b 3923 struct fib6_info *first;
d7dedee1
IS
3924 int total;
3925
3926 /* In case the entire multipath route was marked for flushing,
3927 * then there is no need to rebalance upon the removal of every
3928 * sibling route.
3929 */
93c2fb25 3930 if (!rt->fib6_nsiblings || rt->should_flush)
d7dedee1
IS
3931 return;
3932
3933 /* During lookup routes are evaluated in order, so we need to
3934 * make sure upper bounds are assigned from the first sibling
3935 * onwards.
3936 */
3937 first = rt6_multipath_first_sibling(rt);
3938 if (WARN_ON_ONCE(!first))
3939 return;
3940
3941 total = rt6_multipath_total_weight(first);
3942 rt6_multipath_upper_bound_set(first, total);
3943}
3944
8d1c802b 3945static int fib6_ifup(struct fib6_info *rt, void *p_arg)
2127d95a
IS
3946{
3947 const struct arg_netdev_event *arg = p_arg;
7aef6859 3948 struct net *net = dev_net(arg->dev);
2127d95a 3949
421842ed 3950 if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
5e670d84 3951 rt->fib6_nh.nh_flags &= ~arg->nh_flags;
7aef6859 3952 fib6_update_sernum_upto_root(net, rt);
d7dedee1 3953 rt6_multipath_rebalance(rt);
1de178ed 3954 }
2127d95a
IS
3955
3956 return 0;
3957}
3958
3959void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3960{
3961 struct arg_netdev_event arg = {
3962 .dev = dev,
6802f3ad
IS
3963 {
3964 .nh_flags = nh_flags,
3965 },
2127d95a
IS
3966 };
3967
3968 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3969 arg.nh_flags |= RTNH_F_LINKDOWN;
3970
3971 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3972}
3973
8d1c802b 3974static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
1de178ed
IS
3975 const struct net_device *dev)
3976{
8d1c802b 3977 struct fib6_info *iter;
1de178ed 3978
5e670d84 3979 if (rt->fib6_nh.nh_dev == dev)
1de178ed 3980 return true;
93c2fb25 3981 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84 3982 if (iter->fib6_nh.nh_dev == dev)
1de178ed
IS
3983 return true;
3984
3985 return false;
3986}
3987
8d1c802b 3988static void rt6_multipath_flush(struct fib6_info *rt)
1de178ed 3989{
8d1c802b 3990 struct fib6_info *iter;
1de178ed
IS
3991
3992 rt->should_flush = 1;
93c2fb25 3993 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
1de178ed
IS
3994 iter->should_flush = 1;
3995}
3996
8d1c802b 3997static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
1de178ed
IS
3998 const struct net_device *down_dev)
3999{
8d1c802b 4000 struct fib6_info *iter;
1de178ed
IS
4001 unsigned int dead = 0;
4002
5e670d84
DA
4003 if (rt->fib6_nh.nh_dev == down_dev ||
4004 rt->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed 4005 dead++;
93c2fb25 4006 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
4007 if (iter->fib6_nh.nh_dev == down_dev ||
4008 iter->fib6_nh.nh_flags & RTNH_F_DEAD)
1de178ed
IS
4009 dead++;
4010
4011 return dead;
4012}
4013
8d1c802b 4014static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
1de178ed
IS
4015 const struct net_device *dev,
4016 unsigned int nh_flags)
4017{
8d1c802b 4018 struct fib6_info *iter;
1de178ed 4019
5e670d84
DA
4020 if (rt->fib6_nh.nh_dev == dev)
4021 rt->fib6_nh.nh_flags |= nh_flags;
93c2fb25 4022 list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
5e670d84
DA
4023 if (iter->fib6_nh.nh_dev == dev)
4024 iter->fib6_nh.nh_flags |= nh_flags;
1de178ed
IS
4025}
4026
a1a22c12 4027/* called with write lock held for table with rt */
8d1c802b 4028static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
1da177e4 4029{
4c981e28
IS
4030 const struct arg_netdev_event *arg = p_arg;
4031 const struct net_device *dev = arg->dev;
7aef6859 4032 struct net *net = dev_net(dev);
8ed67789 4033
421842ed 4034 if (rt == net->ipv6.fib6_null_entry)
27c6fa73
IS
4035 return 0;
4036
4037 switch (arg->event) {
4038 case NETDEV_UNREGISTER:
5e670d84 4039 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
27c6fa73 4040 case NETDEV_DOWN:
1de178ed 4041 if (rt->should_flush)
27c6fa73 4042 return -1;
93c2fb25 4043 if (!rt->fib6_nsiblings)
5e670d84 4044 return rt->fib6_nh.nh_dev == dev ? -1 : 0;
1de178ed
IS
4045 if (rt6_multipath_uses_dev(rt, dev)) {
4046 unsigned int count;
4047
4048 count = rt6_multipath_dead_count(rt, dev);
93c2fb25 4049 if (rt->fib6_nsiblings + 1 == count) {
1de178ed
IS
4050 rt6_multipath_flush(rt);
4051 return -1;
4052 }
4053 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
4054 RTNH_F_LINKDOWN);
7aef6859 4055 fib6_update_sernum(net, rt);
d7dedee1 4056 rt6_multipath_rebalance(rt);
1de178ed
IS
4057 }
4058 return -2;
27c6fa73 4059 case NETDEV_CHANGE:
5e670d84 4060 if (rt->fib6_nh.nh_dev != dev ||
93c2fb25 4061 rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73 4062 break;
5e670d84 4063 rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 4064 rt6_multipath_rebalance(rt);
27c6fa73 4065 break;
2b241361 4066 }
c159d30c 4067
1da177e4
LT
4068 return 0;
4069}
4070
27c6fa73 4071void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 4072{
4c981e28 4073 struct arg_netdev_event arg = {
8ed67789 4074 .dev = dev,
6802f3ad
IS
4075 {
4076 .event = event,
4077 },
8ed67789
DL
4078 };
4079
4c981e28
IS
4080 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
4081}
4082
4083void rt6_disable_ip(struct net_device *dev, unsigned long event)
4084{
4085 rt6_sync_down_dev(dev, event);
4086 rt6_uncached_list_flush_dev(dev_net(dev), dev);
4087 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
4088}
4089
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
4094
8d1c802b 4095static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4096{
4097 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
4098 struct inet6_dev *idev;
4099
4100 /* In IPv6 pmtu discovery is not optional,
4101 so that RTAX_MTU lock cannot disable it.
4102 We still use this lock to block changes
4103 caused by addrconf/ndisc.
4104 */
4105
4106 idev = __in6_dev_get(arg->dev);
38308473 4107 if (!idev)
1da177e4
LT
4108 return 0;
4109
4110 /* For administrative MTU increase, there is no way to discover
4111 IPv6 PMTU increase, so PMTU increase should be updated here.
4112 Since RFC 1981 doesn't include administrative MTU increase
4113 update PMTU increase is a MUST. (i.e. jumbo frame)
4114 */
5e670d84 4115 if (rt->fib6_nh.nh_dev == arg->dev &&
d4ead6b3
DA
4116 !fib6_metric_locked(rt, RTAX_MTU)) {
4117 u32 mtu = rt->fib6_pmtu;
4118
4119 if (mtu >= arg->mtu ||
4120 (mtu < arg->mtu && mtu == idev->cnf.mtu6))
4121 fib6_metric_set(rt, RTAX_MTU, arg->mtu);
4122
f5bbe7ee 4123 spin_lock_bh(&rt6_exception_lock);
e9fa1495 4124 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 4125 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 4126 }
1da177e4
LT
4127 return 0;
4128}
4129
95c96174 4130void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 4131{
c71099ac
TG
4132 struct rt6_mtu_change_arg arg = {
4133 .dev = dev,
4134 .mtu = mtu,
4135 };
1da177e4 4136
0c3584d5 4137 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
4138}
4139
ef7c79ed 4140static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 4141 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 4142 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 4143 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 4144 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
4145 [RTA_PRIORITY] = { .type = NLA_U32 },
4146 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 4147 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 4148 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
4149 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
4150 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 4151 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 4152 [RTA_UID] = { .type = NLA_U32 },
3b45a410 4153 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 4154 [RTA_TABLE] = { .type = NLA_U32 },
eacb9384
RP
4155 [RTA_IP_PROTO] = { .type = NLA_U8 },
4156 [RTA_SPORT] = { .type = NLA_U16 },
4157 [RTA_DPORT] = { .type = NLA_U16 },
86872cb5
TG
4158};
4159
4160static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
4161 struct fib6_config *cfg,
4162 struct netlink_ext_ack *extack)
1da177e4 4163{
86872cb5
TG
4164 struct rtmsg *rtm;
4165 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4166 unsigned int pref;
86872cb5 4167 int err;
1da177e4 4168
fceb6435
JB
4169 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4170 NULL);
86872cb5
TG
4171 if (err < 0)
4172 goto errout;
1da177e4 4173
86872cb5
TG
4174 err = -EINVAL;
4175 rtm = nlmsg_data(nlh);
4176 memset(cfg, 0, sizeof(*cfg));
4177
4178 cfg->fc_table = rtm->rtm_table;
4179 cfg->fc_dst_len = rtm->rtm_dst_len;
4180 cfg->fc_src_len = rtm->rtm_src_len;
4181 cfg->fc_flags = RTF_UP;
4182 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4183 cfg->fc_type = rtm->rtm_type;
86872cb5 4184
ef2c7d7b
ND
4185 if (rtm->rtm_type == RTN_UNREACHABLE ||
4186 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4187 rtm->rtm_type == RTN_PROHIBIT ||
4188 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4189 cfg->fc_flags |= RTF_REJECT;
4190
ab79ad14
4191 if (rtm->rtm_type == RTN_LOCAL)
4192 cfg->fc_flags |= RTF_LOCAL;
4193
1f56a01f
MKL
4194 if (rtm->rtm_flags & RTM_F_CLONED)
4195 cfg->fc_flags |= RTF_CACHE;
4196
fc1e64e1
DA
4197 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4198
15e47304 4199 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4200 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4201 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4202
4203 if (tb[RTA_GATEWAY]) {
67b61f6c 4204 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4205 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4206 }
86872cb5
TG
4207
4208 if (tb[RTA_DST]) {
4209 int plen = (rtm->rtm_dst_len + 7) >> 3;
4210
4211 if (nla_len(tb[RTA_DST]) < plen)
4212 goto errout;
4213
4214 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4215 }
86872cb5
TG
4216
4217 if (tb[RTA_SRC]) {
4218 int plen = (rtm->rtm_src_len + 7) >> 3;
4219
4220 if (nla_len(tb[RTA_SRC]) < plen)
4221 goto errout;
4222
4223 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4224 }
86872cb5 4225
c3968a85 4226 if (tb[RTA_PREFSRC])
67b61f6c 4227 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4228
86872cb5
TG
4229 if (tb[RTA_OIF])
4230 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4231
4232 if (tb[RTA_PRIORITY])
4233 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4234
4235 if (tb[RTA_METRICS]) {
4236 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4237 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4238 }
86872cb5
TG
4239
4240 if (tb[RTA_TABLE])
4241 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4242
51ebd318
ND
4243 if (tb[RTA_MULTIPATH]) {
4244 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4245 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4246
4247 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4248 cfg->fc_mp_len, extack);
9ed59592
DA
4249 if (err < 0)
4250 goto errout;
51ebd318
ND
4251 }
4252
c78ba6d6
LR
4253 if (tb[RTA_PREF]) {
4254 pref = nla_get_u8(tb[RTA_PREF]);
4255 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4256 pref != ICMPV6_ROUTER_PREF_HIGH)
4257 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4258 cfg->fc_flags |= RTF_PREF(pref);
4259 }
4260
19e42e45
RP
4261 if (tb[RTA_ENCAP])
4262 cfg->fc_encap = tb[RTA_ENCAP];
4263
9ed59592 4264 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4265 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4266
c255bd68 4267 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4268 if (err < 0)
4269 goto errout;
4270 }
4271
32bc201e
XL
4272 if (tb[RTA_EXPIRES]) {
4273 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4274
4275 if (addrconf_finite_timeout(timeout)) {
4276 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4277 cfg->fc_flags |= RTF_EXPIRES;
4278 }
4279 }
4280
86872cb5
TG
4281 err = 0;
4282errout:
4283 return err;
1da177e4
LT
4284}
4285
6b9ea5a6 4286struct rt6_nh {
8d1c802b 4287 struct fib6_info *fib6_info;
6b9ea5a6 4288 struct fib6_config r_cfg;
6b9ea5a6
RP
4289 struct list_head next;
4290};
4291
4292static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4293{
4294 struct rt6_nh *nh;
4295
4296 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4297 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4298 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4299 nh->r_cfg.fc_ifindex);
4300 }
4301}
4302
d4ead6b3
DA
4303static int ip6_route_info_append(struct net *net,
4304 struct list_head *rt6_nh_list,
8d1c802b
DA
4305 struct fib6_info *rt,
4306 struct fib6_config *r_cfg)
6b9ea5a6
RP
4307{
4308 struct rt6_nh *nh;
6b9ea5a6
RP
4309 int err = -EEXIST;
4310
4311 list_for_each_entry(nh, rt6_nh_list, next) {
8d1c802b
DA
4312 /* check if fib6_info already exists */
4313 if (rt6_duplicate_nexthop(nh->fib6_info, rt))
6b9ea5a6
RP
4314 return err;
4315 }
4316
4317 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4318 if (!nh)
4319 return -ENOMEM;
8d1c802b 4320 nh->fib6_info = rt;
d4ead6b3 4321 err = ip6_convert_metrics(net, rt, r_cfg);
6b9ea5a6
RP
4322 if (err) {
4323 kfree(nh);
4324 return err;
4325 }
4326 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4327 list_add_tail(&nh->next, rt6_nh_list);
4328
4329 return 0;
4330}
4331
8d1c802b
DA
4332static void ip6_route_mpath_notify(struct fib6_info *rt,
4333 struct fib6_info *rt_last,
3b1137fe
DA
4334 struct nl_info *info,
4335 __u16 nlflags)
4336{
4337 /* if this is an APPEND route, then rt points to the first route
4338 * inserted and rt_last points to last route inserted. Userspace
4339 * wants a consistent dump of the route which starts at the first
4340 * nexthop. Since sibling routes are always added at the end of
4341 * the list, find the first sibling of the last route appended
4342 */
93c2fb25
DA
4343 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
4344 rt = list_first_entry(&rt_last->fib6_siblings,
8d1c802b 4345 struct fib6_info,
93c2fb25 4346 fib6_siblings);
3b1137fe
DA
4347 }
4348
4349 if (rt)
4350 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4351}
4352
333c4301
DA
4353static int ip6_route_multipath_add(struct fib6_config *cfg,
4354 struct netlink_ext_ack *extack)
51ebd318 4355{
8d1c802b 4356 struct fib6_info *rt_notif = NULL, *rt_last = NULL;
3b1137fe 4357 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4358 struct fib6_config r_cfg;
4359 struct rtnexthop *rtnh;
8d1c802b 4360 struct fib6_info *rt;
6b9ea5a6
RP
4361 struct rt6_nh *err_nh;
4362 struct rt6_nh *nh, *nh_safe;
3b1137fe 4363 __u16 nlflags;
51ebd318
ND
4364 int remaining;
4365 int attrlen;
6b9ea5a6
RP
4366 int err = 1;
4367 int nhn = 0;
4368 int replace = (cfg->fc_nlinfo.nlh &&
4369 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4370 LIST_HEAD(rt6_nh_list);
51ebd318 4371
3b1137fe
DA
4372 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4373 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4374 nlflags |= NLM_F_APPEND;
4375
35f1b4e9 4376 remaining = cfg->fc_mp_len;
51ebd318 4377 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4378
6b9ea5a6 4379 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
8d1c802b 4380 * fib6_info structs per nexthop
6b9ea5a6 4381 */
51ebd318
ND
4382 while (rtnh_ok(rtnh, remaining)) {
4383 memcpy(&r_cfg, cfg, sizeof(*cfg));
4384 if (rtnh->rtnh_ifindex)
4385 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4386
4387 attrlen = rtnh_attrlen(rtnh);
4388 if (attrlen > 0) {
4389 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4390
4391 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4392 if (nla) {
67b61f6c 4393 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4394 r_cfg.fc_flags |= RTF_GATEWAY;
4395 }
19e42e45
RP
4396 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4397 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4398 if (nla)
4399 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4400 }
6b9ea5a6 4401
68e2ffde 4402 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
acb54e3c 4403 rt = ip6_route_info_create(&r_cfg, GFP_KERNEL, extack);
8c5b83f0
RP
4404 if (IS_ERR(rt)) {
4405 err = PTR_ERR(rt);
4406 rt = NULL;
6b9ea5a6 4407 goto cleanup;
8c5b83f0 4408 }
b5d2d75e
DA
4409 if (!rt6_qualify_for_ecmp(rt)) {
4410 err = -EINVAL;
4411 NL_SET_ERR_MSG(extack,
4412 "Device only routes can not be added for IPv6 using the multipath API.");
4413 fib6_info_release(rt);
4414 goto cleanup;
4415 }
6b9ea5a6 4416
5e670d84 4417 rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
398958ae 4418
d4ead6b3
DA
4419 err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
4420 rt, &r_cfg);
51ebd318 4421 if (err) {
93531c67 4422 fib6_info_release(rt);
6b9ea5a6
RP
4423 goto cleanup;
4424 }
4425
4426 rtnh = rtnh_next(rtnh, &remaining);
4427 }
4428
3b1137fe
DA
4429 /* for add and replace send one notification with all nexthops.
4430 * Skip the notification in fib6_add_rt2node and send one with
4431 * the full route when done
4432 */
4433 info->skip_notify = 1;
4434
6b9ea5a6
RP
4435 err_nh = NULL;
4436 list_for_each_entry(nh, &rt6_nh_list, next) {
8d1c802b
DA
4437 err = __ip6_ins_rt(nh->fib6_info, info, extack);
4438 fib6_info_release(nh->fib6_info);
93531c67 4439
f7225172
DA
4440 if (!err) {
4441 /* save reference to last route successfully inserted */
4442 rt_last = nh->fib6_info;
4443
4444 /* save reference to first route for notification */
4445 if (!rt_notif)
4446 rt_notif = nh->fib6_info;
4447 }
3b1137fe 4448
8d1c802b
DA
4449 /* nh->fib6_info is used or freed at this point, reset to NULL*/
4450 nh->fib6_info = NULL;
6b9ea5a6
RP
4451 if (err) {
4452 if (replace && nhn)
4453 ip6_print_replace_route_err(&rt6_nh_list);
4454 err_nh = nh;
4455 goto add_errout;
51ebd318 4456 }
6b9ea5a6 4457
1a72418b 4458 /* Because each route is added like a single route we remove
27596472
MK
4459 * these flags after the first nexthop: if there is a collision,
4460 * we have already failed to add the first nexthop:
4461 * fib6_add_rt2node() has rejected it; when replacing, old
4462 * nexthops have been replaced by first new, the rest should
4463 * be added to it.
1a72418b 4464 */
27596472
MK
4465 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4466 NLM_F_REPLACE);
6b9ea5a6
RP
4467 nhn++;
4468 }
4469
3b1137fe
DA
4470 /* success ... tell user about new route */
4471 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4472 goto cleanup;
4473
4474add_errout:
3b1137fe
DA
4475 /* send notification for routes that were added so that
4476 * the delete notifications sent by ip6_route_del are
4477 * coherent
4478 */
4479 if (rt_notif)
4480 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4481
6b9ea5a6
RP
4482 /* Delete routes that were already added */
4483 list_for_each_entry(nh, &rt6_nh_list, next) {
4484 if (err_nh == nh)
4485 break;
333c4301 4486 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4487 }
4488
4489cleanup:
4490 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
8d1c802b
DA
4491 if (nh->fib6_info)
4492 fib6_info_release(nh->fib6_info);
6b9ea5a6
RP
4493 list_del(&nh->next);
4494 kfree(nh);
4495 }
4496
4497 return err;
4498}
4499
333c4301
DA
4500static int ip6_route_multipath_del(struct fib6_config *cfg,
4501 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4502{
4503 struct fib6_config r_cfg;
4504 struct rtnexthop *rtnh;
4505 int remaining;
4506 int attrlen;
4507 int err = 1, last_err = 0;
4508
4509 remaining = cfg->fc_mp_len;
4510 rtnh = (struct rtnexthop *)cfg->fc_mp;
4511
4512 /* Parse a Multipath Entry */
4513 while (rtnh_ok(rtnh, remaining)) {
4514 memcpy(&r_cfg, cfg, sizeof(*cfg));
4515 if (rtnh->rtnh_ifindex)
4516 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4517
4518 attrlen = rtnh_attrlen(rtnh);
4519 if (attrlen > 0) {
4520 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4521
4522 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4523 if (nla) {
4524 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4525 r_cfg.fc_flags |= RTF_GATEWAY;
4526 }
4527 }
333c4301 4528 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4529 if (err)
4530 last_err = err;
4531
51ebd318
ND
4532 rtnh = rtnh_next(rtnh, &remaining);
4533 }
4534
4535 return last_err;
4536}
4537
c21ef3e3
DA
4538static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4539 struct netlink_ext_ack *extack)
1da177e4 4540{
86872cb5
TG
4541 struct fib6_config cfg;
4542 int err;
1da177e4 4543
333c4301 4544 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4545 if (err < 0)
4546 return err;
4547
51ebd318 4548 if (cfg.fc_mp)
333c4301 4549 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4550 else {
4551 cfg.fc_delete_all_nh = 1;
333c4301 4552 return ip6_route_del(&cfg, extack);
0ae81335 4553 }
1da177e4
LT
4554}
4555
c21ef3e3
DA
4556static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4557 struct netlink_ext_ack *extack)
1da177e4 4558{
86872cb5
TG
4559 struct fib6_config cfg;
4560 int err;
1da177e4 4561
333c4301 4562 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4563 if (err < 0)
4564 return err;
4565
51ebd318 4566 if (cfg.fc_mp)
333c4301 4567 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4568 else
acb54e3c 4569 return ip6_route_add(&cfg, GFP_KERNEL, extack);
1da177e4
LT
4570}
4571
8d1c802b 4572static size_t rt6_nlmsg_size(struct fib6_info *rt)
339bf98f 4573{
beb1afac
DA
4574 int nexthop_len = 0;
4575
93c2fb25 4576 if (rt->fib6_nsiblings) {
beb1afac
DA
4577 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4578 + NLA_ALIGN(sizeof(struct rtnexthop))
4579 + nla_total_size(16) /* RTA_GATEWAY */
5e670d84 4580 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
beb1afac 4581
93c2fb25 4582 nexthop_len *= rt->fib6_nsiblings;
beb1afac
DA
4583 }
4584
339bf98f
TG
4585 return NLMSG_ALIGN(sizeof(struct rtmsg))
4586 + nla_total_size(16) /* RTA_SRC */
4587 + nla_total_size(16) /* RTA_DST */
4588 + nla_total_size(16) /* RTA_GATEWAY */
4589 + nla_total_size(16) /* RTA_PREFSRC */
4590 + nla_total_size(4) /* RTA_TABLE */
4591 + nla_total_size(4) /* RTA_IIF */
4592 + nla_total_size(4) /* RTA_OIF */
4593 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4594 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4595 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4596 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4597 + nla_total_size(1) /* RTA_PREF */
5e670d84 4598 + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
beb1afac
DA
4599 + nexthop_len;
4600}
4601
8d1c802b 4602static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
5be083ce 4603 unsigned int *flags, bool skip_oif)
beb1afac 4604{
5e670d84 4605 if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
f9d882ea
IS
4606 *flags |= RTNH_F_DEAD;
4607
5e670d84 4608 if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
beb1afac 4609 *flags |= RTNH_F_LINKDOWN;
dcd1f572
DA
4610
4611 rcu_read_lock();
4612 if (fib6_ignore_linkdown(rt))
beb1afac 4613 *flags |= RTNH_F_DEAD;
dcd1f572 4614 rcu_read_unlock();
beb1afac
DA
4615 }
4616
93c2fb25 4617 if (rt->fib6_flags & RTF_GATEWAY) {
5e670d84 4618 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
beb1afac
DA
4619 goto nla_put_failure;
4620 }
4621
5e670d84
DA
4622 *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
4623 if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4624 *flags |= RTNH_F_OFFLOAD;
4625
5be083ce 4626 /* not needed for multipath encoding b/c it has a rtnexthop struct */
5e670d84
DA
4627 if (!skip_oif && rt->fib6_nh.nh_dev &&
4628 nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
beb1afac
DA
4629 goto nla_put_failure;
4630
5e670d84
DA
4631 if (rt->fib6_nh.nh_lwtstate &&
4632 lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
beb1afac
DA
4633 goto nla_put_failure;
4634
4635 return 0;
4636
4637nla_put_failure:
4638 return -EMSGSIZE;
4639}
4640
5be083ce 4641/* add multipath next hop */
8d1c802b 4642static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
beb1afac 4643{
5e670d84 4644 const struct net_device *dev = rt->fib6_nh.nh_dev;
beb1afac
DA
4645 struct rtnexthop *rtnh;
4646 unsigned int flags = 0;
4647
4648 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4649 if (!rtnh)
4650 goto nla_put_failure;
4651
5e670d84
DA
4652 rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
4653 rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
beb1afac 4654
5be083ce 4655 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4656 goto nla_put_failure;
4657
4658 rtnh->rtnh_flags = flags;
4659
4660 /* length of rtnetlink header + attributes */
4661 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4662
4663 return 0;
4664
4665nla_put_failure:
4666 return -EMSGSIZE;
339bf98f
TG
4667}
4668
d4ead6b3 4669static int rt6_fill_node(struct net *net, struct sk_buff *skb,
8d1c802b 4670 struct fib6_info *rt, struct dst_entry *dst,
d4ead6b3 4671 struct in6_addr *dest, struct in6_addr *src,
15e47304 4672 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4673 unsigned int flags)
1da177e4 4674{
22d0bd82
XL
4675 struct rt6_info *rt6 = (struct rt6_info *)dst;
4676 struct rt6key *rt6_dst, *rt6_src;
4677 u32 *pmetrics, table, rt6_flags;
2d7202bf 4678 struct nlmsghdr *nlh;
22d0bd82 4679 struct rtmsg *rtm;
d4ead6b3 4680 long expires = 0;
1da177e4 4681
15e47304 4682 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4683 if (!nlh)
26932566 4684 return -EMSGSIZE;
2d7202bf 4685
22d0bd82
XL
4686 if (rt6) {
4687 rt6_dst = &rt6->rt6i_dst;
4688 rt6_src = &rt6->rt6i_src;
4689 rt6_flags = rt6->rt6i_flags;
4690 } else {
4691 rt6_dst = &rt->fib6_dst;
4692 rt6_src = &rt->fib6_src;
4693 rt6_flags = rt->fib6_flags;
4694 }
4695
2d7202bf 4696 rtm = nlmsg_data(nlh);
1da177e4 4697 rtm->rtm_family = AF_INET6;
22d0bd82
XL
4698 rtm->rtm_dst_len = rt6_dst->plen;
4699 rtm->rtm_src_len = rt6_src->plen;
1da177e4 4700 rtm->rtm_tos = 0;
93c2fb25
DA
4701 if (rt->fib6_table)
4702 table = rt->fib6_table->tb6_id;
c71099ac 4703 else
9e762a4a
PM
4704 table = RT6_TABLE_UNSPEC;
4705 rtm->rtm_table = table;
c78679e8
DM
4706 if (nla_put_u32(skb, RTA_TABLE, table))
4707 goto nla_put_failure;
e8478e80
DA
4708
4709 rtm->rtm_type = rt->fib6_type;
1da177e4
LT
4710 rtm->rtm_flags = 0;
4711 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
93c2fb25 4712 rtm->rtm_protocol = rt->fib6_protocol;
1da177e4 4713
22d0bd82 4714 if (rt6_flags & RTF_CACHE)
1da177e4
LT
4715 rtm->rtm_flags |= RTM_F_CLONED;
4716
d4ead6b3
DA
4717 if (dest) {
4718 if (nla_put_in6_addr(skb, RTA_DST, dest))
c78679e8 4719 goto nla_put_failure;
1ab1457c 4720 rtm->rtm_dst_len = 128;
1da177e4 4721 } else if (rtm->rtm_dst_len)
22d0bd82 4722 if (nla_put_in6_addr(skb, RTA_DST, &rt6_dst->addr))
c78679e8 4723 goto nla_put_failure;
1da177e4
LT
4724#ifdef CONFIG_IPV6_SUBTREES
4725 if (src) {
930345ea 4726 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4727 goto nla_put_failure;
1ab1457c 4728 rtm->rtm_src_len = 128;
c78679e8 4729 } else if (rtm->rtm_src_len &&
22d0bd82 4730 nla_put_in6_addr(skb, RTA_SRC, &rt6_src->addr))
c78679e8 4731 goto nla_put_failure;
1da177e4 4732#endif
7bc570c8
YH
4733 if (iif) {
4734#ifdef CONFIG_IPV6_MROUTE
22d0bd82 4735 if (ipv6_addr_is_multicast(&rt6_dst->addr)) {
fd61c6ba
DA
4736 int err = ip6mr_get_route(net, skb, rtm, portid);
4737
4738 if (err == 0)
4739 return 0;
4740 if (err < 0)
4741 goto nla_put_failure;
7bc570c8
YH
4742 } else
4743#endif
c78679e8
DM
4744 if (nla_put_u32(skb, RTA_IIF, iif))
4745 goto nla_put_failure;
d4ead6b3 4746 } else if (dest) {
1da177e4 4747 struct in6_addr saddr_buf;
d4ead6b3 4748 if (ip6_route_get_saddr(net, rt, dest, 0, &saddr_buf) == 0 &&
930345ea 4749 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4750 goto nla_put_failure;
1da177e4 4751 }
2d7202bf 4752
93c2fb25 4753 if (rt->fib6_prefsrc.plen) {
c3968a85 4754 struct in6_addr saddr_buf;
93c2fb25 4755 saddr_buf = rt->fib6_prefsrc.addr;
930345ea 4756 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4757 goto nla_put_failure;
c3968a85
DW
4758 }
4759
d4ead6b3
DA
4760 pmetrics = dst ? dst_metrics_ptr(dst) : rt->fib6_metrics->metrics;
4761 if (rtnetlink_put_metrics(skb, pmetrics) < 0)
2d7202bf
TG
4762 goto nla_put_failure;
4763
93c2fb25 4764 if (nla_put_u32(skb, RTA_PRIORITY, rt->fib6_metric))
c78679e8 4765 goto nla_put_failure;
8253947e 4766
beb1afac
DA
4767 /* For multipath routes, walk the siblings list and add
4768 * each as a nexthop within RTA_MULTIPATH.
4769 */
22d0bd82
XL
4770 if (rt6) {
4771 if (rt6_flags & RTF_GATEWAY &&
4772 nla_put_in6_addr(skb, RTA_GATEWAY, &rt6->rt6i_gateway))
4773 goto nla_put_failure;
4774
4775 if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
4776 goto nla_put_failure;
4777 } else if (rt->fib6_nsiblings) {
8d1c802b 4778 struct fib6_info *sibling, *next_sibling;
beb1afac
DA
4779 struct nlattr *mp;
4780
4781 mp = nla_nest_start(skb, RTA_MULTIPATH);
4782 if (!mp)
4783 goto nla_put_failure;
4784
4785 if (rt6_add_nexthop(skb, rt) < 0)
4786 goto nla_put_failure;
4787
4788 list_for_each_entry_safe(sibling, next_sibling,
93c2fb25 4789 &rt->fib6_siblings, fib6_siblings) {
beb1afac
DA
4790 if (rt6_add_nexthop(skb, sibling) < 0)
4791 goto nla_put_failure;
4792 }
4793
4794 nla_nest_end(skb, mp);
4795 } else {
5be083ce 4796 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4797 goto nla_put_failure;
4798 }
4799
22d0bd82 4800 if (rt6_flags & RTF_EXPIRES) {
14895687
DA
4801 expires = dst ? dst->expires : rt->expires;
4802 expires -= jiffies;
4803 }
69cdf8f9 4804
d4ead6b3 4805 if (rtnl_put_cacheinfo(skb, dst, 0, expires, dst ? dst->error : 0) < 0)
e3703b3d 4806 goto nla_put_failure;
2d7202bf 4807
22d0bd82 4808 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt6_flags)))
c78ba6d6
LR
4809 goto nla_put_failure;
4810
19e42e45 4811
053c095a
JB
4812 nlmsg_end(skb, nlh);
4813 return 0;
2d7202bf
TG
4814
4815nla_put_failure:
26932566
PM
4816 nlmsg_cancel(skb, nlh);
4817 return -EMSGSIZE;
1da177e4
LT
4818}
4819
8d1c802b 4820int rt6_dump_route(struct fib6_info *rt, void *p_arg)
1da177e4
LT
4821{
4822 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4823 struct net *net = arg->net;
4824
421842ed 4825 if (rt == net->ipv6.fib6_null_entry)
1f17e2f2 4826 return 0;
1da177e4 4827
2d7202bf
TG
4828 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4829 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4830
4831 /* user wants prefix routes only */
4832 if (rtm->rtm_flags & RTM_F_PREFIX &&
93c2fb25 4833 !(rt->fib6_flags & RTF_PREFIX_RT)) {
f8cfe2ce
DA
4834 /* success since this is not a prefix route */
4835 return 1;
4836 }
4837 }
1da177e4 4838
d4ead6b3
DA
4839 return rt6_fill_node(net, arg->skb, rt, NULL, NULL, NULL, 0,
4840 RTM_NEWROUTE, NETLINK_CB(arg->cb->skb).portid,
4841 arg->cb->nlh->nlmsg_seq, NLM_F_MULTI);
1da177e4
LT
4842}
4843
c21ef3e3
DA
4844static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4845 struct netlink_ext_ack *extack)
1da177e4 4846{
3b1e0a65 4847 struct net *net = sock_net(in_skb->sk);
ab364a6f 4848 struct nlattr *tb[RTA_MAX+1];
18c3a61c 4849 int err, iif = 0, oif = 0;
a68886a6 4850 struct fib6_info *from;
18c3a61c 4851 struct dst_entry *dst;
ab364a6f 4852 struct rt6_info *rt;
1da177e4 4853 struct sk_buff *skb;
ab364a6f 4854 struct rtmsg *rtm;
4c9483b2 4855 struct flowi6 fl6;
18c3a61c 4856 bool fibmatch;
1da177e4 4857
fceb6435 4858 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4859 extack);
ab364a6f
TG
4860 if (err < 0)
4861 goto errout;
1da177e4 4862
ab364a6f 4863 err = -EINVAL;
4c9483b2 4864 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4865 rtm = nlmsg_data(nlh);
4866 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4867 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4868
ab364a6f
TG
4869 if (tb[RTA_SRC]) {
4870 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4871 goto errout;
4872
4e3fd7a0 4873 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4874 }
4875
4876 if (tb[RTA_DST]) {
4877 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4878 goto errout;
4879
4e3fd7a0 4880 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4881 }
4882
4883 if (tb[RTA_IIF])
4884 iif = nla_get_u32(tb[RTA_IIF]);
4885
4886 if (tb[RTA_OIF])
72331bc0 4887 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4888
2e47b291
LC
4889 if (tb[RTA_MARK])
4890 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4891
622ec2c9
LC
4892 if (tb[RTA_UID])
4893 fl6.flowi6_uid = make_kuid(current_user_ns(),
4894 nla_get_u32(tb[RTA_UID]));
4895 else
4896 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4897
eacb9384
RP
4898 if (tb[RTA_SPORT])
4899 fl6.fl6_sport = nla_get_be16(tb[RTA_SPORT]);
4900
4901 if (tb[RTA_DPORT])
4902 fl6.fl6_dport = nla_get_be16(tb[RTA_DPORT]);
4903
4904 if (tb[RTA_IP_PROTO]) {
4905 err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
4906 &fl6.flowi6_proto, extack);
4907 if (err)
4908 goto errout;
4909 }
4910
1da177e4
LT
4911 if (iif) {
4912 struct net_device *dev;
72331bc0
SL
4913 int flags = 0;
4914
121622db
FW
4915 rcu_read_lock();
4916
4917 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4918 if (!dev) {
121622db 4919 rcu_read_unlock();
1da177e4 4920 err = -ENODEV;
ab364a6f 4921 goto errout;
1da177e4 4922 }
72331bc0
SL
4923
4924 fl6.flowi6_iif = iif;
4925
4926 if (!ipv6_addr_any(&fl6.saddr))
4927 flags |= RT6_LOOKUP_F_HAS_SADDR;
4928
b75cc8f9 4929 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4930
4931 rcu_read_unlock();
72331bc0
SL
4932 } else {
4933 fl6.flowi6_oif = oif;
4934
58acfd71 4935 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4936 }
4937
18c3a61c
RP
4938
4939 rt = container_of(dst, struct rt6_info, dst);
4940 if (rt->dst.error) {
4941 err = rt->dst.error;
4942 ip6_rt_put(rt);
4943 goto errout;
1da177e4
LT
4944 }
4945
9d6acb3b
WC
4946 if (rt == net->ipv6.ip6_null_entry) {
4947 err = rt->dst.error;
4948 ip6_rt_put(rt);
4949 goto errout;
4950 }
4951
ab364a6f 4952 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4953 if (!skb) {
94e187c0 4954 ip6_rt_put(rt);
ab364a6f
TG
4955 err = -ENOBUFS;
4956 goto errout;
4957 }
1da177e4 4958
d8d1f30b 4959 skb_dst_set(skb, &rt->dst);
a68886a6
DA
4960
4961 rcu_read_lock();
4962 from = rcu_dereference(rt->from);
4963
18c3a61c 4964 if (fibmatch)
a68886a6 4965 err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
18c3a61c
RP
4966 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4967 nlh->nlmsg_seq, 0);
4968 else
a68886a6
DA
4969 err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
4970 &fl6.saddr, iif, RTM_NEWROUTE,
d4ead6b3
DA
4971 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
4972 0);
a68886a6
DA
4973 rcu_read_unlock();
4974
1da177e4 4975 if (err < 0) {
ab364a6f
TG
4976 kfree_skb(skb);
4977 goto errout;
1da177e4
LT
4978 }
4979
15e47304 4980 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4981errout:
1da177e4 4982 return err;
1da177e4
LT
4983}
4984
8d1c802b 4985void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
37a1d361 4986 unsigned int nlm_flags)
1da177e4
LT
4987{
4988 struct sk_buff *skb;
5578689a 4989 struct net *net = info->nl_net;
528c4ceb
DL
4990 u32 seq;
4991 int err;
4992
4993 err = -ENOBUFS;
38308473 4994 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4995
19e42e45 4996 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4997 if (!skb)
21713ebc
TG
4998 goto errout;
4999
d4ead6b3
DA
5000 err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
5001 event, info->portid, seq, nlm_flags);
26932566
PM
5002 if (err < 0) {
5003 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
5004 WARN_ON(err == -EMSGSIZE);
5005 kfree_skb(skb);
5006 goto errout;
5007 }
15e47304 5008 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
5009 info->nlh, gfp_any());
5010 return;
21713ebc
TG
5011errout:
5012 if (err < 0)
5578689a 5013 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
5014}
5015
8ed67789 5016static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 5017 unsigned long event, void *ptr)
8ed67789 5018{
351638e7 5019 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 5020 struct net *net = dev_net(dev);
8ed67789 5021
242d3a49
WC
5022 if (!(dev->flags & IFF_LOOPBACK))
5023 return NOTIFY_OK;
5024
5025 if (event == NETDEV_REGISTER) {
421842ed 5026 net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
d8d1f30b 5027 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
5028 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
5029#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 5030 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 5031 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 5032 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 5033 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 5034#endif
76da0704
WC
5035 } else if (event == NETDEV_UNREGISTER &&
5036 dev->reg_state != NETREG_UNREGISTERED) {
5037 /* NETDEV_UNREGISTER could be fired for multiple times by
5038 * netdev_wait_allrefs(). Make sure we only call this once.
5039 */
12d94a80 5040 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 5041#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
5042 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
5043 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
5044#endif
5045 }
5046
5047 return NOTIFY_OK;
5048}
5049
1da177e4
LT
5050/*
5051 * /proc
5052 */
5053
5054#ifdef CONFIG_PROC_FS
1da177e4
LT
5055static int rt6_stats_seq_show(struct seq_file *seq, void *v)
5056{
69ddb805 5057 struct net *net = (struct net *)seq->private;
1da177e4 5058 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
5059 net->ipv6.rt6_stats->fib_nodes,
5060 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 5061 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
5062 net->ipv6.rt6_stats->fib_rt_entries,
5063 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 5064 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 5065 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
5066
5067 return 0;
5068}
1da177e4
LT
5069#endif /* CONFIG_PROC_FS */
5070
5071#ifdef CONFIG_SYSCTL
5072
1da177e4 5073static
fe2c6338 5074int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
5075 void __user *buffer, size_t *lenp, loff_t *ppos)
5076{
c486da34
LAG
5077 struct net *net;
5078 int delay;
5079 if (!write)
1da177e4 5080 return -EINVAL;
c486da34
LAG
5081
5082 net = (struct net *)ctl->extra1;
5083 delay = net->ipv6.sysctl.flush_delay;
5084 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 5085 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 5086 return 0;
1da177e4
LT
5087}
5088
fe2c6338 5089struct ctl_table ipv6_route_table_template[] = {
1ab1457c 5090 {
1da177e4 5091 .procname = "flush",
4990509f 5092 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 5093 .maxlen = sizeof(int),
89c8b3a1 5094 .mode = 0200,
6d9f239a 5095 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
5096 },
5097 {
1da177e4 5098 .procname = "gc_thresh",
9a7ec3a9 5099 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
5100 .maxlen = sizeof(int),
5101 .mode = 0644,
6d9f239a 5102 .proc_handler = proc_dointvec,
1da177e4
LT
5103 },
5104 {
1da177e4 5105 .procname = "max_size",
4990509f 5106 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
5107 .maxlen = sizeof(int),
5108 .mode = 0644,
6d9f239a 5109 .proc_handler = proc_dointvec,
1da177e4
LT
5110 },
5111 {
1da177e4 5112 .procname = "gc_min_interval",
4990509f 5113 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5114 .maxlen = sizeof(int),
5115 .mode = 0644,
6d9f239a 5116 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5117 },
5118 {
1da177e4 5119 .procname = "gc_timeout",
4990509f 5120 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
5121 .maxlen = sizeof(int),
5122 .mode = 0644,
6d9f239a 5123 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5124 },
5125 {
1da177e4 5126 .procname = "gc_interval",
4990509f 5127 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
5128 .maxlen = sizeof(int),
5129 .mode = 0644,
6d9f239a 5130 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5131 },
5132 {
1da177e4 5133 .procname = "gc_elasticity",
4990509f 5134 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
5135 .maxlen = sizeof(int),
5136 .mode = 0644,
f3d3f616 5137 .proc_handler = proc_dointvec,
1da177e4
LT
5138 },
5139 {
1da177e4 5140 .procname = "mtu_expires",
4990509f 5141 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
5142 .maxlen = sizeof(int),
5143 .mode = 0644,
6d9f239a 5144 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
5145 },
5146 {
1da177e4 5147 .procname = "min_adv_mss",
4990509f 5148 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
5149 .maxlen = sizeof(int),
5150 .mode = 0644,
f3d3f616 5151 .proc_handler = proc_dointvec,
1da177e4
LT
5152 },
5153 {
1da177e4 5154 .procname = "gc_min_interval_ms",
4990509f 5155 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
5156 .maxlen = sizeof(int),
5157 .mode = 0644,
6d9f239a 5158 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 5159 },
f8572d8f 5160 { }
1da177e4
LT
5161};
5162
2c8c1e72 5163struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
5164{
5165 struct ctl_table *table;
5166
5167 table = kmemdup(ipv6_route_table_template,
5168 sizeof(ipv6_route_table_template),
5169 GFP_KERNEL);
5ee09105
YH
5170
5171 if (table) {
5172 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5173 table[0].extra1 = net;
86393e52 5174 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5175 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5176 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5177 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5178 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5179 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5180 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5181 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5182 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5183
5184 /* Don't export sysctls to unprivileged users */
5185 if (net->user_ns != &init_user_ns)
5186 table[0].procname = NULL;
5ee09105
YH
5187 }
5188
760f2d01
DL
5189 return table;
5190}
1da177e4
LT
5191#endif
5192
2c8c1e72 5193static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5194{
633d424b 5195 int ret = -ENOMEM;
8ed67789 5196
86393e52
AD
5197 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5198 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5199
fc66f95c
ED
5200 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5201 goto out_ip6_dst_ops;
5202
421842ed
DA
5203 net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
5204 sizeof(*net->ipv6.fib6_null_entry),
5205 GFP_KERNEL);
5206 if (!net->ipv6.fib6_null_entry)
5207 goto out_ip6_dst_entries;
5208
8ed67789
DL
5209 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5210 sizeof(*net->ipv6.ip6_null_entry),
5211 GFP_KERNEL);
5212 if (!net->ipv6.ip6_null_entry)
421842ed 5213 goto out_fib6_null_entry;
d8d1f30b 5214 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5215 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5216 ip6_template_metrics, true);
8ed67789
DL
5217
5218#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5219 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5220 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5221 sizeof(*net->ipv6.ip6_prohibit_entry),
5222 GFP_KERNEL);
68fffc67
PZ
5223 if (!net->ipv6.ip6_prohibit_entry)
5224 goto out_ip6_null_entry;
d8d1f30b 5225 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5226 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5227 ip6_template_metrics, true);
8ed67789
DL
5228
5229 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5230 sizeof(*net->ipv6.ip6_blk_hole_entry),
5231 GFP_KERNEL);
68fffc67
PZ
5232 if (!net->ipv6.ip6_blk_hole_entry)
5233 goto out_ip6_prohibit_entry;
d8d1f30b 5234 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5235 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5236 ip6_template_metrics, true);
8ed67789
DL
5237#endif
5238
b339a47c
PZ
5239 net->ipv6.sysctl.flush_delay = 0;
5240 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5241 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5242 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5243 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5244 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5245 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5246 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5247
6891a346
BT
5248 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5249
8ed67789
DL
5250 ret = 0;
5251out:
5252 return ret;
f2fc6a54 5253
68fffc67
PZ
5254#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5255out_ip6_prohibit_entry:
5256 kfree(net->ipv6.ip6_prohibit_entry);
5257out_ip6_null_entry:
5258 kfree(net->ipv6.ip6_null_entry);
5259#endif
421842ed
DA
5260out_fib6_null_entry:
5261 kfree(net->ipv6.fib6_null_entry);
fc66f95c
ED
5262out_ip6_dst_entries:
5263 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5264out_ip6_dst_ops:
f2fc6a54 5265 goto out;
cdb18761
DL
5266}
5267
2c8c1e72 5268static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5269{
421842ed 5270 kfree(net->ipv6.fib6_null_entry);
8ed67789
DL
5271 kfree(net->ipv6.ip6_null_entry);
5272#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5273 kfree(net->ipv6.ip6_prohibit_entry);
5274 kfree(net->ipv6.ip6_blk_hole_entry);
5275#endif
41bb78b4 5276 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5277}
5278
d189634e
TG
5279static int __net_init ip6_route_net_init_late(struct net *net)
5280{
5281#ifdef CONFIG_PROC_FS
c3506372
CH
5282 proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
5283 sizeof(struct ipv6_route_iter));
3617d949
CH
5284 proc_create_net_single("rt6_stats", 0444, net->proc_net,
5285 rt6_stats_seq_show, NULL);
d189634e
TG
5286#endif
5287 return 0;
5288}
5289
5290static void __net_exit ip6_route_net_exit_late(struct net *net)
5291{
5292#ifdef CONFIG_PROC_FS
ece31ffd
G
5293 remove_proc_entry("ipv6_route", net->proc_net);
5294 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5295#endif
5296}
5297
cdb18761
DL
5298static struct pernet_operations ip6_route_net_ops = {
5299 .init = ip6_route_net_init,
5300 .exit = ip6_route_net_exit,
5301};
5302
c3426b47
DM
5303static int __net_init ipv6_inetpeer_init(struct net *net)
5304{
5305 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5306
5307 if (!bp)
5308 return -ENOMEM;
5309 inet_peer_base_init(bp);
5310 net->ipv6.peers = bp;
5311 return 0;
5312}
5313
5314static void __net_exit ipv6_inetpeer_exit(struct net *net)
5315{
5316 struct inet_peer_base *bp = net->ipv6.peers;
5317
5318 net->ipv6.peers = NULL;
56a6b248 5319 inetpeer_invalidate_tree(bp);
c3426b47
DM
5320 kfree(bp);
5321}
5322
2b823f72 5323static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5324 .init = ipv6_inetpeer_init,
5325 .exit = ipv6_inetpeer_exit,
5326};
5327
d189634e
TG
5328static struct pernet_operations ip6_route_net_late_ops = {
5329 .init = ip6_route_net_init_late,
5330 .exit = ip6_route_net_exit_late,
5331};
5332
8ed67789
DL
5333static struct notifier_block ip6_route_dev_notifier = {
5334 .notifier_call = ip6_route_dev_notify,
242d3a49 5335 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5336};
5337
2f460933
WC
5338void __init ip6_route_init_special_entries(void)
5339{
5340 /* Registering of the loopback is done before this portion of code,
5341 * the loopback reference in rt6_info will not be taken, do it
5342 * manually for init_net */
421842ed 5343 init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
2f460933
WC
5344 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5345 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5346 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5347 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5348 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5349 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5350 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5351 #endif
5352}
5353
433d49c3 5354int __init ip6_route_init(void)
1da177e4 5355{
433d49c3 5356 int ret;
8d0b94af 5357 int cpu;
433d49c3 5358
9a7ec3a9
DL
5359 ret = -ENOMEM;
5360 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5361 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5362 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5363 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5364 goto out;
14e50e57 5365
fc66f95c 5366 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5367 if (ret)
bdb3289f 5368 goto out_kmem_cache;
bdb3289f 5369
c3426b47
DM
5370 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5371 if (ret)
e8803b6c 5372 goto out_dst_entries;
2a0c451a 5373
7e52b33b
DM
5374 ret = register_pernet_subsys(&ip6_route_net_ops);
5375 if (ret)
5376 goto out_register_inetpeer;
c3426b47 5377
5dc121e9
AE
5378 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5379
e8803b6c 5380 ret = fib6_init();
433d49c3 5381 if (ret)
8ed67789 5382 goto out_register_subsys;
433d49c3 5383
433d49c3
DL
5384 ret = xfrm6_init();
5385 if (ret)
e8803b6c 5386 goto out_fib6_init;
c35b7e72 5387
433d49c3
DL
5388 ret = fib6_rules_init();
5389 if (ret)
5390 goto xfrm6_init;
7e5449c2 5391
d189634e
TG
5392 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5393 if (ret)
5394 goto fib6_rules_init;
5395
16feebcf
FW
5396 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5397 inet6_rtm_newroute, NULL, 0);
5398 if (ret < 0)
5399 goto out_register_late_subsys;
5400
5401 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5402 inet6_rtm_delroute, NULL, 0);
5403 if (ret < 0)
5404 goto out_register_late_subsys;
5405
5406 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5407 inet6_rtm_getroute, NULL,
5408 RTNL_FLAG_DOIT_UNLOCKED);
5409 if (ret < 0)
d189634e 5410 goto out_register_late_subsys;
c127ea2c 5411
8ed67789 5412 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5413 if (ret)
d189634e 5414 goto out_register_late_subsys;
8ed67789 5415
8d0b94af
MKL
5416 for_each_possible_cpu(cpu) {
5417 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5418
5419 INIT_LIST_HEAD(&ul->head);
5420 spin_lock_init(&ul->lock);
5421 }
5422
433d49c3
DL
5423out:
5424 return ret;
5425
d189634e 5426out_register_late_subsys:
16feebcf 5427 rtnl_unregister_all(PF_INET6);
d189634e 5428 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5429fib6_rules_init:
433d49c3
DL
5430 fib6_rules_cleanup();
5431xfrm6_init:
433d49c3 5432 xfrm6_fini();
2a0c451a
TG
5433out_fib6_init:
5434 fib6_gc_cleanup();
8ed67789
DL
5435out_register_subsys:
5436 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5437out_register_inetpeer:
5438 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5439out_dst_entries:
5440 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5441out_kmem_cache:
f2fc6a54 5442 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5443 goto out;
1da177e4
LT
5444}
5445
5446void ip6_route_cleanup(void)
5447{
8ed67789 5448 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5449 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5450 fib6_rules_cleanup();
1da177e4 5451 xfrm6_fini();
1da177e4 5452 fib6_gc_cleanup();
c3426b47 5453 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5454 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5455 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5456 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5457}