]> git.proxmox.com Git - mirror_ubuntu-eoan-kernel.git/blame - net/ipv6/route.c
Merge tag 'sh-for-4.17-fixes' of git://git.libc.org/linux-sh
[mirror_ubuntu-eoan-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
35732d01 47#include <linux/jhash.h>
457c4cbc 48#include <net/net_namespace.h>
1da177e4
LT
49#include <net/snmp.h>
50#include <net/ipv6.h>
51#include <net/ip6_fib.h>
52#include <net/ip6_route.h>
53#include <net/ndisc.h>
54#include <net/addrconf.h>
55#include <net/tcp.h>
56#include <linux/rtnetlink.h>
57#include <net/dst.h>
904af04d 58#include <net/dst_metadata.h>
1da177e4 59#include <net/xfrm.h>
8d71740c 60#include <net/netevent.h>
21713ebc 61#include <net/netlink.h>
51ebd318 62#include <net/nexthop.h>
19e42e45 63#include <net/lwtunnel.h>
904af04d 64#include <net/ip_tunnels.h>
ca254490 65#include <net/l3mdev.h>
b811580d 66#include <trace/events/fib6.h>
1da177e4 67
7c0f6ba6 68#include <linux/uaccess.h>
1da177e4
LT
69
70#ifdef CONFIG_SYSCTL
71#include <linux/sysctl.h>
72#endif
73
/*
 * Result of the neighbour reachability check used while scoring routes.
 * Negative values are failures and are propagated by rt6_score_route().
 */
enum rt6_nud_state {
	/* find_match() skips the route entirely. */
	RT6_NUD_FAIL_HARD	= -3,
	/* Neighbour is in NUD_FAILED (only returned with CONFIG_IPV6_ROUTER_PREF). */
	RT6_NUD_FAIL_PROBE	= -2,
	/* find_match() scores the route as 0 and requests round-robin. */
	RT6_NUD_FAIL_DO_RR	= -1,
	RT6_NUD_SUCCEED		= 1
};
80
83a09abd 81static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 82static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 83static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 84static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
85static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86static void ip6_dst_destroy(struct dst_entry *);
87static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
569d3645 89static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
90
91static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 92static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 93static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 94static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 95static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
96static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
97 struct sk_buff *skb, u32 mtu);
98static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
99 struct sk_buff *skb);
4b32b5ad 100static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 101static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
102static size_t rt6_nlmsg_size(struct rt6_info *rt);
103static int rt6_fill_node(struct net *net,
104 struct sk_buff *skb, struct rt6_info *rt,
105 struct in6_addr *dst, struct in6_addr *src,
106 int iif, int type, u32 portid, u32 seq,
107 unsigned int flags);
35732d01
WW
108static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
109 struct in6_addr *daddr,
110 struct in6_addr *saddr);
1da177e4 111
70ceb4f5 112#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 113static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 114 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
115 const struct in6_addr *gwaddr,
116 struct net_device *dev,
95c96174 117 unsigned int pref);
efa2cea0 118static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 119 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
120 const struct in6_addr *gwaddr,
121 struct net_device *dev);
70ceb4f5
YH
122#endif
123
8d0b94af
MKL
124struct uncached_list {
125 spinlock_t lock;
126 struct list_head head;
127};
128
129static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
130
510c321b 131void rt6_uncached_list_add(struct rt6_info *rt)
8d0b94af
MKL
132{
133 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
134
8d0b94af
MKL
135 rt->rt6i_uncached_list = ul;
136
137 spin_lock_bh(&ul->lock);
138 list_add_tail(&rt->rt6i_uncached, &ul->head);
139 spin_unlock_bh(&ul->lock);
140}
141
510c321b 142void rt6_uncached_list_del(struct rt6_info *rt)
8d0b94af
MKL
143{
144 if (!list_empty(&rt->rt6i_uncached)) {
145 struct uncached_list *ul = rt->rt6i_uncached_list;
81eb8447 146 struct net *net = dev_net(rt->dst.dev);
8d0b94af
MKL
147
148 spin_lock_bh(&ul->lock);
149 list_del(&rt->rt6i_uncached);
81eb8447 150 atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
8d0b94af
MKL
151 spin_unlock_bh(&ul->lock);
152 }
153}
154
155static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
156{
157 struct net_device *loopback_dev = net->loopback_dev;
158 int cpu;
159
e332bc67
EB
160 if (dev == loopback_dev)
161 return;
162
8d0b94af
MKL
163 for_each_possible_cpu(cpu) {
164 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
165 struct rt6_info *rt;
166
167 spin_lock_bh(&ul->lock);
168 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
169 struct inet6_dev *rt_idev = rt->rt6i_idev;
170 struct net_device *rt_dev = rt->dst.dev;
171
e332bc67 172 if (rt_idev->dev == dev) {
8d0b94af
MKL
173 rt->rt6i_idev = in6_dev_get(loopback_dev);
174 in6_dev_put(rt_idev);
175 }
176
e332bc67 177 if (rt_dev == dev) {
8d0b94af
MKL
178 rt->dst.dev = loopback_dev;
179 dev_hold(rt->dst.dev);
180 dev_put(rt_dev);
181 }
182 }
183 spin_unlock_bh(&ul->lock);
184 }
185}
186
d52d3997
MKL
187static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
188{
3a2232e9 189 return dst_metrics_write_ptr(&rt->from->dst);
d52d3997
MKL
190}
191
06582540
DM
192static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
193{
4b32b5ad 194 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 195
d52d3997
MKL
196 if (rt->rt6i_flags & RTF_PCPU)
197 return rt6_pcpu_cow_metrics(rt);
198 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
199 return NULL;
200 else
3b471175 201 return dst_cow_metrics_generic(dst, old);
06582540
DM
202}
203
f894cbf8
DM
204static inline const void *choose_neigh_daddr(struct rt6_info *rt,
205 struct sk_buff *skb,
206 const void *daddr)
39232973
DM
207{
208 struct in6_addr *p = &rt->rt6i_gateway;
209
a7563f34 210 if (!ipv6_addr_any(p))
39232973 211 return (const void *) p;
f894cbf8
DM
212 else if (skb)
213 return &ipv6_hdr(skb)->daddr;
39232973
DM
214 return daddr;
215}
216
f894cbf8
DM
217static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
218 struct sk_buff *skb,
219 const void *daddr)
d3aaeb38 220{
39232973
DM
221 struct rt6_info *rt = (struct rt6_info *) dst;
222 struct neighbour *n;
223
f894cbf8 224 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 225 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
226 if (n)
227 return n;
228 return neigh_create(&nd_tbl, daddr, dst->dev);
229}
230
63fca65d
JA
231static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
232{
233 struct net_device *dev = dst->dev;
234 struct rt6_info *rt = (struct rt6_info *)dst;
235
236 daddr = choose_neigh_daddr(rt, NULL, daddr);
237 if (!daddr)
238 return;
239 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
240 return;
241 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
242 return;
243 __ipv6_confirm_neigh(dev, daddr);
244}
245
9a7ec3a9 246static struct dst_ops ip6_dst_ops_template = {
1da177e4 247 .family = AF_INET6,
1da177e4
LT
248 .gc = ip6_dst_gc,
249 .gc_thresh = 1024,
250 .check = ip6_dst_check,
0dbaee3b 251 .default_advmss = ip6_default_advmss,
ebb762f2 252 .mtu = ip6_mtu,
06582540 253 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
254 .destroy = ip6_dst_destroy,
255 .ifdown = ip6_dst_ifdown,
256 .negative_advice = ip6_negative_advice,
257 .link_failure = ip6_link_failure,
258 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 259 .redirect = rt6_do_redirect,
9f8955cc 260 .local_out = __ip6_local_out,
d3aaeb38 261 .neigh_lookup = ip6_neigh_lookup,
63fca65d 262 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
263};
264
ebb762f2 265static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 266{
618f9bc7
SK
267 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
268
269 return mtu ? : dst->dev->mtu;
ec831ea7
RD
270}
271
6700c270
DM
272static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
273 struct sk_buff *skb, u32 mtu)
14e50e57
DM
274{
275}
276
6700c270
DM
/* redirect handler for blackhole dsts: intentionally does nothing. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
281
14e50e57
DM
282static struct dst_ops ip6_dst_blackhole_ops = {
283 .family = AF_INET6,
14e50e57
DM
284 .destroy = ip6_dst_destroy,
285 .check = ip6_dst_check,
ebb762f2 286 .mtu = ip6_blackhole_mtu,
214f45c9 287 .default_advmss = ip6_default_advmss,
14e50e57 288 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 289 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 290 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 291 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
292};
293
62fa8a84 294static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 295 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
296};
297
fb0af4c7 298static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
299 .dst = {
300 .__refcnt = ATOMIC_INIT(1),
301 .__use = 1,
2c20cbd7 302 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 303 .error = -ENETUNREACH,
d8d1f30b
CG
304 .input = ip6_pkt_discard,
305 .output = ip6_pkt_discard_out,
1da177e4
LT
306 },
307 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 308 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
309 .rt6i_metric = ~(u32) 0,
310 .rt6i_ref = ATOMIC_INIT(1),
311};
312
101367c2
TG
313#ifdef CONFIG_IPV6_MULTIPLE_TABLES
314
fb0af4c7 315static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
316 .dst = {
317 .__refcnt = ATOMIC_INIT(1),
318 .__use = 1,
2c20cbd7 319 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 320 .error = -EACCES,
d8d1f30b
CG
321 .input = ip6_pkt_prohibit,
322 .output = ip6_pkt_prohibit_out,
101367c2
TG
323 },
324 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 325 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
326 .rt6i_metric = ~(u32) 0,
327 .rt6i_ref = ATOMIC_INIT(1),
328};
329
fb0af4c7 330static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
331 .dst = {
332 .__refcnt = ATOMIC_INIT(1),
333 .__use = 1,
2c20cbd7 334 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 335 .error = -EINVAL,
d8d1f30b 336 .input = dst_discard,
ede2059d 337 .output = dst_discard_out,
101367c2
TG
338 },
339 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 340 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
341 .rt6i_metric = ~(u32) 0,
342 .rt6i_ref = ATOMIC_INIT(1),
343};
344
345#endif
346
ebfa45f0
MKL
347static void rt6_info_init(struct rt6_info *rt)
348{
349 struct dst_entry *dst = &rt->dst;
350
351 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
352 INIT_LIST_HEAD(&rt->rt6i_siblings);
353 INIT_LIST_HEAD(&rt->rt6i_uncached);
354}
355
1da177e4 356/* allocate dst with ip6_dst_ops */
d52d3997
MKL
357static struct rt6_info *__ip6_dst_alloc(struct net *net,
358 struct net_device *dev,
ad706862 359 int flags)
1da177e4 360{
97bab73f 361 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
b2a9c0ed 362 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 363
81eb8447 364 if (rt) {
ebfa45f0 365 rt6_info_init(rt);
81eb8447
WW
366 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
367 }
8104891b 368
cf911662 369 return rt;
1da177e4
LT
370}
371
9ab179d8
DA
372struct rt6_info *ip6_dst_alloc(struct net *net,
373 struct net_device *dev,
374 int flags)
d52d3997 375{
ad706862 376 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
377
378 if (rt) {
379 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
bfd8e5a4 380 if (!rt->rt6i_pcpu) {
587fea74 381 dst_release_immediate(&rt->dst);
d52d3997
MKL
382 return NULL;
383 }
384 }
385
386 return rt;
387}
9ab179d8 388EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 389
1da177e4
LT
390static void ip6_dst_destroy(struct dst_entry *dst)
391{
392 struct rt6_info *rt = (struct rt6_info *)dst;
35732d01 393 struct rt6_exception_bucket *bucket;
3a2232e9 394 struct rt6_info *from = rt->from;
8d0b94af 395 struct inet6_dev *idev;
1da177e4 396
4b32b5ad 397 dst_destroy_metrics_generic(dst);
87775312 398 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
399 rt6_uncached_list_del(rt);
400
401 idev = rt->rt6i_idev;
38308473 402 if (idev) {
1da177e4
LT
403 rt->rt6i_idev = NULL;
404 in6_dev_put(idev);
1ab1457c 405 }
35732d01
WW
406 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
407 if (bucket) {
408 rt->rt6i_exception_bucket = NULL;
409 kfree(bucket);
410 }
1716a961 411
3a2232e9
DM
412 rt->from = NULL;
413 dst_release(&from->dst);
b3419363
DM
414}
415
1da177e4
LT
416static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
417 int how)
418{
419 struct rt6_info *rt = (struct rt6_info *)dst;
420 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 421 struct net_device *loopback_dev =
c346dca1 422 dev_net(dev)->loopback_dev;
1da177e4 423
e5645f51
WW
424 if (idev && idev->dev != loopback_dev) {
425 struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
426 if (loopback_idev) {
427 rt->rt6i_idev = loopback_idev;
428 in6_dev_put(idev);
97cac082 429 }
1da177e4
LT
430 }
431}
432
5973fb1e
MKL
433static bool __rt6_check_expired(const struct rt6_info *rt)
434{
435 if (rt->rt6i_flags & RTF_EXPIRES)
436 return time_after(jiffies, rt->dst.expires);
437 else
438 return false;
439}
440
a50feda5 441static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 442{
1716a961
G
443 if (rt->rt6i_flags & RTF_EXPIRES) {
444 if (time_after(jiffies, rt->dst.expires))
a50feda5 445 return true;
3a2232e9 446 } else if (rt->from) {
1e2ea8ad 447 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
3a2232e9 448 rt6_check_expired(rt->from);
1716a961 449 }
a50feda5 450 return false;
1da177e4
LT
451}
452
b4bac172
DA
453static struct rt6_info *rt6_multipath_select(const struct net *net,
454 struct rt6_info *match,
52bd4c0c 455 struct flowi6 *fl6, int oif,
b75cc8f9 456 const struct sk_buff *skb,
52bd4c0c 457 int strict)
51ebd318
ND
458{
459 struct rt6_info *sibling, *next_sibling;
51ebd318 460
b673d6cc
JS
461 /* We might have already computed the hash for ICMPv6 errors. In such
462 * case it will always be non-zero. Otherwise now is the time to do it.
463 */
464 if (!fl6->mp_hash)
b4bac172 465 fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
b673d6cc 466
3d709f69
IS
467 if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
468 return match;
469
470 list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
471 rt6i_siblings) {
472 if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
473 continue;
474 if (rt6_score_route(sibling, oif, strict) < 0)
475 break;
476 match = sibling;
477 break;
478 }
479
51ebd318
ND
480 return match;
481}
482
1da177e4 483/*
66f5d6ce 484 * Route lookup. rcu_read_lock() should be held.
1da177e4
LT
485 */
486
8ed67789
DL
487static inline struct rt6_info *rt6_device_match(struct net *net,
488 struct rt6_info *rt,
b71d1d42 489 const struct in6_addr *saddr,
1da177e4 490 int oif,
d420895e 491 int flags)
1da177e4
LT
492{
493 struct rt6_info *local = NULL;
494 struct rt6_info *sprt;
495
8067bb8c
IS
496 if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
497 return rt;
dd3abc4e 498
071fb37e 499 for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
d1918542 500 struct net_device *dev = sprt->dst.dev;
dd3abc4e 501
8067bb8c
IS
502 if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
503 continue;
504
dd3abc4e 505 if (oif) {
1da177e4
LT
506 if (dev->ifindex == oif)
507 return sprt;
508 if (dev->flags & IFF_LOOPBACK) {
38308473 509 if (!sprt->rt6i_idev ||
1da177e4 510 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 511 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 512 continue;
17fb0b2b
DA
513 if (local &&
514 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
515 continue;
516 }
517 local = sprt;
518 }
dd3abc4e
YH
519 } else {
520 if (ipv6_chk_addr(net, saddr, dev,
521 flags & RT6_LOOKUP_F_IFACE))
522 return sprt;
1da177e4 523 }
dd3abc4e 524 }
1da177e4 525
dd3abc4e 526 if (oif) {
1da177e4
LT
527 if (local)
528 return local;
529
d420895e 530 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 531 return net->ipv6.ip6_null_entry;
1da177e4 532 }
8067bb8c
IS
533
534 return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
1da177e4
LT
535}
536
27097255 537#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
538struct __rt6_probe_work {
539 struct work_struct work;
540 struct in6_addr target;
541 struct net_device *dev;
542};
543
544static void rt6_probe_deferred(struct work_struct *w)
545{
546 struct in6_addr mcaddr;
547 struct __rt6_probe_work *work =
548 container_of(w, struct __rt6_probe_work, work);
549
550 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 551 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 552 dev_put(work->dev);
662f5533 553 kfree(work);
c2f17e82
HFS
554}
555
27097255
YH
556static void rt6_probe(struct rt6_info *rt)
557{
990edb42 558 struct __rt6_probe_work *work;
f2c31e32 559 struct neighbour *neigh;
27097255
YH
560 /*
561 * Okay, this does not seem to be appropriate
562 * for now, however, we need to check if it
563 * is really so; aka Router Reachability Probing.
564 *
565 * Router Reachability Probe MUST be rate-limited
566 * to no more than one per minute.
567 */
2152caea 568 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 569 return;
2152caea
YH
570 rcu_read_lock_bh();
571 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
572 if (neigh) {
8d6c31bf
MKL
573 if (neigh->nud_state & NUD_VALID)
574 goto out;
575
990edb42 576 work = NULL;
2152caea 577 write_lock(&neigh->lock);
990edb42
MKL
578 if (!(neigh->nud_state & NUD_VALID) &&
579 time_after(jiffies,
580 neigh->updated +
581 rt->rt6i_idev->cnf.rtr_probe_interval)) {
582 work = kmalloc(sizeof(*work), GFP_ATOMIC);
583 if (work)
584 __neigh_set_probe_once(neigh);
c2f17e82 585 }
2152caea 586 write_unlock(&neigh->lock);
990edb42
MKL
587 } else {
588 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 589 }
990edb42
MKL
590
591 if (work) {
592 INIT_WORK(&work->work, rt6_probe_deferred);
593 work->target = rt->rt6i_gateway;
594 dev_hold(rt->dst.dev);
595 work->dev = rt->dst.dev;
596 schedule_work(&work->work);
597 }
598
8d6c31bf 599out:
2152caea 600 rcu_read_unlock_bh();
27097255
YH
601}
602#else
/* Router probing is compiled out without CONFIG_IPV6_ROUTER_PREF. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
606#endif
607
1da177e4 608/*
554cfb7e 609 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 610 */
b6f99a21 611static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 612{
d1918542 613 struct net_device *dev = rt->dst.dev;
161980f4 614 if (!oif || dev->ifindex == oif)
554cfb7e 615 return 2;
161980f4
DM
616 if ((dev->flags & IFF_LOOPBACK) &&
617 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
618 return 1;
619 return 0;
554cfb7e 620}
1da177e4 621
afc154e9 622static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 623{
f2c31e32 624 struct neighbour *neigh;
afc154e9 625 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 626
4d0c5911
YH
627 if (rt->rt6i_flags & RTF_NONEXTHOP ||
628 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 629 return RT6_NUD_SUCCEED;
145a3621
YH
630
631 rcu_read_lock_bh();
632 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
633 if (neigh) {
634 read_lock(&neigh->lock);
554cfb7e 635 if (neigh->nud_state & NUD_VALID)
afc154e9 636 ret = RT6_NUD_SUCCEED;
398bcbeb 637#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 638 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 639 ret = RT6_NUD_SUCCEED;
7e980569
JB
640 else
641 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 642#endif
145a3621 643 read_unlock(&neigh->lock);
afc154e9
HFS
644 } else {
645 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 646 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 647 }
145a3621
YH
648 rcu_read_unlock_bh();
649
a5a81f0b 650 return ret;
1da177e4
LT
651}
652
554cfb7e
YH
653static int rt6_score_route(struct rt6_info *rt, int oif,
654 int strict)
1da177e4 655{
a5a81f0b 656 int m;
1ab1457c 657
4d0c5911 658 m = rt6_check_dev(rt, oif);
77d16f45 659 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 660 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
661#ifdef CONFIG_IPV6_ROUTER_PREF
662 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
663#endif
afc154e9
HFS
664 if (strict & RT6_LOOKUP_F_REACHABLE) {
665 int n = rt6_check_neigh(rt);
666 if (n < 0)
667 return n;
668 }
554cfb7e
YH
669 return m;
670}
671
f11e6659 672static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
673 int *mpri, struct rt6_info *match,
674 bool *do_rr)
554cfb7e 675{
f11e6659 676 int m;
afc154e9 677 bool match_do_rr = false;
35103d11 678 struct inet6_dev *idev = rt->rt6i_idev;
35103d11 679
8067bb8c
IS
680 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
681 goto out;
682
14c5206c
IS
683 if (idev->cnf.ignore_routes_with_linkdown &&
684 rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
d5d32e4b 685 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 686 goto out;
f11e6659
DM
687
688 if (rt6_check_expired(rt))
689 goto out;
690
691 m = rt6_score_route(rt, oif, strict);
7e980569 692 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
693 match_do_rr = true;
694 m = 0; /* lowest valid score */
7e980569 695 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 696 goto out;
afc154e9
HFS
697 }
698
699 if (strict & RT6_LOOKUP_F_REACHABLE)
700 rt6_probe(rt);
f11e6659 701
7e980569 702 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 703 if (m > *mpri) {
afc154e9 704 *do_rr = match_do_rr;
f11e6659
DM
705 *mpri = m;
706 match = rt;
f11e6659 707 }
f11e6659
DM
708out:
709 return match;
710}
711
712static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
8d1040e8 713 struct rt6_info *leaf,
f11e6659 714 struct rt6_info *rr_head,
afc154e9
HFS
715 u32 metric, int oif, int strict,
716 bool *do_rr)
f11e6659 717{
9fbdcfaf 718 struct rt6_info *rt, *match, *cont;
554cfb7e 719 int mpri = -1;
1da177e4 720
f11e6659 721 match = NULL;
9fbdcfaf 722 cont = NULL;
071fb37e 723 for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
724 if (rt->rt6i_metric != metric) {
725 cont = rt;
726 break;
727 }
728
729 match = find_match(rt, oif, strict, &mpri, match, do_rr);
730 }
731
66f5d6ce 732 for (rt = leaf; rt && rt != rr_head;
071fb37e 733 rt = rcu_dereference(rt->rt6_next)) {
9fbdcfaf
SK
734 if (rt->rt6i_metric != metric) {
735 cont = rt;
736 break;
737 }
738
afc154e9 739 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
740 }
741
742 if (match || !cont)
743 return match;
744
071fb37e 745 for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
afc154e9 746 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 747
f11e6659
DM
748 return match;
749}
1da177e4 750
8d1040e8
WW
751static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
752 int oif, int strict)
f11e6659 753{
66f5d6ce 754 struct rt6_info *leaf = rcu_dereference(fn->leaf);
f11e6659 755 struct rt6_info *match, *rt0;
afc154e9 756 bool do_rr = false;
17ecf590 757 int key_plen;
1da177e4 758
87b1af8d 759 if (!leaf || leaf == net->ipv6.ip6_null_entry)
8d1040e8
WW
760 return net->ipv6.ip6_null_entry;
761
66f5d6ce 762 rt0 = rcu_dereference(fn->rr_ptr);
f11e6659 763 if (!rt0)
66f5d6ce 764 rt0 = leaf;
1da177e4 765
17ecf590
WW
766 /* Double check to make sure fn is not an intermediate node
767 * and fn->leaf does not points to its child's leaf
768 * (This might happen if all routes under fn are deleted from
769 * the tree and fib6_repair_tree() is called on the node.)
770 */
771 key_plen = rt0->rt6i_dst.plen;
772#ifdef CONFIG_IPV6_SUBTREES
773 if (rt0->rt6i_src.plen)
774 key_plen = rt0->rt6i_src.plen;
775#endif
776 if (fn->fn_bit != key_plen)
777 return net->ipv6.ip6_null_entry;
778
8d1040e8 779 match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
afc154e9 780 &do_rr);
1da177e4 781
afc154e9 782 if (do_rr) {
071fb37e 783 struct rt6_info *next = rcu_dereference(rt0->rt6_next);
f11e6659 784
554cfb7e 785 /* no entries matched; do round-robin */
f11e6659 786 if (!next || next->rt6i_metric != rt0->rt6i_metric)
8d1040e8 787 next = leaf;
f11e6659 788
66f5d6ce
WW
789 if (next != rt0) {
790 spin_lock_bh(&leaf->rt6i_table->tb6_lock);
791 /* make sure next is not being deleted from the tree */
792 if (next->rt6i_node)
793 rcu_assign_pointer(fn->rr_ptr, next);
794 spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
795 }
1da177e4 796 }
1da177e4 797
a02cec21 798 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
799}
800
8b9df265
MKL
801static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
802{
803 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
804}
805
70ceb4f5
YH
806#ifdef CONFIG_IPV6_ROUTE_INFO
807int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 808 const struct in6_addr *gwaddr)
70ceb4f5 809{
c346dca1 810 struct net *net = dev_net(dev);
70ceb4f5
YH
811 struct route_info *rinfo = (struct route_info *) opt;
812 struct in6_addr prefix_buf, *prefix;
813 unsigned int pref;
4bed72e4 814 unsigned long lifetime;
70ceb4f5
YH
815 struct rt6_info *rt;
816
817 if (len < sizeof(struct route_info)) {
818 return -EINVAL;
819 }
820
821 /* Sanity check for prefix_len and length */
822 if (rinfo->length > 3) {
823 return -EINVAL;
824 } else if (rinfo->prefix_len > 128) {
825 return -EINVAL;
826 } else if (rinfo->prefix_len > 64) {
827 if (rinfo->length < 2) {
828 return -EINVAL;
829 }
830 } else if (rinfo->prefix_len > 0) {
831 if (rinfo->length < 1) {
832 return -EINVAL;
833 }
834 }
835
836 pref = rinfo->route_pref;
837 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 838 return -EINVAL;
70ceb4f5 839
4bed72e4 840 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
841
842 if (rinfo->length == 3)
843 prefix = (struct in6_addr *)rinfo->prefix;
844 else {
845 /* this function is safe */
846 ipv6_addr_prefix(&prefix_buf,
847 (struct in6_addr *)rinfo->prefix,
848 rinfo->prefix_len);
849 prefix = &prefix_buf;
850 }
851
f104a567
DJ
852 if (rinfo->prefix_len == 0)
853 rt = rt6_get_dflt_router(gwaddr, dev);
854 else
855 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 856 gwaddr, dev);
70ceb4f5
YH
857
858 if (rt && !lifetime) {
e0a1ad73 859 ip6_del_rt(rt);
70ceb4f5
YH
860 rt = NULL;
861 }
862
863 if (!rt && lifetime)
830218c1
DA
864 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
865 dev, pref);
70ceb4f5
YH
866 else if (rt)
867 rt->rt6i_flags = RTF_ROUTEINFO |
868 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
869
870 if (rt) {
1716a961
G
871 if (!addrconf_finite_timeout(lifetime))
872 rt6_clean_expires(rt);
873 else
874 rt6_set_expires(rt, jiffies + HZ * lifetime);
875
94e187c0 876 ip6_rt_put(rt);
70ceb4f5
YH
877 }
878 return 0;
879}
880#endif
881
a3c00e46
MKL
882static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
883 struct in6_addr *saddr)
884{
66f5d6ce 885 struct fib6_node *pn, *sn;
a3c00e46
MKL
886 while (1) {
887 if (fn->fn_flags & RTN_TL_ROOT)
888 return NULL;
66f5d6ce
WW
889 pn = rcu_dereference(fn->parent);
890 sn = FIB6_SUBTREE(pn);
891 if (sn && sn != fn)
892 fn = fib6_lookup(sn, NULL, saddr);
a3c00e46
MKL
893 else
894 fn = pn;
895 if (fn->fn_flags & RTN_RTINFO)
896 return fn;
897 }
898}
c71099ac 899
d3843fe5
WW
900static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
901 bool null_fallback)
902{
903 struct rt6_info *rt = *prt;
904
905 if (dst_hold_safe(&rt->dst))
906 return true;
907 if (null_fallback) {
908 rt = net->ipv6.ip6_null_entry;
909 dst_hold(&rt->dst);
910 } else {
911 rt = NULL;
912 }
913 *prt = rt;
914 return false;
915}
916
8ed67789
DL
917static struct rt6_info *ip6_pol_route_lookup(struct net *net,
918 struct fib6_table *table,
b75cc8f9
DA
919 struct flowi6 *fl6,
920 const struct sk_buff *skb,
921 int flags)
1da177e4 922{
2b760fcf 923 struct rt6_info *rt, *rt_cache;
1da177e4 924 struct fib6_node *fn;
1da177e4 925
b6cdbc85
DA
926 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
927 flags &= ~RT6_LOOKUP_F_IFACE;
928
66f5d6ce 929 rcu_read_lock();
4c9483b2 930 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac 931restart:
66f5d6ce
WW
932 rt = rcu_dereference(fn->leaf);
933 if (!rt) {
934 rt = net->ipv6.ip6_null_entry;
935 } else {
936 rt = rt6_device_match(net, rt, &fl6->saddr,
937 fl6->flowi6_oif, flags);
938 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
b4bac172 939 rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
b75cc8f9 940 skb, flags);
66f5d6ce 941 }
a3c00e46
MKL
942 if (rt == net->ipv6.ip6_null_entry) {
943 fn = fib6_backtrack(fn, &fl6->saddr);
944 if (fn)
945 goto restart;
946 }
2b760fcf
WW
947 /* Search through exception table */
948 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
949 if (rt_cache)
950 rt = rt_cache;
951
d3843fe5
WW
952 if (ip6_hold_safe(net, &rt, true))
953 dst_use_noref(&rt->dst, jiffies);
954
66f5d6ce 955 rcu_read_unlock();
b811580d 956
b65f164d 957 trace_fib6_table_lookup(net, rt, table, fl6);
b811580d 958
c71099ac
TG
959 return rt;
960
961}
962
67ba4152 963struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
b75cc8f9 964 const struct sk_buff *skb, int flags)
ea6e574e 965{
b75cc8f9 966 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
ea6e574e
FW
967}
968EXPORT_SYMBOL_GPL(ip6_route_lookup);
969
9acd9f3a 970struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
b75cc8f9
DA
971 const struct in6_addr *saddr, int oif,
972 const struct sk_buff *skb, int strict)
c71099ac 973{
4c9483b2
DM
974 struct flowi6 fl6 = {
975 .flowi6_oif = oif,
976 .daddr = *daddr,
c71099ac
TG
977 };
978 struct dst_entry *dst;
77d16f45 979 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 980
adaa70bb 981 if (saddr) {
4c9483b2 982 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
983 flags |= RT6_LOOKUP_F_HAS_SADDR;
984 }
985
b75cc8f9 986 dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
c71099ac
TG
987 if (dst->error == 0)
988 return (struct rt6_info *) dst;
989
990 dst_release(dst);
991
1da177e4
LT
992 return NULL;
993}
7159039a
YH
994EXPORT_SYMBOL(rt6_lookup);
995
c71099ac 996/* ip6_ins_rt is called with FREE table->tb6_lock.
1cfb71ee
WW
997 * It takes a new route entry; if the addition fails for any reason, the
998 * route is released.
999 * Caller must hold dst before calling it.
1da177e4
LT
1000 */
1001
e5fd387a 1002static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
1003 struct mx6_config *mxc,
1004 struct netlink_ext_ack *extack)
1da177e4
LT
1005{
1006 int err;
c71099ac 1007 struct fib6_table *table;
1da177e4 1008
c71099ac 1009 table = rt->rt6i_table;
66f5d6ce 1010 spin_lock_bh(&table->tb6_lock);
333c4301 1011 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
66f5d6ce 1012 spin_unlock_bh(&table->tb6_lock);
1da177e4
LT
1013
1014 return err;
1015}
1016
40e22e8f
TG
1017int ip6_ins_rt(struct rt6_info *rt)
1018{
e715b6d3
FW
1019 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
1020 struct mx6_config mxc = { .mx = NULL, };
1021
1cfb71ee
WW
1022 /* Hold dst to account for the reference from the fib6 tree */
1023 dst_hold(&rt->dst);
333c4301 1024 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
1025}
1026
4832c30d
DA
1027/* called with rcu_lock held */
1028static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt)
1029{
1030 struct net_device *dev = rt->dst.dev;
1031
98d11291 1032 if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) {
4832c30d
DA
1033 /* for copies of local routes, dst->dev needs to be the
1034 * device if it is a master device, the master device if
1035 * device is enslaved, and the loopback as the default
1036 */
1037 if (netif_is_l3_slave(dev) &&
1038 !rt6_need_strict(&rt->rt6i_dst.addr))
1039 dev = l3mdev_master_dev_rcu(dev);
1040 else if (!netif_is_l3_master(dev))
1041 dev = dev_net(dev)->loopback_dev;
1042 /* last case is netif_is_l3_master(dev) is true in which
1043 * case we want dev returned to be dev
1044 */
1045 }
1046
1047 return dev;
1048}
1049
8b9df265
MKL
1050static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
1051 const struct in6_addr *daddr,
1052 const struct in6_addr *saddr)
1da177e4 1053{
4832c30d 1054 struct net_device *dev;
1da177e4
LT
1055 struct rt6_info *rt;
1056
1057 /*
1058 * Clone the route.
1059 */
1060
d52d3997 1061 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1062 ort = ort->from;
1da177e4 1063
4832c30d
DA
1064 rcu_read_lock();
1065 dev = ip6_rt_get_dev_rcu(ort);
1066 rt = __ip6_dst_alloc(dev_net(dev), dev, 0);
1067 rcu_read_unlock();
83a09abd
MKL
1068 if (!rt)
1069 return NULL;
1070
1071 ip6_rt_copy_init(rt, ort);
1072 rt->rt6i_flags |= RTF_CACHE;
1073 rt->rt6i_metric = 0;
1074 rt->dst.flags |= DST_HOST;
1075 rt->rt6i_dst.addr = *daddr;
1076 rt->rt6i_dst.plen = 128;
1da177e4 1077
83a09abd
MKL
1078 if (!rt6_is_gw_or_nonexthop(ort)) {
1079 if (ort->rt6i_dst.plen != 128 &&
1080 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
1081 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 1082#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
1083 if (rt->rt6i_src.plen && saddr) {
1084 rt->rt6i_src.addr = *saddr;
1085 rt->rt6i_src.plen = 128;
8b9df265 1086 }
83a09abd 1087#endif
95a9a5ba 1088 }
1da177e4 1089
95a9a5ba
YH
1090 return rt;
1091}
1da177e4 1092
d52d3997
MKL
1093static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1094{
4832c30d 1095 struct net_device *dev;
d52d3997
MKL
1096 struct rt6_info *pcpu_rt;
1097
4832c30d
DA
1098 rcu_read_lock();
1099 dev = ip6_rt_get_dev_rcu(rt);
1100 pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags);
1101 rcu_read_unlock();
d52d3997
MKL
1102 if (!pcpu_rt)
1103 return NULL;
1104 ip6_rt_copy_init(pcpu_rt, rt);
1105 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1106 pcpu_rt->rt6i_flags |= RTF_PCPU;
1107 return pcpu_rt;
1108}
1109
66f5d6ce 1110/* It should be called with rcu_read_lock() acquired */
d52d3997
MKL
1111static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1112{
a73e4195 1113 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1114
1115 p = this_cpu_ptr(rt->rt6i_pcpu);
1116 pcpu_rt = *p;
1117
d3843fe5 1118 if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
a73e4195 1119 rt6_dst_from_metrics_check(pcpu_rt);
d3843fe5 1120
a73e4195
MKL
1121 return pcpu_rt;
1122}
1123
1124static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1125{
1126 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1127
1128 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1129 if (!pcpu_rt) {
1130 struct net *net = dev_net(rt->dst.dev);
1131
9c7370a1
MKL
1132 dst_hold(&net->ipv6.ip6_null_entry->dst);
1133 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1134 }
1135
a94b9367
WW
1136 dst_hold(&pcpu_rt->dst);
1137 p = this_cpu_ptr(rt->rt6i_pcpu);
1138 prev = cmpxchg(p, NULL, pcpu_rt);
951f788a 1139 BUG_ON(prev);
a94b9367 1140
d52d3997
MKL
1141 rt6_dst_from_metrics_check(pcpu_rt);
1142 return pcpu_rt;
1143}
1144
35732d01
WW
1145/* exception hash table implementation
1146 */
1147static DEFINE_SPINLOCK(rt6_exception_lock);
1148
1149/* Remove rt6_ex from hash table and free the memory
1150 * Caller must hold rt6_exception_lock
1151 */
1152static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
1153 struct rt6_exception *rt6_ex)
1154{
b2427e67 1155 struct net *net;
81eb8447 1156
35732d01
WW
1157 if (!bucket || !rt6_ex)
1158 return;
b2427e67
CIK
1159
1160 net = dev_net(rt6_ex->rt6i->dst.dev);
35732d01
WW
1161 rt6_ex->rt6i->rt6i_node = NULL;
1162 hlist_del_rcu(&rt6_ex->hlist);
1163 rt6_release(rt6_ex->rt6i);
1164 kfree_rcu(rt6_ex, rcu);
1165 WARN_ON_ONCE(!bucket->depth);
1166 bucket->depth--;
81eb8447 1167 net->ipv6.rt6_stats->fib_rt_cache--;
35732d01
WW
1168}
1169
1170/* Remove oldest rt6_ex in bucket and free the memory
1171 * Caller must hold rt6_exception_lock
1172 */
1173static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
1174{
1175 struct rt6_exception *rt6_ex, *oldest = NULL;
1176
1177 if (!bucket)
1178 return;
1179
1180 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1181 if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
1182 oldest = rt6_ex;
1183 }
1184 rt6_remove_exception(bucket, oldest);
1185}
1186
1187static u32 rt6_exception_hash(const struct in6_addr *dst,
1188 const struct in6_addr *src)
1189{
1190 static u32 seed __read_mostly;
1191 u32 val;
1192
1193 net_get_random_once(&seed, sizeof(seed));
1194 val = jhash(dst, sizeof(*dst), seed);
1195
1196#ifdef CONFIG_IPV6_SUBTREES
1197 if (src)
1198 val = jhash(src, sizeof(*src), val);
1199#endif
1200 return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
1201}
1202
1203/* Helper function to find the cached rt in the hash table
1204 * and update bucket pointer to point to the bucket for this
1205 * (daddr, saddr) pair
1206 * Caller must hold rt6_exception_lock
1207 */
1208static struct rt6_exception *
1209__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
1210 const struct in6_addr *daddr,
1211 const struct in6_addr *saddr)
1212{
1213 struct rt6_exception *rt6_ex;
1214 u32 hval;
1215
1216 if (!(*bucket) || !daddr)
1217 return NULL;
1218
1219 hval = rt6_exception_hash(daddr, saddr);
1220 *bucket += hval;
1221
1222 hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
1223 struct rt6_info *rt6 = rt6_ex->rt6i;
1224 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1225
1226#ifdef CONFIG_IPV6_SUBTREES
1227 if (matched && saddr)
1228 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1229#endif
1230 if (matched)
1231 return rt6_ex;
1232 }
1233 return NULL;
1234}
1235
1236/* Helper function to find the cached rt in the hash table
1237 * and update bucket pointer to point to the bucket for this
1238 * (daddr, saddr) pair
1239 * Caller must hold rcu_read_lock()
1240 */
1241static struct rt6_exception *
1242__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
1243 const struct in6_addr *daddr,
1244 const struct in6_addr *saddr)
1245{
1246 struct rt6_exception *rt6_ex;
1247 u32 hval;
1248
1249 WARN_ON_ONCE(!rcu_read_lock_held());
1250
1251 if (!(*bucket) || !daddr)
1252 return NULL;
1253
1254 hval = rt6_exception_hash(daddr, saddr);
1255 *bucket += hval;
1256
1257 hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
1258 struct rt6_info *rt6 = rt6_ex->rt6i;
1259 bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
1260
1261#ifdef CONFIG_IPV6_SUBTREES
1262 if (matched && saddr)
1263 matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
1264#endif
1265 if (matched)
1266 return rt6_ex;
1267 }
1268 return NULL;
1269}
1270
1271static int rt6_insert_exception(struct rt6_info *nrt,
1272 struct rt6_info *ort)
1273{
81eb8447 1274 struct net *net = dev_net(ort->dst.dev);
35732d01
WW
1275 struct rt6_exception_bucket *bucket;
1276 struct in6_addr *src_key = NULL;
1277 struct rt6_exception *rt6_ex;
1278 int err = 0;
1279
1280 /* ort can't be a cache or pcpu route */
1281 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
3a2232e9 1282 ort = ort->from;
35732d01
WW
1283 WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
1284
1285 spin_lock_bh(&rt6_exception_lock);
1286
1287 if (ort->exception_bucket_flushed) {
1288 err = -EINVAL;
1289 goto out;
1290 }
1291
1292 bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
1293 lockdep_is_held(&rt6_exception_lock));
1294 if (!bucket) {
1295 bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
1296 GFP_ATOMIC);
1297 if (!bucket) {
1298 err = -ENOMEM;
1299 goto out;
1300 }
1301 rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
1302 }
1303
1304#ifdef CONFIG_IPV6_SUBTREES
1305 /* rt6i_src.plen != 0 indicates ort is in subtree
1306 * and exception table is indexed by a hash of
1307 * both rt6i_dst and rt6i_src.
1308 * Otherwise, the exception table is indexed by
1309 * a hash of only rt6i_dst.
1310 */
1311 if (ort->rt6i_src.plen)
1312 src_key = &nrt->rt6i_src.addr;
1313#endif
60006a48
WW
1314
1315 /* Update rt6i_prefsrc as it could be changed
1316 * in rt6_remove_prefsrc()
1317 */
1318 nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
f5bbe7ee
WW
1319 /* rt6_mtu_change() might lower mtu on ort.
1320 * Only insert this exception route if its mtu
1321 * is less than ort's mtu value.
1322 */
1323 if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
1324 err = -EINVAL;
1325 goto out;
1326 }
60006a48 1327
35732d01
WW
1328 rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
1329 src_key);
1330 if (rt6_ex)
1331 rt6_remove_exception(bucket, rt6_ex);
1332
1333 rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
1334 if (!rt6_ex) {
1335 err = -ENOMEM;
1336 goto out;
1337 }
1338 rt6_ex->rt6i = nrt;
1339 rt6_ex->stamp = jiffies;
1340 atomic_inc(&nrt->rt6i_ref);
1341 nrt->rt6i_node = ort->rt6i_node;
1342 hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
1343 bucket->depth++;
81eb8447 1344 net->ipv6.rt6_stats->fib_rt_cache++;
35732d01
WW
1345
1346 if (bucket->depth > FIB6_MAX_DEPTH)
1347 rt6_exception_remove_oldest(bucket);
1348
1349out:
1350 spin_unlock_bh(&rt6_exception_lock);
1351
1352 /* Update fn->fn_sernum to invalidate all cached dst */
b886d5f2 1353 if (!err) {
922c2ac8 1354 spin_lock_bh(&ort->rt6i_table->tb6_lock);
35732d01 1355 fib6_update_sernum(ort);
922c2ac8 1356 spin_unlock_bh(&ort->rt6i_table->tb6_lock);
b886d5f2
PA
1357 fib6_force_start_gc(net);
1358 }
35732d01
WW
1359
1360 return err;
1361}
1362
1363void rt6_flush_exceptions(struct rt6_info *rt)
1364{
1365 struct rt6_exception_bucket *bucket;
1366 struct rt6_exception *rt6_ex;
1367 struct hlist_node *tmp;
1368 int i;
1369
1370 spin_lock_bh(&rt6_exception_lock);
1371 /* Prevent rt6_insert_exception() to recreate the bucket list */
1372 rt->exception_bucket_flushed = 1;
1373
1374 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1375 lockdep_is_held(&rt6_exception_lock));
1376 if (!bucket)
1377 goto out;
1378
1379 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1380 hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
1381 rt6_remove_exception(bucket, rt6_ex);
1382 WARN_ON_ONCE(bucket->depth);
1383 bucket++;
1384 }
1385
1386out:
1387 spin_unlock_bh(&rt6_exception_lock);
1388}
1389
1390/* Find cached rt in the hash table inside passed in rt
1391 * Caller has to hold rcu_read_lock()
1392 */
1393static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
1394 struct in6_addr *daddr,
1395 struct in6_addr *saddr)
1396{
1397 struct rt6_exception_bucket *bucket;
1398 struct in6_addr *src_key = NULL;
1399 struct rt6_exception *rt6_ex;
1400 struct rt6_info *res = NULL;
1401
1402 bucket = rcu_dereference(rt->rt6i_exception_bucket);
1403
1404#ifdef CONFIG_IPV6_SUBTREES
1405 /* rt6i_src.plen != 0 indicates rt is in subtree
1406 * and exception table is indexed by a hash of
1407 * both rt6i_dst and rt6i_src.
1408 * Otherwise, the exception table is indexed by
1409 * a hash of only rt6i_dst.
1410 */
1411 if (rt->rt6i_src.plen)
1412 src_key = saddr;
1413#endif
1414 rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
1415
1416 if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
1417 res = rt6_ex->rt6i;
1418
1419 return res;
1420}
1421
1422/* Remove the passed in cached rt from the hash table that contains it */
1423int rt6_remove_exception_rt(struct rt6_info *rt)
1424{
35732d01 1425 struct rt6_exception_bucket *bucket;
3a2232e9 1426 struct rt6_info *from = rt->from;
35732d01
WW
1427 struct in6_addr *src_key = NULL;
1428 struct rt6_exception *rt6_ex;
1429 int err;
1430
1431 if (!from ||
442d713b 1432 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1433 return -EINVAL;
1434
1435 if (!rcu_access_pointer(from->rt6i_exception_bucket))
1436 return -ENOENT;
1437
1438 spin_lock_bh(&rt6_exception_lock);
1439 bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
1440 lockdep_is_held(&rt6_exception_lock));
1441#ifdef CONFIG_IPV6_SUBTREES
1442 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1443 * and exception table is indexed by a hash of
1444 * both rt6i_dst and rt6i_src.
1445 * Otherwise, the exception table is indexed by
1446 * a hash of only rt6i_dst.
1447 */
1448 if (from->rt6i_src.plen)
1449 src_key = &rt->rt6i_src.addr;
1450#endif
1451 rt6_ex = __rt6_find_exception_spinlock(&bucket,
1452 &rt->rt6i_dst.addr,
1453 src_key);
1454 if (rt6_ex) {
1455 rt6_remove_exception(bucket, rt6_ex);
1456 err = 0;
1457 } else {
1458 err = -ENOENT;
1459 }
1460
1461 spin_unlock_bh(&rt6_exception_lock);
1462 return err;
1463}
1464
1465/* Find rt6_ex which contains the passed in rt cache and
1466 * refresh its stamp
1467 */
1468static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
1469{
35732d01 1470 struct rt6_exception_bucket *bucket;
3a2232e9 1471 struct rt6_info *from = rt->from;
35732d01
WW
1472 struct in6_addr *src_key = NULL;
1473 struct rt6_exception *rt6_ex;
1474
1475 if (!from ||
442d713b 1476 !(rt->rt6i_flags & RTF_CACHE))
35732d01
WW
1477 return;
1478
1479 rcu_read_lock();
1480 bucket = rcu_dereference(from->rt6i_exception_bucket);
1481
1482#ifdef CONFIG_IPV6_SUBTREES
1483 /* rt6i_src.plen != 0 indicates 'from' is in subtree
1484 * and exception table is indexed by a hash of
1485 * both rt6i_dst and rt6i_src.
1486 * Otherwise, the exception table is indexed by
1487 * a hash of only rt6i_dst.
1488 */
1489 if (from->rt6i_src.plen)
1490 src_key = &rt->rt6i_src.addr;
1491#endif
1492 rt6_ex = __rt6_find_exception_rcu(&bucket,
1493 &rt->rt6i_dst.addr,
1494 src_key);
1495 if (rt6_ex)
1496 rt6_ex->stamp = jiffies;
1497
1498 rcu_read_unlock();
1499}
1500
60006a48
WW
1501static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
1502{
1503 struct rt6_exception_bucket *bucket;
1504 struct rt6_exception *rt6_ex;
1505 int i;
1506
1507 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1508 lockdep_is_held(&rt6_exception_lock));
1509
1510 if (bucket) {
1511 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1512 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1513 rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
1514 }
1515 bucket++;
1516 }
1517 }
1518}
1519
e9fa1495
SB
1520static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
1521 struct rt6_info *rt, int mtu)
1522{
1523 /* If the new MTU is lower than the route PMTU, this new MTU will be the
1524 * lowest MTU in the path: always allow updating the route PMTU to
1525 * reflect PMTU decreases.
1526 *
1527 * If the new MTU is higher, and the route PMTU is equal to the local
1528 * MTU, this means the old MTU is the lowest in the path, so allow
1529 * updating it: if other nodes now have lower MTUs, PMTU discovery will
1530 * handle this.
1531 */
1532
1533 if (dst_mtu(&rt->dst) >= mtu)
1534 return true;
1535
1536 if (dst_mtu(&rt->dst) == idev->cnf.mtu6)
1537 return true;
1538
1539 return false;
1540}
1541
1542static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
1543 struct rt6_info *rt, int mtu)
f5bbe7ee
WW
1544{
1545 struct rt6_exception_bucket *bucket;
1546 struct rt6_exception *rt6_ex;
1547 int i;
1548
1549 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1550 lockdep_is_held(&rt6_exception_lock));
1551
e9fa1495
SB
1552 if (!bucket)
1553 return;
1554
1555 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1556 hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
1557 struct rt6_info *entry = rt6_ex->rt6i;
1558
1559 /* For RTF_CACHE with rt6i_pmtu == 0 (i.e. a redirected
1560 * route), the metrics of its rt->dst.from have already
1561 * been updated.
1562 */
1563 if (entry->rt6i_pmtu &&
1564 rt6_mtu_change_route_allowed(idev, entry, mtu))
1565 entry->rt6i_pmtu = mtu;
f5bbe7ee 1566 }
e9fa1495 1567 bucket++;
f5bbe7ee
WW
1568 }
1569}
1570
b16cb459
WW
1571#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
1572
1573static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
1574 struct in6_addr *gateway)
1575{
1576 struct rt6_exception_bucket *bucket;
1577 struct rt6_exception *rt6_ex;
1578 struct hlist_node *tmp;
1579 int i;
1580
1581 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1582 return;
1583
1584 spin_lock_bh(&rt6_exception_lock);
1585 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1586 lockdep_is_held(&rt6_exception_lock));
1587
1588 if (bucket) {
1589 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1590 hlist_for_each_entry_safe(rt6_ex, tmp,
1591 &bucket->chain, hlist) {
1592 struct rt6_info *entry = rt6_ex->rt6i;
1593
1594 if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
1595 RTF_CACHE_GATEWAY &&
1596 ipv6_addr_equal(gateway,
1597 &entry->rt6i_gateway)) {
1598 rt6_remove_exception(bucket, rt6_ex);
1599 }
1600 }
1601 bucket++;
1602 }
1603 }
1604
1605 spin_unlock_bh(&rt6_exception_lock);
1606}
1607
c757faa8
WW
1608static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
1609 struct rt6_exception *rt6_ex,
1610 struct fib6_gc_args *gc_args,
1611 unsigned long now)
1612{
1613 struct rt6_info *rt = rt6_ex->rt6i;
1614
1859bac0
PA
1615 /* we are pruning and obsoleting aged-out and non gateway exceptions
1616 * even if others have still references to them, so that on next
1617 * dst_check() such references can be dropped.
1618 * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
1619 * expired, independently from their aging, as per RFC 8201 section 4
1620 */
31afeb42
WW
1621 if (!(rt->rt6i_flags & RTF_EXPIRES)) {
1622 if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
1623 RT6_TRACE("aging clone %p\n", rt);
1624 rt6_remove_exception(bucket, rt6_ex);
1625 return;
1626 }
1627 } else if (time_after(jiffies, rt->dst.expires)) {
1628 RT6_TRACE("purging expired route %p\n", rt);
c757faa8
WW
1629 rt6_remove_exception(bucket, rt6_ex);
1630 return;
31afeb42
WW
1631 }
1632
1633 if (rt->rt6i_flags & RTF_GATEWAY) {
c757faa8
WW
1634 struct neighbour *neigh;
1635 __u8 neigh_flags = 0;
1636
1bfa26ff
ED
1637 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
1638 if (neigh)
c757faa8 1639 neigh_flags = neigh->flags;
1bfa26ff 1640
c757faa8
WW
1641 if (!(neigh_flags & NTF_ROUTER)) {
1642 RT6_TRACE("purging route %p via non-router but gateway\n",
1643 rt);
1644 rt6_remove_exception(bucket, rt6_ex);
1645 return;
1646 }
1647 }
31afeb42 1648
c757faa8
WW
1649 gc_args->more++;
1650}
1651
1652void rt6_age_exceptions(struct rt6_info *rt,
1653 struct fib6_gc_args *gc_args,
1654 unsigned long now)
1655{
1656 struct rt6_exception_bucket *bucket;
1657 struct rt6_exception *rt6_ex;
1658 struct hlist_node *tmp;
1659 int i;
1660
1661 if (!rcu_access_pointer(rt->rt6i_exception_bucket))
1662 return;
1663
1bfa26ff
ED
1664 rcu_read_lock_bh();
1665 spin_lock(&rt6_exception_lock);
c757faa8
WW
1666 bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
1667 lockdep_is_held(&rt6_exception_lock));
1668
1669 if (bucket) {
1670 for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
1671 hlist_for_each_entry_safe(rt6_ex, tmp,
1672 &bucket->chain, hlist) {
1673 rt6_age_examine_exception(bucket, rt6_ex,
1674 gc_args, now);
1675 }
1676 bucket++;
1677 }
1678 }
1bfa26ff
ED
1679 spin_unlock(&rt6_exception_lock);
1680 rcu_read_unlock_bh();
c757faa8
WW
1681}
1682
9ff74384 1683struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
b75cc8f9
DA
1684 int oif, struct flowi6 *fl6,
1685 const struct sk_buff *skb, int flags)
1da177e4 1686{
367efcb9 1687 struct fib6_node *fn, *saved_fn;
2b760fcf 1688 struct rt6_info *rt, *rt_cache;
c71099ac 1689 int strict = 0;
1da177e4 1690
77d16f45 1691 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1692 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1693 if (net->ipv6.devconf_all->forwarding == 0)
1694 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1695
66f5d6ce 1696 rcu_read_lock();
1da177e4 1697
4c9483b2 1698 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1699 saved_fn = fn;
1da177e4 1700
ca254490
DA
1701 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1702 oif = 0;
1703
a3c00e46 1704redo_rt6_select:
8d1040e8 1705 rt = rt6_select(net, fn, oif, strict);
52bd4c0c 1706 if (rt->rt6i_nsiblings)
b4bac172 1707 rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
a3c00e46
MKL
1708 if (rt == net->ipv6.ip6_null_entry) {
1709 fn = fib6_backtrack(fn, &fl6->saddr);
1710 if (fn)
1711 goto redo_rt6_select;
367efcb9
MKL
1712 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1713 /* also consider unreachable route */
1714 strict &= ~RT6_LOOKUP_F_REACHABLE;
1715 fn = saved_fn;
1716 goto redo_rt6_select;
367efcb9 1717 }
a3c00e46
MKL
1718 }
1719
2b760fcf
WW
1720 /*Search through exception table */
1721 rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
1722 if (rt_cache)
1723 rt = rt_cache;
fb9de91e 1724
d3843fe5 1725 if (rt == net->ipv6.ip6_null_entry) {
66f5d6ce 1726 rcu_read_unlock();
d3843fe5 1727 dst_hold(&rt->dst);
b65f164d 1728 trace_fib6_table_lookup(net, rt, table, fl6);
d3843fe5
WW
1729 return rt;
1730 } else if (rt->rt6i_flags & RTF_CACHE) {
1731 if (ip6_hold_safe(net, &rt, true)) {
1732 dst_use_noref(&rt->dst, jiffies);
1733 rt6_dst_from_metrics_check(rt);
1734 }
66f5d6ce 1735 rcu_read_unlock();
b65f164d 1736 trace_fib6_table_lookup(net, rt, table, fl6);
d52d3997 1737 return rt;
3da59bd9
MKL
1738 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1739 !(rt->rt6i_flags & RTF_GATEWAY))) {
1740 /* Create a RTF_CACHE clone which will not be
1741 * owned by the fib6 tree. It is for the special case where
1742 * the daddr in the skb during the neighbor look-up is different
1743 * from the fl6->daddr used to look-up route here.
1744 */
1745
1746 struct rt6_info *uncached_rt;
1747
d3843fe5
WW
1748 if (ip6_hold_safe(net, &rt, true)) {
1749 dst_use_noref(&rt->dst, jiffies);
1750 } else {
66f5d6ce 1751 rcu_read_unlock();
d3843fe5
WW
1752 uncached_rt = rt;
1753 goto uncached_rt_out;
1754 }
66f5d6ce 1755 rcu_read_unlock();
d52d3997 1756
3da59bd9
MKL
1757 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1758 dst_release(&rt->dst);
c71099ac 1759
1cfb71ee
WW
1760 if (uncached_rt) {
1761 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1762 * No need for another dst_hold()
1763 */
8d0b94af 1764 rt6_uncached_list_add(uncached_rt);
81eb8447 1765 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1cfb71ee 1766 } else {
3da59bd9 1767 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1768 dst_hold(&uncached_rt->dst);
1769 }
b811580d 1770
d3843fe5 1771uncached_rt_out:
b65f164d 1772 trace_fib6_table_lookup(net, uncached_rt, table, fl6);
3da59bd9 1773 return uncached_rt;
3da59bd9 1774
d52d3997
MKL
1775 } else {
1776 /* Get a percpu copy */
1777
1778 struct rt6_info *pcpu_rt;
1779
d3843fe5 1780 dst_use_noref(&rt->dst, jiffies);
951f788a 1781 local_bh_disable();
d52d3997 1782 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1783
951f788a 1784 if (!pcpu_rt) {
a94b9367
WW
1785 /* atomic_inc_not_zero() is needed when using rcu */
1786 if (atomic_inc_not_zero(&rt->rt6i_ref)) {
951f788a 1787 /* No dst_hold() on rt is needed because grabbing
a94b9367
WW
1788 * rt->rt6i_ref makes sure rt can't be released.
1789 */
a94b9367
WW
1790 pcpu_rt = rt6_make_pcpu_route(rt);
1791 rt6_release(rt);
1792 } else {
1793 /* rt is already removed from tree */
a94b9367
WW
1794 pcpu_rt = net->ipv6.ip6_null_entry;
1795 dst_hold(&pcpu_rt->dst);
1796 }
9c7370a1 1797 }
951f788a
ED
1798 local_bh_enable();
1799 rcu_read_unlock();
b65f164d 1800 trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
d52d3997
MKL
1801 return pcpu_rt;
1802 }
1da177e4 1803}
9ff74384 1804EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1805
b75cc8f9
DA
1806static struct rt6_info *ip6_pol_route_input(struct net *net,
1807 struct fib6_table *table,
1808 struct flowi6 *fl6,
1809 const struct sk_buff *skb,
1810 int flags)
4acad72d 1811{
b75cc8f9 1812 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
4acad72d
PE
1813}
1814
d409b847
MB
1815struct dst_entry *ip6_route_input_lookup(struct net *net,
1816 struct net_device *dev,
b75cc8f9
DA
1817 struct flowi6 *fl6,
1818 const struct sk_buff *skb,
1819 int flags)
72331bc0
SL
1820{
1821 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1822 flags |= RT6_LOOKUP_F_IFACE;
1823
b75cc8f9 1824 return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
72331bc0 1825}
d409b847 1826EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1827
23aebdac 1828static void ip6_multipath_l3_keys(const struct sk_buff *skb,
5e5d6fed
RP
1829 struct flow_keys *keys,
1830 struct flow_keys *flkeys)
23aebdac
JS
1831{
1832 const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
1833 const struct ipv6hdr *key_iph = outer_iph;
5e5d6fed 1834 struct flow_keys *_flkeys = flkeys;
23aebdac
JS
1835 const struct ipv6hdr *inner_iph;
1836 const struct icmp6hdr *icmph;
1837 struct ipv6hdr _inner_iph;
cea67a2d 1838 struct icmp6hdr _icmph;
23aebdac
JS
1839
1840 if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6))
1841 goto out;
1842
cea67a2d
ED
1843 icmph = skb_header_pointer(skb, skb_transport_offset(skb),
1844 sizeof(_icmph), &_icmph);
1845 if (!icmph)
1846 goto out;
1847
23aebdac
JS
1848 if (icmph->icmp6_type != ICMPV6_DEST_UNREACH &&
1849 icmph->icmp6_type != ICMPV6_PKT_TOOBIG &&
1850 icmph->icmp6_type != ICMPV6_TIME_EXCEED &&
1851 icmph->icmp6_type != ICMPV6_PARAMPROB)
1852 goto out;
1853
1854 inner_iph = skb_header_pointer(skb,
1855 skb_transport_offset(skb) + sizeof(*icmph),
1856 sizeof(_inner_iph), &_inner_iph);
1857 if (!inner_iph)
1858 goto out;
1859
1860 key_iph = inner_iph;
5e5d6fed 1861 _flkeys = NULL;
23aebdac 1862out:
5e5d6fed
RP
1863 if (_flkeys) {
1864 keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
1865 keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
1866 keys->tags.flow_label = _flkeys->tags.flow_label;
1867 keys->basic.ip_proto = _flkeys->basic.ip_proto;
1868 } else {
1869 keys->addrs.v6addrs.src = key_iph->saddr;
1870 keys->addrs.v6addrs.dst = key_iph->daddr;
1871 keys->tags.flow_label = ip6_flowinfo(key_iph);
1872 keys->basic.ip_proto = key_iph->nexthdr;
1873 }
23aebdac
JS
1874}
1875
1876/* if skb is set it will be used and fl6 can be NULL */
b4bac172
DA
1877u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
1878 const struct sk_buff *skb, struct flow_keys *flkeys)
23aebdac
JS
1879{
1880 struct flow_keys hash_keys;
9a2a537a 1881 u32 mhash;
23aebdac 1882
bbfa047a 1883 switch (ip6_multipath_hash_policy(net)) {
b4bac172
DA
1884 case 0:
1885 memset(&hash_keys, 0, sizeof(hash_keys));
1886 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1887 if (skb) {
1888 ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
1889 } else {
1890 hash_keys.addrs.v6addrs.src = fl6->saddr;
1891 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1892 hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
1893 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1894 }
1895 break;
1896 case 1:
1897 if (skb) {
1898 unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
1899 struct flow_keys keys;
1900
1901 /* short-circuit if we already have L4 hash present */
1902 if (skb->l4_hash)
1903 return skb_get_hash_raw(skb) >> 1;
1904
1905 memset(&hash_keys, 0, sizeof(hash_keys));
1906
1907 if (!flkeys) {
1908 skb_flow_dissect_flow_keys(skb, &keys, flag);
1909 flkeys = &keys;
1910 }
1911 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1912 hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
1913 hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
1914 hash_keys.ports.src = flkeys->ports.src;
1915 hash_keys.ports.dst = flkeys->ports.dst;
1916 hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
1917 } else {
1918 memset(&hash_keys, 0, sizeof(hash_keys));
1919 hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
1920 hash_keys.addrs.v6addrs.src = fl6->saddr;
1921 hash_keys.addrs.v6addrs.dst = fl6->daddr;
1922 hash_keys.ports.src = fl6->fl6_sport;
1923 hash_keys.ports.dst = fl6->fl6_dport;
1924 hash_keys.basic.ip_proto = fl6->flowi6_proto;
1925 }
1926 break;
23aebdac 1927 }
9a2a537a 1928 mhash = flow_hash_from_keys(&hash_keys);
23aebdac 1929
9a2a537a 1930 return mhash >> 1;
23aebdac
JS
1931}
1932
c71099ac
TG
1933void ip6_route_input(struct sk_buff *skb)
1934{
b71d1d42 1935 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1936 struct net *net = dev_net(skb->dev);
adaa70bb 1937 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1938 struct ip_tunnel_info *tun_info;
4c9483b2 1939 struct flowi6 fl6 = {
e0d56fdd 1940 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1941 .daddr = iph->daddr,
1942 .saddr = iph->saddr,
6502ca52 1943 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1944 .flowi6_mark = skb->mark,
1945 .flowi6_proto = iph->nexthdr,
c71099ac 1946 };
5e5d6fed 1947 struct flow_keys *flkeys = NULL, _flkeys;
adaa70bb 1948
904af04d 1949 tun_info = skb_tunnel_info(skb);
46fa062a 1950 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1951 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
5e5d6fed
RP
1952
1953 if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
1954 flkeys = &_flkeys;
1955
23aebdac 1956 if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
b4bac172 1957 fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
06e9d040 1958 skb_dst_drop(skb);
b75cc8f9
DA
1959 skb_dst_set(skb,
1960 ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
c71099ac
TG
1961}
1962
b75cc8f9
DA
1963static struct rt6_info *ip6_pol_route_output(struct net *net,
1964 struct fib6_table *table,
1965 struct flowi6 *fl6,
1966 const struct sk_buff *skb,
1967 int flags)
1da177e4 1968{
b75cc8f9 1969 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
c71099ac
TG
1970}
1971
6f21c96a
PA
1972struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1973 struct flowi6 *fl6, int flags)
c71099ac 1974{
d46a9d67 1975 bool any_src;
c71099ac 1976
4c1feac5
DA
1977 if (rt6_need_strict(&fl6->daddr)) {
1978 struct dst_entry *dst;
1979
1980 dst = l3mdev_link_scope_lookup(net, fl6);
1981 if (dst)
1982 return dst;
1983 }
ca254490 1984
1fb9489b 1985 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1986
d46a9d67 1987 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1988 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1989 (fl6->flowi6_oif && any_src))
77d16f45 1990 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1991
d46a9d67 1992 if (!any_src)
adaa70bb 1993 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1994 else if (sk)
1995 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1996
b75cc8f9 1997 return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
1da177e4 1998}
6f21c96a 1999EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 2000
2774c131 2001struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 2002{
5c1e6aa3 2003 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 2004 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
2005 struct dst_entry *new = NULL;
2006
1dbe3252 2007 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
62cf27e5 2008 DST_OBSOLETE_DEAD, 0);
14e50e57 2009 if (rt) {
0a1f5962 2010 rt6_info_init(rt);
81eb8447 2011 atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
8104891b 2012
0a1f5962 2013 new = &rt->dst;
14e50e57 2014 new->__use = 1;
352e512c 2015 new->input = dst_discard;
ede2059d 2016 new->output = dst_discard_out;
14e50e57 2017
0a1f5962 2018 dst_copy_metrics(new, &ort->dst);
14e50e57 2019
1dbe3252 2020 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 2021 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 2022 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
2023 rt->rt6i_metric = 0;
2024
2025 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
2026#ifdef CONFIG_IPV6_SUBTREES
2027 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
2028#endif
14e50e57
DM
2029 }
2030
69ead7af
DM
2031 dst_release(dst_orig);
2032 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 2033}
14e50e57 2034
1da177e4
LT
2035/*
2036 * Destination cache support functions
2037 */
2038
4b32b5ad
MKL
2039static void rt6_dst_from_metrics_check(struct rt6_info *rt)
2040{
3a2232e9
DM
2041 if (rt->from &&
2042 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
2043 dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
4b32b5ad
MKL
2044}
2045
3da59bd9
MKL
2046static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
2047{
36143645 2048 u32 rt_cookie = 0;
c5cff856
WW
2049
2050 if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
3da59bd9
MKL
2051 return NULL;
2052
2053 if (rt6_check_expired(rt))
2054 return NULL;
2055
2056 return &rt->dst;
2057}
2058
2059static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
2060{
5973fb1e
MKL
2061 if (!__rt6_check_expired(rt) &&
2062 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3a2232e9 2063 rt6_check(rt->from, cookie))
3da59bd9
MKL
2064 return &rt->dst;
2065 else
2066 return NULL;
2067}
2068
1da177e4
LT
2069static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
2070{
2071 struct rt6_info *rt;
2072
2073 rt = (struct rt6_info *) dst;
2074
6f3118b5
ND
2075 /* All IPV6 dsts are created with ->obsolete set to the value
2076 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
2077 * into this function always.
2078 */
e3bc10bd 2079
4b32b5ad
MKL
2080 rt6_dst_from_metrics_check(rt);
2081
02bcf4e0 2082 if (rt->rt6i_flags & RTF_PCPU ||
3a2232e9 2083 (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
3da59bd9
MKL
2084 return rt6_dst_from_check(rt, cookie);
2085 else
2086 return rt6_check(rt, cookie);
1da177e4
LT
2087}
2088
2089static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
2090{
2091 struct rt6_info *rt = (struct rt6_info *) dst;
2092
2093 if (rt) {
54c1a859
YH
2094 if (rt->rt6i_flags & RTF_CACHE) {
2095 if (rt6_check_expired(rt)) {
2096 ip6_del_rt(rt);
2097 dst = NULL;
2098 }
2099 } else {
1da177e4 2100 dst_release(dst);
54c1a859
YH
2101 dst = NULL;
2102 }
1da177e4 2103 }
54c1a859 2104 return dst;
1da177e4
LT
2105}
2106
2107static void ip6_link_failure(struct sk_buff *skb)
2108{
2109 struct rt6_info *rt;
2110
3ffe533c 2111 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 2112
adf30907 2113 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 2114 if (rt) {
1eb4f758 2115 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
2116 if (dst_hold_safe(&rt->dst))
2117 ip6_del_rt(rt);
c5cff856
WW
2118 } else {
2119 struct fib6_node *fn;
2120
2121 rcu_read_lock();
2122 fn = rcu_dereference(rt->rt6i_node);
2123 if (fn && (rt->rt6i_flags & RTF_DEFAULT))
2124 fn->fn_sernum = -1;
2125 rcu_read_unlock();
1eb4f758 2126 }
1da177e4
LT
2127 }
2128}
2129
45e4fd26
MKL
2130static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
2131{
2132 struct net *net = dev_net(rt->dst.dev);
2133
2134 rt->rt6i_flags |= RTF_MODIFIED;
2135 rt->rt6i_pmtu = mtu;
2136 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
2137}
2138
0d3f6d29
MKL
2139static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
2140{
2141 return !(rt->rt6i_flags & RTF_CACHE) &&
4e587ea7
WW
2142 (rt->rt6i_flags & RTF_PCPU ||
2143 rcu_access_pointer(rt->rt6i_node));
0d3f6d29
MKL
2144}
2145
45e4fd26
MKL
2146static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
2147 const struct ipv6hdr *iph, u32 mtu)
1da177e4 2148{
0dec879f 2149 const struct in6_addr *daddr, *saddr;
67ba4152 2150 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 2151
45e4fd26
MKL
2152 if (rt6->rt6i_flags & RTF_LOCAL)
2153 return;
81aded24 2154
19bda36c
XL
2155 if (dst_metric_locked(dst, RTAX_MTU))
2156 return;
2157
0dec879f
JA
2158 if (iph) {
2159 daddr = &iph->daddr;
2160 saddr = &iph->saddr;
2161 } else if (sk) {
2162 daddr = &sk->sk_v6_daddr;
2163 saddr = &inet6_sk(sk)->saddr;
2164 } else {
2165 daddr = NULL;
2166 saddr = NULL;
2167 }
2168 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
2169 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
2170 if (mtu >= dst_mtu(dst))
2171 return;
9d289715 2172
0d3f6d29 2173 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 2174 rt6_do_update_pmtu(rt6, mtu);
2b760fcf
WW
2175 /* update rt6_ex->stamp for cache */
2176 if (rt6->rt6i_flags & RTF_CACHE)
2177 rt6_update_exception_stamp_rt(rt6);
0dec879f 2178 } else if (daddr) {
45e4fd26
MKL
2179 struct rt6_info *nrt6;
2180
45e4fd26
MKL
2181 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
2182 if (nrt6) {
2183 rt6_do_update_pmtu(nrt6, mtu);
2b760fcf
WW
2184 if (rt6_insert_exception(nrt6, rt6))
2185 dst_release_immediate(&nrt6->dst);
45e4fd26 2186 }
1da177e4
LT
2187 }
2188}
2189
45e4fd26
MKL
2190static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
2191 struct sk_buff *skb, u32 mtu)
2192{
2193 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
2194}
2195
42ae66c8 2196void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 2197 int oif, u32 mark, kuid_t uid)
81aded24
DM
2198{
2199 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2200 struct dst_entry *dst;
2201 struct flowi6 fl6;
2202
2203 memset(&fl6, 0, sizeof(fl6));
2204 fl6.flowi6_oif = oif;
1b3c61dc 2205 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
2206 fl6.daddr = iph->daddr;
2207 fl6.saddr = iph->saddr;
6502ca52 2208 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2209 fl6.flowi6_uid = uid;
81aded24
DM
2210
2211 dst = ip6_route_output(net, NULL, &fl6);
2212 if (!dst->error)
45e4fd26 2213 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
2214 dst_release(dst);
2215}
2216EXPORT_SYMBOL_GPL(ip6_update_pmtu);
2217
2218void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
2219{
33c162a9
MKL
2220 struct dst_entry *dst;
2221
81aded24 2222 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 2223 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
2224
2225 dst = __sk_dst_get(sk);
2226 if (!dst || !dst->obsolete ||
2227 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
2228 return;
2229
2230 bh_lock_sock(sk);
2231 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
2232 ip6_datagram_dst_update(sk, false);
2233 bh_unlock_sock(sk);
81aded24
DM
2234}
2235EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
2236
7d6850f7
AK
2237void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
2238 const struct flowi6 *fl6)
2239{
2240#ifdef CONFIG_IPV6_SUBTREES
2241 struct ipv6_pinfo *np = inet6_sk(sk);
2242#endif
2243
2244 ip6_dst_store(sk, dst,
2245 ipv6_addr_equal(&fl6->daddr, &sk->sk_v6_daddr) ?
2246 &sk->sk_v6_daddr : NULL,
2247#ifdef CONFIG_IPV6_SUBTREES
2248 ipv6_addr_equal(&fl6->saddr, &np->saddr) ?
2249 &np->saddr :
2250#endif
2251 NULL);
2252}
2253
b55b76b2
DJ
2254/* Handle redirects */
2255struct ip6rd_flowi {
2256 struct flowi6 fl6;
2257 struct in6_addr gateway;
2258};
2259
2260static struct rt6_info *__ip6_route_redirect(struct net *net,
2261 struct fib6_table *table,
2262 struct flowi6 *fl6,
b75cc8f9 2263 const struct sk_buff *skb,
b55b76b2
DJ
2264 int flags)
2265{
2266 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
2b760fcf 2267 struct rt6_info *rt, *rt_cache;
b55b76b2
DJ
2268 struct fib6_node *fn;
2269
2270 /* Get the "current" route for this destination and
67c408cf 2271 * check if the redirect has come from appropriate router.
b55b76b2
DJ
2272 *
2273 * RFC 4861 specifies that redirects should only be
2274 * accepted if they come from the nexthop to the target.
2275 * Due to the way the routes are chosen, this notion
2276 * is a bit fuzzy and one might need to check all possible
2277 * routes.
2278 */
2279
66f5d6ce 2280 rcu_read_lock();
b55b76b2
DJ
2281 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
2282restart:
66f5d6ce 2283 for_each_fib6_node_rt_rcu(fn) {
8067bb8c
IS
2284 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
2285 continue;
b55b76b2
DJ
2286 if (rt6_check_expired(rt))
2287 continue;
2288 if (rt->dst.error)
2289 break;
2290 if (!(rt->rt6i_flags & RTF_GATEWAY))
2291 continue;
2292 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
2293 continue;
2b760fcf
WW
2294 /* rt_cache's gateway might be different from its 'parent'
2295 * in the case of an ip redirect.
2296 * So we keep searching in the exception table if the gateway
2297 * is different.
2298 */
2299 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
2300 rt_cache = rt6_find_cached_rt(rt,
2301 &fl6->daddr,
2302 &fl6->saddr);
2303 if (rt_cache &&
2304 ipv6_addr_equal(&rdfl->gateway,
2305 &rt_cache->rt6i_gateway)) {
2306 rt = rt_cache;
2307 break;
2308 }
b55b76b2 2309 continue;
2b760fcf 2310 }
b55b76b2
DJ
2311 break;
2312 }
2313
2314 if (!rt)
2315 rt = net->ipv6.ip6_null_entry;
2316 else if (rt->dst.error) {
2317 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
2318 goto out;
2319 }
2320
2321 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
2322 fn = fib6_backtrack(fn, &fl6->saddr);
2323 if (fn)
2324 goto restart;
b55b76b2 2325 }
a3c00e46 2326
b0a1ba59 2327out:
d3843fe5 2328 ip6_hold_safe(net, &rt, true);
b55b76b2 2329
66f5d6ce 2330 rcu_read_unlock();
b55b76b2 2331
b65f164d 2332 trace_fib6_table_lookup(net, rt, table, fl6);
b55b76b2
DJ
2333 return rt;
2334};
2335
2336static struct dst_entry *ip6_route_redirect(struct net *net,
b75cc8f9
DA
2337 const struct flowi6 *fl6,
2338 const struct sk_buff *skb,
2339 const struct in6_addr *gateway)
b55b76b2
DJ
2340{
2341 int flags = RT6_LOOKUP_F_HAS_SADDR;
2342 struct ip6rd_flowi rdfl;
2343
2344 rdfl.fl6 = *fl6;
2345 rdfl.gateway = *gateway;
2346
b75cc8f9 2347 return fib6_rule_lookup(net, &rdfl.fl6, skb,
b55b76b2
DJ
2348 flags, __ip6_route_redirect);
2349}
2350
e2d118a1
LC
2351void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
2352 kuid_t uid)
3a5ad2ee
DM
2353{
2354 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
2355 struct dst_entry *dst;
2356 struct flowi6 fl6;
2357
2358 memset(&fl6, 0, sizeof(fl6));
e374c618 2359 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
2360 fl6.flowi6_oif = oif;
2361 fl6.flowi6_mark = mark;
3a5ad2ee
DM
2362 fl6.daddr = iph->daddr;
2363 fl6.saddr = iph->saddr;
6502ca52 2364 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 2365 fl6.flowi6_uid = uid;
3a5ad2ee 2366
b75cc8f9 2367 dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
b55b76b2 2368 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
2369 dst_release(dst);
2370}
2371EXPORT_SYMBOL_GPL(ip6_redirect);
2372
c92a59ec
DJ
2373void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
2374 u32 mark)
2375{
2376 const struct ipv6hdr *iph = ipv6_hdr(skb);
2377 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
2378 struct dst_entry *dst;
2379 struct flowi6 fl6;
2380
2381 memset(&fl6, 0, sizeof(fl6));
e374c618 2382 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
2383 fl6.flowi6_oif = oif;
2384 fl6.flowi6_mark = mark;
c92a59ec
DJ
2385 fl6.daddr = msg->dest;
2386 fl6.saddr = iph->daddr;
e2d118a1 2387 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 2388
b75cc8f9 2389 dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
b55b76b2 2390 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
2391 dst_release(dst);
2392}
2393
3a5ad2ee
DM
2394void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
2395{
e2d118a1
LC
2396 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
2397 sk->sk_uid);
3a5ad2ee
DM
2398}
2399EXPORT_SYMBOL_GPL(ip6_sk_redirect);
2400
0dbaee3b 2401static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 2402{
0dbaee3b
DM
2403 struct net_device *dev = dst->dev;
2404 unsigned int mtu = dst_mtu(dst);
2405 struct net *net = dev_net(dev);
2406
1da177e4
LT
2407 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
2408
5578689a
DL
2409 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
2410 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
2411
2412 /*
1ab1457c
YH
2413 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
2414 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
2415 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
2416 * rely only on pmtu discovery"
2417 */
2418 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
2419 mtu = IPV6_MAXPLEN;
2420 return mtu;
2421}
2422
ebb762f2 2423static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 2424{
4b32b5ad
MKL
2425 const struct rt6_info *rt = (const struct rt6_info *)dst;
2426 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 2427 struct inet6_dev *idev;
618f9bc7 2428
4b32b5ad
MKL
2429 if (mtu)
2430 goto out;
2431
2432 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 2433 if (mtu)
30f78d8e 2434 goto out;
618f9bc7
SK
2435
2436 mtu = IPV6_MIN_MTU;
d33e4553
DM
2437
2438 rcu_read_lock();
2439 idev = __in6_dev_get(dst->dev);
2440 if (idev)
2441 mtu = idev->cnf.mtu6;
2442 rcu_read_unlock();
2443
30f78d8e 2444out:
14972cbd
RP
2445 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
2446
2447 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
2448}
2449
3b00944c 2450struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 2451 struct flowi6 *fl6)
1da177e4 2452{
87a11578 2453 struct dst_entry *dst;
1da177e4
LT
2454 struct rt6_info *rt;
2455 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 2456 struct net *net = dev_net(dev);
1da177e4 2457
38308473 2458 if (unlikely(!idev))
122bdf67 2459 return ERR_PTR(-ENODEV);
1da177e4 2460
ad706862 2461 rt = ip6_dst_alloc(net, dev, 0);
38308473 2462 if (unlikely(!rt)) {
1da177e4 2463 in6_dev_put(idev);
87a11578 2464 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
2465 goto out;
2466 }
2467
8e2ec639 2468 rt->dst.flags |= DST_HOST;
588753f1 2469 rt->dst.input = ip6_input;
8e2ec639 2470 rt->dst.output = ip6_output;
550bab42 2471 rt->rt6i_gateway = fl6->daddr;
87a11578 2472 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
2473 rt->rt6i_dst.plen = 128;
2474 rt->rt6i_idev = idev;
14edd87d 2475 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 2476
4c981e28 2477 /* Add this dst into uncached_list so that rt6_disable_ip() can
587fea74
WW
2478 * do proper release of the net_device
2479 */
2480 rt6_uncached_list_add(rt);
81eb8447 2481 atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
1da177e4 2482
87a11578
DM
2483 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
2484
1da177e4 2485out:
87a11578 2486 return dst;
1da177e4
LT
2487}
2488
569d3645 2489static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 2490{
86393e52 2491 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
2492 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
2493 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
2494 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
2495 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
2496 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 2497 int entries;
7019b78e 2498
fc66f95c 2499 entries = dst_entries_get_fast(ops);
49a18d86 2500 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 2501 entries <= rt_max_size)
1da177e4
LT
2502 goto out;
2503
6891a346 2504 net->ipv6.ip6_rt_gc_expire++;
14956643 2505 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
2506 entries = dst_entries_get_slow(ops);
2507 if (entries < ops->gc_thresh)
7019b78e 2508 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 2509out:
7019b78e 2510 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 2511 return entries > rt_max_size;
1da177e4
LT
2512}
2513
e715b6d3
FW
2514static int ip6_convert_metrics(struct mx6_config *mxc,
2515 const struct fib6_config *cfg)
2516{
6670e152 2517 struct net *net = cfg->fc_nlinfo.nl_net;
c3a8d947 2518 bool ecn_ca = false;
e715b6d3
FW
2519 struct nlattr *nla;
2520 int remaining;
2521 u32 *mp;
2522
63159f29 2523 if (!cfg->fc_mx)
e715b6d3
FW
2524 return 0;
2525
2526 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
2527 if (unlikely(!mp))
2528 return -ENOMEM;
2529
2530 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
2531 int type = nla_type(nla);
1bb14807 2532 u32 val;
e715b6d3 2533
1bb14807
DB
2534 if (!type)
2535 continue;
2536 if (unlikely(type > RTAX_MAX))
2537 goto err;
ea697639 2538
1bb14807
DB
2539 if (type == RTAX_CC_ALGO) {
2540 char tmp[TCP_CA_NAME_MAX];
e715b6d3 2541
1bb14807 2542 nla_strlcpy(tmp, nla, sizeof(tmp));
6670e152 2543 val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca);
1bb14807
DB
2544 if (val == TCP_CA_UNSPEC)
2545 goto err;
2546 } else {
2547 val = nla_get_u32(nla);
e715b6d3 2548 }
626abd59
PA
2549 if (type == RTAX_HOPLIMIT && val > 255)
2550 val = 255;
b8d3e416
DB
2551 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
2552 goto err;
1bb14807
DB
2553
2554 mp[type - 1] = val;
2555 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
2556 }
2557
c3a8d947
DB
2558 if (ecn_ca) {
2559 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
2560 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
2561 }
e715b6d3 2562
c3a8d947 2563 mxc->mx = mp;
e715b6d3
FW
2564 return 0;
2565 err:
2566 kfree(mp);
2567 return -EINVAL;
2568}
1da177e4 2569
8c14586f
DA
2570static struct rt6_info *ip6_nh_lookup_table(struct net *net,
2571 struct fib6_config *cfg,
f4797b33
DA
2572 const struct in6_addr *gw_addr,
2573 u32 tbid, int flags)
8c14586f
DA
2574{
2575 struct flowi6 fl6 = {
2576 .flowi6_oif = cfg->fc_ifindex,
2577 .daddr = *gw_addr,
2578 .saddr = cfg->fc_prefsrc,
2579 };
2580 struct fib6_table *table;
2581 struct rt6_info *rt;
8c14586f 2582
f4797b33 2583 table = fib6_get_table(net, tbid);
8c14586f
DA
2584 if (!table)
2585 return NULL;
2586
2587 if (!ipv6_addr_any(&cfg->fc_prefsrc))
2588 flags |= RT6_LOOKUP_F_HAS_SADDR;
2589
f4797b33 2590 flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
b75cc8f9 2591 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
8c14586f
DA
2592
2593 /* if table lookup failed, fall back to full lookup */
2594 if (rt == net->ipv6.ip6_null_entry) {
2595 ip6_rt_put(rt);
2596 rt = NULL;
2597 }
2598
2599 return rt;
2600}
2601
fc1e64e1
DA
2602static int ip6_route_check_nh_onlink(struct net *net,
2603 struct fib6_config *cfg,
9fbb704c 2604 const struct net_device *dev,
fc1e64e1
DA
2605 struct netlink_ext_ack *extack)
2606{
44750f84 2607 u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
fc1e64e1
DA
2608 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2609 u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
2610 struct rt6_info *grt;
2611 int err;
2612
2613 err = 0;
2614 grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
2615 if (grt) {
58e354c0
DA
2616 if (!grt->dst.error &&
2617 (grt->rt6i_flags & flags || dev != grt->dst.dev)) {
44750f84
DA
2618 NL_SET_ERR_MSG(extack,
2619 "Nexthop has invalid gateway or device mismatch");
fc1e64e1
DA
2620 err = -EINVAL;
2621 }
2622
2623 ip6_rt_put(grt);
2624 }
2625
2626 return err;
2627}
2628
1edce99f
DA
2629static int ip6_route_check_nh(struct net *net,
2630 struct fib6_config *cfg,
2631 struct net_device **_dev,
2632 struct inet6_dev **idev)
2633{
2634 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2635 struct net_device *dev = _dev ? *_dev : NULL;
2636 struct rt6_info *grt = NULL;
2637 int err = -EHOSTUNREACH;
2638
2639 if (cfg->fc_table) {
f4797b33
DA
2640 int flags = RT6_LOOKUP_F_IFACE;
2641
2642 grt = ip6_nh_lookup_table(net, cfg, gw_addr,
2643 cfg->fc_table, flags);
1edce99f
DA
2644 if (grt) {
2645 if (grt->rt6i_flags & RTF_GATEWAY ||
2646 (dev && dev != grt->dst.dev)) {
2647 ip6_rt_put(grt);
2648 grt = NULL;
2649 }
2650 }
2651 }
2652
2653 if (!grt)
b75cc8f9 2654 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
1edce99f
DA
2655
2656 if (!grt)
2657 goto out;
2658
2659 if (dev) {
2660 if (dev != grt->dst.dev) {
2661 ip6_rt_put(grt);
2662 goto out;
2663 }
2664 } else {
2665 *_dev = dev = grt->dst.dev;
2666 *idev = grt->rt6i_idev;
2667 dev_hold(dev);
2668 in6_dev_hold(grt->rt6i_idev);
2669 }
2670
2671 if (!(grt->rt6i_flags & RTF_GATEWAY))
2672 err = 0;
2673
2674 ip6_rt_put(grt);
2675
2676out:
2677 return err;
2678}
2679
9fbb704c
DA
2680static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
2681 struct net_device **_dev, struct inet6_dev **idev,
2682 struct netlink_ext_ack *extack)
2683{
2684 const struct in6_addr *gw_addr = &cfg->fc_gateway;
2685 int gwa_type = ipv6_addr_type(gw_addr);
232378e8 2686 bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
9fbb704c 2687 const struct net_device *dev = *_dev;
232378e8 2688 bool need_addr_check = !dev;
9fbb704c
DA
2689 int err = -EINVAL;
2690
2691 /* if gw_addr is local we will fail to detect this in case
2692 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2693 * will return already-added prefix route via interface that
2694 * prefix route was assigned to, which might be non-loopback.
2695 */
232378e8
DA
2696 if (dev &&
2697 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2698 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
9fbb704c
DA
2699 goto out;
2700 }
2701
2702 if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
2703 /* IPv6 strictly inhibits using not link-local
2704 * addresses as nexthop address.
2705 * Otherwise, router will not able to send redirects.
2706 * It is very good, but in some (rare!) circumstances
2707 * (SIT, PtP, NBMA NOARP links) it is handy to allow
2708 * some exceptions. --ANK
2709 * We allow IPv4-mapped nexthops to support RFC4798-type
2710 * addressing
2711 */
2712 if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
2713 NL_SET_ERR_MSG(extack, "Invalid gateway address");
2714 goto out;
2715 }
2716
2717 if (cfg->fc_flags & RTNH_F_ONLINK)
2718 err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
2719 else
2720 err = ip6_route_check_nh(net, cfg, _dev, idev);
2721
2722 if (err)
2723 goto out;
2724 }
2725
2726 /* reload in case device was changed */
2727 dev = *_dev;
2728
2729 err = -EINVAL;
2730 if (!dev) {
2731 NL_SET_ERR_MSG(extack, "Egress device not specified");
2732 goto out;
2733 } else if (dev->flags & IFF_LOOPBACK) {
2734 NL_SET_ERR_MSG(extack,
2735 "Egress device can not be loopback device for this route");
2736 goto out;
2737 }
232378e8
DA
2738
2739 /* if we did not check gw_addr above, do so now that the
2740 * egress device has been resolved.
2741 */
2742 if (need_addr_check &&
2743 ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
2744 NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
2745 goto out;
2746 }
2747
9fbb704c
DA
2748 err = 0;
2749out:
2750 return err;
2751}
2752
333c4301
DA
2753static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
2754 struct netlink_ext_ack *extack)
1da177e4 2755{
5578689a 2756 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
2757 struct rt6_info *rt = NULL;
2758 struct net_device *dev = NULL;
2759 struct inet6_dev *idev = NULL;
c71099ac 2760 struct fib6_table *table;
1da177e4 2761 int addr_type;
8c5b83f0 2762 int err = -EINVAL;
1da177e4 2763
557c44be 2764 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
2765 if (cfg->fc_flags & RTF_PCPU) {
2766 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 2767 goto out;
d5d531cb 2768 }
557c44be 2769
2ea2352e
WW
2770 /* RTF_CACHE is an internal flag; can not be set by userspace */
2771 if (cfg->fc_flags & RTF_CACHE) {
2772 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE");
2773 goto out;
2774 }
2775
d5d531cb
DA
2776 if (cfg->fc_dst_len > 128) {
2777 NL_SET_ERR_MSG(extack, "Invalid prefix length");
2778 goto out;
2779 }
2780 if (cfg->fc_src_len > 128) {
2781 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 2782 goto out;
d5d531cb 2783 }
1da177e4 2784#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
2785 if (cfg->fc_src_len) {
2786 NL_SET_ERR_MSG(extack,
2787 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 2788 goto out;
d5d531cb 2789 }
1da177e4 2790#endif
86872cb5 2791 if (cfg->fc_ifindex) {
1da177e4 2792 err = -ENODEV;
5578689a 2793 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
2794 if (!dev)
2795 goto out;
2796 idev = in6_dev_get(dev);
2797 if (!idev)
2798 goto out;
2799 }
2800
86872cb5
TG
2801 if (cfg->fc_metric == 0)
2802 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 2803
fc1e64e1
DA
2804 if (cfg->fc_flags & RTNH_F_ONLINK) {
2805 if (!dev) {
2806 NL_SET_ERR_MSG(extack,
2807 "Nexthop device required for onlink");
2808 err = -ENODEV;
2809 goto out;
2810 }
2811
2812 if (!(dev->flags & IFF_UP)) {
2813 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2814 err = -ENETDOWN;
2815 goto out;
2816 }
2817 }
2818
d71314b4 2819 err = -ENOBUFS;
38308473
DM
2820 if (cfg->fc_nlinfo.nlh &&
2821 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 2822 table = fib6_get_table(net, cfg->fc_table);
38308473 2823 if (!table) {
f3213831 2824 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
2825 table = fib6_new_table(net, cfg->fc_table);
2826 }
2827 } else {
2828 table = fib6_new_table(net, cfg->fc_table);
2829 }
38308473
DM
2830
2831 if (!table)
c71099ac 2832 goto out;
c71099ac 2833
ad706862
MKL
2834 rt = ip6_dst_alloc(net, NULL,
2835 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 2836
38308473 2837 if (!rt) {
1da177e4
LT
2838 err = -ENOMEM;
2839 goto out;
2840 }
2841
1716a961
G
2842 if (cfg->fc_flags & RTF_EXPIRES)
2843 rt6_set_expires(rt, jiffies +
2844 clock_t_to_jiffies(cfg->fc_expires));
2845 else
2846 rt6_clean_expires(rt);
1da177e4 2847
86872cb5
TG
2848 if (cfg->fc_protocol == RTPROT_UNSPEC)
2849 cfg->fc_protocol = RTPROT_BOOT;
2850 rt->rt6i_protocol = cfg->fc_protocol;
2851
2852 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
2853
2854 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 2855 rt->dst.input = ip6_mc_input;
ab79ad14
2856 else if (cfg->fc_flags & RTF_LOCAL)
2857 rt->dst.input = ip6_input;
1da177e4 2858 else
d8d1f30b 2859 rt->dst.input = ip6_forward;
1da177e4 2860
d8d1f30b 2861 rt->dst.output = ip6_output;
1da177e4 2862
19e42e45
RP
2863 if (cfg->fc_encap) {
2864 struct lwtunnel_state *lwtstate;
2865
30357d7d 2866 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 2867 cfg->fc_encap, AF_INET6, cfg,
9ae28727 2868 &lwtstate, extack);
19e42e45
RP
2869 if (err)
2870 goto out;
61adedf3 2871 rt->dst.lwtstate = lwtstate_get(lwtstate);
9942895b 2872 lwtunnel_set_redirect(&rt->dst);
19e42e45
RP
2873 }
2874
86872cb5
TG
2875 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
2876 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 2877 if (rt->rt6i_dst.plen == 128)
e5fd387a 2878 rt->dst.flags |= DST_HOST;
e5fd387a 2879
1da177e4 2880#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
2881 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
2882 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
2883#endif
2884
86872cb5 2885 rt->rt6i_metric = cfg->fc_metric;
398958ae 2886 rt->rt6i_nh_weight = 1;
1da177e4
LT
2887
2888 /* We cannot add true routes via loopback here,
2889 they would result in kernel looping; promote them to reject routes
2890 */
86872cb5 2891 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
2892 (dev && (dev->flags & IFF_LOOPBACK) &&
2893 !(addr_type & IPV6_ADDR_LOOPBACK) &&
2894 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 2895 /* hold loopback dev/idev if we haven't done so. */
5578689a 2896 if (dev != net->loopback_dev) {
1da177e4
LT
2897 if (dev) {
2898 dev_put(dev);
2899 in6_dev_put(idev);
2900 }
5578689a 2901 dev = net->loopback_dev;
1da177e4
LT
2902 dev_hold(dev);
2903 idev = in6_dev_get(dev);
2904 if (!idev) {
2905 err = -ENODEV;
2906 goto out;
2907 }
2908 }
1da177e4 2909 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2910 switch (cfg->fc_type) {
2911 case RTN_BLACKHOLE:
2912 rt->dst.error = -EINVAL;
ede2059d 2913 rt->dst.output = dst_discard_out;
7150aede 2914 rt->dst.input = dst_discard;
ef2c7d7b
ND
2915 break;
2916 case RTN_PROHIBIT:
2917 rt->dst.error = -EACCES;
7150aede
K
2918 rt->dst.output = ip6_pkt_prohibit_out;
2919 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2920 break;
b4949ab2 2921 case RTN_THROW:
0315e382 2922 case RTN_UNREACHABLE:
ef2c7d7b 2923 default:
7150aede 2924 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2925 : (cfg->fc_type == RTN_UNREACHABLE)
2926 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2927 rt->dst.output = ip6_pkt_discard_out;
2928 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2929 break;
2930 }
1da177e4
LT
2931 goto install_route;
2932 }
2933
86872cb5 2934 if (cfg->fc_flags & RTF_GATEWAY) {
9fbb704c
DA
2935 err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
2936 if (err)
48ed7b26 2937 goto out;
1da177e4 2938
9fbb704c 2939 rt->rt6i_gateway = cfg->fc_gateway;
1da177e4
LT
2940 }
2941
2942 err = -ENODEV;
38308473 2943 if (!dev)
1da177e4
LT
2944 goto out;
2945
428604fb
LB
2946 if (idev->cnf.disable_ipv6) {
2947 NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
2948 err = -EACCES;
2949 goto out;
2950 }
2951
955ec4cb
DA
2952 if (!(dev->flags & IFF_UP)) {
2953 NL_SET_ERR_MSG(extack, "Nexthop device is not up");
2954 err = -ENETDOWN;
2955 goto out;
2956 }
2957
c3968a85
DW
2958 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2959 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2960 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2961 err = -EINVAL;
2962 goto out;
2963 }
4e3fd7a0 2964 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2965 rt->rt6i_prefsrc.plen = 128;
2966 } else
2967 rt->rt6i_prefsrc.plen = 0;
2968
86872cb5 2969 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2970
2971install_route:
5609b80a
IS
2972 if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
2973 !netif_carrier_ok(dev))
2974 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
fc1e64e1 2975 rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
d8d1f30b 2976 rt->dst.dev = dev;
1da177e4 2977 rt->rt6i_idev = idev;
c71099ac 2978 rt->rt6i_table = table;
63152fc0 2979
c346dca1 2980 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2981
8c5b83f0 2982 return rt;
6b9ea5a6
RP
2983out:
2984 if (dev)
2985 dev_put(dev);
2986 if (idev)
2987 in6_dev_put(idev);
587fea74
WW
2988 if (rt)
2989 dst_release_immediate(&rt->dst);
6b9ea5a6 2990
8c5b83f0 2991 return ERR_PTR(err);
6b9ea5a6
RP
2992}
2993
333c4301
DA
2994int ip6_route_add(struct fib6_config *cfg,
2995 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2996{
2997 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2998 struct rt6_info *rt;
6b9ea5a6
RP
2999 int err;
3000
333c4301 3001 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
3002 if (IS_ERR(rt)) {
3003 err = PTR_ERR(rt);
3004 rt = NULL;
6b9ea5a6 3005 goto out;
8c5b83f0 3006 }
6b9ea5a6 3007
e715b6d3
FW
3008 err = ip6_convert_metrics(&mxc, cfg);
3009 if (err)
3010 goto out;
1da177e4 3011
333c4301 3012 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
3013
3014 kfree(mxc.mx);
6b9ea5a6 3015
e715b6d3 3016 return err;
1da177e4 3017out:
587fea74
WW
3018 if (rt)
3019 dst_release_immediate(&rt->dst);
6b9ea5a6 3020
1da177e4
LT
3021 return err;
3022}
3023
86872cb5 3024static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
3025{
3026 int err;
c71099ac 3027 struct fib6_table *table;
d1918542 3028 struct net *net = dev_net(rt->dst.dev);
1da177e4 3029
a4c2fd7f 3030 if (rt == net->ipv6.ip6_null_entry) {
6825a26c
G
3031 err = -ENOENT;
3032 goto out;
3033 }
6c813a72 3034
c71099ac 3035 table = rt->rt6i_table;
66f5d6ce 3036 spin_lock_bh(&table->tb6_lock);
86872cb5 3037 err = fib6_del(rt, info);
66f5d6ce 3038 spin_unlock_bh(&table->tb6_lock);
1da177e4 3039
6825a26c 3040out:
94e187c0 3041 ip6_rt_put(rt);
1da177e4
LT
3042 return err;
3043}
3044
e0a1ad73
TG
3045int ip6_del_rt(struct rt6_info *rt)
3046{
4d1169c1 3047 struct nl_info info = {
d1918542 3048 .nl_net = dev_net(rt->dst.dev),
4d1169c1 3049 };
528c4ceb 3050 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
3051}
3052
0ae81335
DA
3053static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
3054{
3055 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 3056 struct net *net = info->nl_net;
16a16cd3 3057 struct sk_buff *skb = NULL;
0ae81335 3058 struct fib6_table *table;
e3330039 3059 int err = -ENOENT;
0ae81335 3060
e3330039
WC
3061 if (rt == net->ipv6.ip6_null_entry)
3062 goto out_put;
0ae81335 3063 table = rt->rt6i_table;
66f5d6ce 3064 spin_lock_bh(&table->tb6_lock);
0ae81335
DA
3065
3066 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
3067 struct rt6_info *sibling, *next_sibling;
3068
16a16cd3
DA
3069 /* prefer to send a single notification with all hops */
3070 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
3071 if (skb) {
3072 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
3073
e3330039 3074 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
3075 NULL, NULL, 0, RTM_DELROUTE,
3076 info->portid, seq, 0) < 0) {
3077 kfree_skb(skb);
3078 skb = NULL;
3079 } else
3080 info->skip_notify = 1;
3081 }
3082
0ae81335
DA
3083 list_for_each_entry_safe(sibling, next_sibling,
3084 &rt->rt6i_siblings,
3085 rt6i_siblings) {
3086 err = fib6_del(sibling, info);
3087 if (err)
e3330039 3088 goto out_unlock;
0ae81335
DA
3089 }
3090 }
3091
3092 err = fib6_del(rt, info);
e3330039 3093out_unlock:
66f5d6ce 3094 spin_unlock_bh(&table->tb6_lock);
e3330039 3095out_put:
0ae81335 3096 ip6_rt_put(rt);
16a16cd3
DA
3097
3098 if (skb) {
e3330039 3099 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
3100 info->nlh, gfp_any());
3101 }
0ae81335
DA
3102 return err;
3103}
3104
333c4301
DA
3105static int ip6_route_del(struct fib6_config *cfg,
3106 struct netlink_ext_ack *extack)
1da177e4 3107{
2b760fcf 3108 struct rt6_info *rt, *rt_cache;
c71099ac 3109 struct fib6_table *table;
1da177e4 3110 struct fib6_node *fn;
1da177e4
LT
3111 int err = -ESRCH;
3112
5578689a 3113 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
3114 if (!table) {
3115 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 3116 return err;
d5d531cb 3117 }
c71099ac 3118
66f5d6ce 3119 rcu_read_lock();
1da177e4 3120
c71099ac 3121 fn = fib6_locate(&table->tb6_root,
86872cb5 3122 &cfg->fc_dst, cfg->fc_dst_len,
38fbeeee 3123 &cfg->fc_src, cfg->fc_src_len,
2b760fcf 3124 !(cfg->fc_flags & RTF_CACHE));
1ab1457c 3125
1da177e4 3126 if (fn) {
66f5d6ce 3127 for_each_fib6_node_rt_rcu(fn) {
2b760fcf
WW
3128 if (cfg->fc_flags & RTF_CACHE) {
3129 rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
3130 &cfg->fc_src);
3131 if (!rt_cache)
3132 continue;
3133 rt = rt_cache;
3134 }
86872cb5 3135 if (cfg->fc_ifindex &&
d1918542
DM
3136 (!rt->dst.dev ||
3137 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 3138 continue;
86872cb5
TG
3139 if (cfg->fc_flags & RTF_GATEWAY &&
3140 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 3141 continue;
86872cb5 3142 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 3143 continue;
c2ed1880
M
3144 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
3145 continue;
d3843fe5
WW
3146 if (!dst_hold_safe(&rt->dst))
3147 break;
66f5d6ce 3148 rcu_read_unlock();
1da177e4 3149
0ae81335
DA
3150 /* if gateway was specified only delete the one hop */
3151 if (cfg->fc_flags & RTF_GATEWAY)
3152 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
3153
3154 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
3155 }
3156 }
66f5d6ce 3157 rcu_read_unlock();
1da177e4
LT
3158
3159 return err;
3160}
3161
6700c270 3162static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 3163{
a6279458 3164 struct netevent_redirect netevent;
e8599ff4 3165 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
3166 struct ndisc_options ndopts;
3167 struct inet6_dev *in6_dev;
3168 struct neighbour *neigh;
71bcdba0 3169 struct rd_msg *msg;
6e157b6a
DM
3170 int optlen, on_link;
3171 u8 *lladdr;
e8599ff4 3172
29a3cad5 3173 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 3174 optlen -= sizeof(*msg);
e8599ff4
DM
3175
3176 if (optlen < 0) {
6e157b6a 3177 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
3178 return;
3179 }
3180
71bcdba0 3181 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 3182
71bcdba0 3183 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 3184 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
3185 return;
3186 }
3187
6e157b6a 3188 on_link = 0;
71bcdba0 3189 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 3190 on_link = 1;
71bcdba0 3191 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 3192 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 3193 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
3194 return;
3195 }
3196
3197 in6_dev = __in6_dev_get(skb->dev);
3198 if (!in6_dev)
3199 return;
3200 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
3201 return;
3202
3203 /* RFC2461 8.1:
3204 * The IP source address of the Redirect MUST be the same as the current
3205 * first-hop router for the specified ICMP Destination Address.
3206 */
3207
f997c55c 3208 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
3209 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
3210 return;
3211 }
6e157b6a
DM
3212
3213 lladdr = NULL;
e8599ff4
DM
3214 if (ndopts.nd_opts_tgt_lladdr) {
3215 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
3216 skb->dev);
3217 if (!lladdr) {
3218 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
3219 return;
3220 }
3221 }
3222
6e157b6a 3223 rt = (struct rt6_info *) dst;
ec13ad1d 3224 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 3225 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 3226 return;
6e157b6a 3227 }
e8599ff4 3228
6e157b6a
DM
3229 /* Redirect received -> path was valid.
3230 * Look, redirects are sent only in response to data packets,
3231 * so that this nexthop apparently is reachable. --ANK
3232 */
0dec879f 3233 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 3234
71bcdba0 3235 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
3236 if (!neigh)
3237 return;
a6279458 3238
1da177e4
LT
3239 /*
3240 * We have finally decided to accept it.
3241 */
3242
f997c55c 3243 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
3244 NEIGH_UPDATE_F_WEAK_OVERRIDE|
3245 NEIGH_UPDATE_F_OVERRIDE|
3246 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
3247 NEIGH_UPDATE_F_ISROUTER)),
3248 NDISC_REDIRECT, &ndopts);
1da177e4 3249
83a09abd 3250 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 3251 if (!nrt)
1da177e4
LT
3252 goto out;
3253
3254 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
3255 if (on_link)
3256 nrt->rt6i_flags &= ~RTF_GATEWAY;
3257
b91d5329 3258 nrt->rt6i_protocol = RTPROT_REDIRECT;
4e3fd7a0 3259 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 3260
2b760fcf
WW
3261 /* No need to remove rt from the exception table if rt is
3262 * a cached route because rt6_insert_exception() will
3263 * takes care of it
3264 */
3265 if (rt6_insert_exception(nrt, rt)) {
3266 dst_release_immediate(&nrt->dst);
3267 goto out;
3268 }
1da177e4 3269
d8d1f30b
CG
3270 netevent.old = &rt->dst;
3271 netevent.new = &nrt->dst;
71bcdba0 3272 netevent.daddr = &msg->dest;
60592833 3273 netevent.neigh = neigh;
8d71740c
TT
3274 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
3275
1da177e4 3276out:
e8599ff4 3277 neigh_release(neigh);
6e157b6a
DM
3278}
3279
1da177e4
LT
3280/*
3281 * Misc support functions
3282 */
3283
4b32b5ad
MKL
3284static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
3285{
3a2232e9 3286 BUG_ON(from->from);
4b32b5ad
MKL
3287
3288 rt->rt6i_flags &= ~RTF_EXPIRES;
3289 dst_hold(&from->dst);
3a2232e9 3290 rt->from = from;
4b32b5ad
MKL
3291 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
3292}
3293
83a09abd
MKL
3294static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
3295{
3296 rt->dst.input = ort->dst.input;
3297 rt->dst.output = ort->dst.output;
3298 rt->rt6i_dst = ort->rt6i_dst;
3299 rt->dst.error = ort->dst.error;
3300 rt->rt6i_idev = ort->rt6i_idev;
3301 if (rt->rt6i_idev)
3302 in6_dev_hold(rt->rt6i_idev);
3303 rt->dst.lastuse = jiffies;
3304 rt->rt6i_gateway = ort->rt6i_gateway;
3305 rt->rt6i_flags = ort->rt6i_flags;
3306 rt6_set_from(rt, ort);
3307 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 3308#ifdef CONFIG_IPV6_SUBTREES
83a09abd 3309 rt->rt6i_src = ort->rt6i_src;
1da177e4 3310#endif
83a09abd
MKL
3311 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
3312 rt->rt6i_table = ort->rt6i_table;
61adedf3 3313 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
3314}
3315
70ceb4f5 3316#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 3317static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 3318 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3319 const struct in6_addr *gwaddr,
3320 struct net_device *dev)
70ceb4f5 3321{
830218c1
DA
3322 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
3323 int ifindex = dev->ifindex;
70ceb4f5
YH
3324 struct fib6_node *fn;
3325 struct rt6_info *rt = NULL;
c71099ac
TG
3326 struct fib6_table *table;
3327
830218c1 3328 table = fib6_get_table(net, tb_id);
38308473 3329 if (!table)
c71099ac 3330 return NULL;
70ceb4f5 3331
66f5d6ce 3332 rcu_read_lock();
38fbeeee 3333 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
70ceb4f5
YH
3334 if (!fn)
3335 goto out;
3336
66f5d6ce 3337 for_each_fib6_node_rt_rcu(fn) {
d1918542 3338 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
3339 continue;
3340 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
3341 continue;
3342 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
3343 continue;
d3843fe5 3344 ip6_hold_safe(NULL, &rt, false);
70ceb4f5
YH
3345 break;
3346 }
3347out:
66f5d6ce 3348 rcu_read_unlock();
70ceb4f5
YH
3349 return rt;
3350}
3351
efa2cea0 3352static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 3353 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
3354 const struct in6_addr *gwaddr,
3355 struct net_device *dev,
95c96174 3356 unsigned int pref)
70ceb4f5 3357{
86872cb5 3358 struct fib6_config cfg = {
238fc7ea 3359 .fc_metric = IP6_RT_PRIO_USER,
830218c1 3360 .fc_ifindex = dev->ifindex,
86872cb5
TG
3361 .fc_dst_len = prefixlen,
3362 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
3363 RTF_UP | RTF_PREF(pref),
b91d5329 3364 .fc_protocol = RTPROT_RA,
15e47304 3365 .fc_nlinfo.portid = 0,
efa2cea0
DL
3366 .fc_nlinfo.nlh = NULL,
3367 .fc_nlinfo.nl_net = net,
86872cb5
TG
3368 };
3369
830218c1 3370 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
3371 cfg.fc_dst = *prefix;
3372 cfg.fc_gateway = *gwaddr;
70ceb4f5 3373
e317da96
YH
3374 /* We should treat it as a default route if prefix length is 0. */
3375 if (!prefixlen)
86872cb5 3376 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 3377
333c4301 3378 ip6_route_add(&cfg, NULL);
70ceb4f5 3379
830218c1 3380 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
3381}
3382#endif
3383
b71d1d42 3384struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 3385{
830218c1 3386 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 3387 struct rt6_info *rt;
c71099ac 3388 struct fib6_table *table;
1da177e4 3389
830218c1 3390 table = fib6_get_table(dev_net(dev), tb_id);
38308473 3391 if (!table)
c71099ac 3392 return NULL;
1da177e4 3393
66f5d6ce
WW
3394 rcu_read_lock();
3395 for_each_fib6_node_rt_rcu(&table->tb6_root) {
d1918542 3396 if (dev == rt->dst.dev &&
045927ff 3397 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
3398 ipv6_addr_equal(&rt->rt6i_gateway, addr))
3399 break;
3400 }
3401 if (rt)
d3843fe5 3402 ip6_hold_safe(NULL, &rt, false);
66f5d6ce 3403 rcu_read_unlock();
1da177e4
LT
3404 return rt;
3405}
3406
b71d1d42 3407struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
3408 struct net_device *dev,
3409 unsigned int pref)
1da177e4 3410{
86872cb5 3411 struct fib6_config cfg = {
ca254490 3412 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 3413 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
3414 .fc_ifindex = dev->ifindex,
3415 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
3416 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
b91d5329 3417 .fc_protocol = RTPROT_RA,
15e47304 3418 .fc_nlinfo.portid = 0,
5578689a 3419 .fc_nlinfo.nlh = NULL,
c346dca1 3420 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 3421 };
1da177e4 3422
4e3fd7a0 3423 cfg.fc_gateway = *gwaddr;
1da177e4 3424
333c4301 3425 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
3426 struct fib6_table *table;
3427
3428 table = fib6_get_table(dev_net(dev), cfg.fc_table);
3429 if (table)
3430 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
3431 }
1da177e4 3432
1da177e4
LT
3433 return rt6_get_dflt_router(gwaddr, dev);
3434}
3435
830218c1 3436static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
3437{
3438 struct rt6_info *rt;
3439
3440restart:
66f5d6ce
WW
3441 rcu_read_lock();
3442 for_each_fib6_node_rt_rcu(&table->tb6_root) {
3e8b0ac3
LC
3443 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
3444 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d3843fe5 3445 if (dst_hold_safe(&rt->dst)) {
66f5d6ce 3446 rcu_read_unlock();
d3843fe5
WW
3447 ip6_del_rt(rt);
3448 } else {
66f5d6ce 3449 rcu_read_unlock();
d3843fe5 3450 }
1da177e4
LT
3451 goto restart;
3452 }
3453 }
66f5d6ce 3454 rcu_read_unlock();
830218c1
DA
3455
3456 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
3457}
3458
3459void rt6_purge_dflt_routers(struct net *net)
3460{
3461 struct fib6_table *table;
3462 struct hlist_head *head;
3463 unsigned int h;
3464
3465 rcu_read_lock();
3466
3467 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
3468 head = &net->ipv6.fib_table_hash[h];
3469 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
3470 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
3471 __rt6_purge_dflt_routers(table);
3472 }
3473 }
3474
3475 rcu_read_unlock();
1da177e4
LT
3476}
3477
5578689a
DL
3478static void rtmsg_to_fib6_config(struct net *net,
3479 struct in6_rtmsg *rtmsg,
86872cb5
TG
3480 struct fib6_config *cfg)
3481{
3482 memset(cfg, 0, sizeof(*cfg));
3483
ca254490
DA
3484 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
3485 : RT6_TABLE_MAIN;
86872cb5
TG
3486 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
3487 cfg->fc_metric = rtmsg->rtmsg_metric;
3488 cfg->fc_expires = rtmsg->rtmsg_info;
3489 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
3490 cfg->fc_src_len = rtmsg->rtmsg_src_len;
3491 cfg->fc_flags = rtmsg->rtmsg_flags;
3492
5578689a 3493 cfg->fc_nlinfo.nl_net = net;
f1243c2d 3494
4e3fd7a0
AD
3495 cfg->fc_dst = rtmsg->rtmsg_dst;
3496 cfg->fc_src = rtmsg->rtmsg_src;
3497 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
3498}
3499
5578689a 3500int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 3501{
86872cb5 3502 struct fib6_config cfg;
1da177e4
LT
3503 struct in6_rtmsg rtmsg;
3504 int err;
3505
67ba4152 3506 switch (cmd) {
1da177e4
LT
3507 case SIOCADDRT: /* Add a route */
3508 case SIOCDELRT: /* Delete a route */
af31f412 3509 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
3510 return -EPERM;
3511 err = copy_from_user(&rtmsg, arg,
3512 sizeof(struct in6_rtmsg));
3513 if (err)
3514 return -EFAULT;
86872cb5 3515
5578689a 3516 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 3517
1da177e4
LT
3518 rtnl_lock();
3519 switch (cmd) {
3520 case SIOCADDRT:
333c4301 3521 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
3522 break;
3523 case SIOCDELRT:
333c4301 3524 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
3525 break;
3526 default:
3527 err = -EINVAL;
3528 }
3529 rtnl_unlock();
3530
3531 return err;
3ff50b79 3532 }
1da177e4
LT
3533
3534 return -EINVAL;
3535}
3536
3537/*
3538 * Drop the packet on the floor
3539 */
3540
d5fdd6ba 3541static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 3542{
612f09e8 3543 int type;
adf30907 3544 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
3545 switch (ipstats_mib_noroutes) {
3546 case IPSTATS_MIB_INNOROUTES:
0660e03f 3547 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 3548 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
3549 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3550 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
3551 break;
3552 }
3553 /* FALLTHROUGH */
3554 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
3555 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
3556 ipstats_mib_noroutes);
612f09e8
YH
3557 break;
3558 }
3ffe533c 3559 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
3560 kfree_skb(skb);
3561 return 0;
3562}
3563
9ce8ade0
TG
3564static int ip6_pkt_discard(struct sk_buff *skb)
3565{
612f09e8 3566 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3567}
3568
ede2059d 3569static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 3570{
adf30907 3571 skb->dev = skb_dst(skb)->dev;
612f09e8 3572 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
3573}
3574
9ce8ade0
TG
3575static int ip6_pkt_prohibit(struct sk_buff *skb)
3576{
612f09e8 3577 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
3578}
3579
ede2059d 3580static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 3581{
adf30907 3582 skb->dev = skb_dst(skb)->dev;
612f09e8 3583 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
3584}
3585
1da177e4
LT
3586/*
3587 * Allocate a dst for local (unicast / anycast) address.
3588 */
3589
3590struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
3591 const struct in6_addr *addr,
8f031519 3592 bool anycast)
1da177e4 3593{
ca254490 3594 u32 tb_id;
c346dca1 3595 struct net *net = dev_net(idev->dev);
4832c30d 3596 struct net_device *dev = idev->dev;
5f02ce24
DA
3597 struct rt6_info *rt;
3598
5f02ce24 3599 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 3600 if (!rt)
1da177e4
LT
3601 return ERR_PTR(-ENOMEM);
3602
1da177e4
LT
3603 in6_dev_hold(idev);
3604
11d53b49 3605 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
3606 rt->dst.input = ip6_input;
3607 rt->dst.output = ip6_output;
1da177e4 3608 rt->rt6i_idev = idev;
1da177e4 3609
94b5e0f9 3610 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 3611 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
3612 if (anycast)
3613 rt->rt6i_flags |= RTF_ANYCAST;
3614 else
1da177e4 3615 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 3616
550bab42 3617 rt->rt6i_gateway = *addr;
4e3fd7a0 3618 rt->rt6i_dst.addr = *addr;
1da177e4 3619 rt->rt6i_dst.plen = 128;
ca254490
DA
3620 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
3621 rt->rt6i_table = fib6_get_table(net, tb_id);
1da177e4 3622
1da177e4
LT
3623 return rt;
3624}
3625
c3968a85
DW
/* remove deleted ip from prefsrc entries */

/* Argument bundle handed to fib6_remove_prefsrc() through fib6_clean_all(). */
struct arg_dev_net_ip {
	struct net_device *dev;		/* device filter; NULL matches any device */
	struct net *net;		/* namespace whose null entry is skipped */
	struct in6_addr *addr;		/* address compared against rt6i_prefsrc */
};
3632
3633static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
3634{
3635 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
3636 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
3637 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
3638
d1918542 3639 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
3640 rt != net->ipv6.ip6_null_entry &&
3641 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
60006a48 3642 spin_lock_bh(&rt6_exception_lock);
c3968a85
DW
3643 /* remove prefsrc entry */
3644 rt->rt6i_prefsrc.plen = 0;
60006a48
WW
3645 /* need to update cache as well */
3646 rt6_exceptions_remove_prefsrc(rt);
3647 spin_unlock_bh(&rt6_exception_lock);
c3968a85
DW
3648 }
3649 return 0;
3650}
3651
3652void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
3653{
3654 struct net *net = dev_net(ifp->idev->dev);
3655 struct arg_dev_net_ip adni = {
3656 .dev = ifp->idev->dev,
3657 .net = net,
3658 .addr = &ifp->addr,
3659 };
0c3584d5 3660 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
3661}
3662
be7a010d 3663#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
be7a010d
DJ
3664
3665/* Remove routers and update dst entries when gateway turn into host. */
3666static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
3667{
3668 struct in6_addr *gateway = (struct in6_addr *)arg;
3669
2b760fcf
WW
3670 if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
3671 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
be7a010d
DJ
3672 return -1;
3673 }
b16cb459
WW
3674
3675 /* Further clean up cached routes in exception table.
3676 * This is needed because cached route may have a different
3677 * gateway than its 'parent' in the case of an ip redirect.
3678 */
3679 rt6_exceptions_clean_tohost(rt, gateway);
3680
be7a010d
DJ
3681 return 0;
3682}
3683
3684void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
3685{
3686 fib6_clean_all(net, fib6_clean_tohost, gateway);
3687}
3688
2127d95a
IS
/*
 * Argument bundle for the fib6_ifup() / fib6_ifdown() walkers.  The
 * anonymous union is read as nh_flags by fib6_ifup() and as event by
 * fib6_ifdown().
 */
struct arg_netdev_event {
	const struct net_device *dev;	/* device the event is about */
	union {
		unsigned int nh_flags;	/* nexthop flags to clear on "up" */
		unsigned long event;	/* NETDEV_* event on "down" */
	};
};
3696
d7dedee1
IS
3697static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
3698{
3699 struct rt6_info *iter;
3700 struct fib6_node *fn;
3701
3702 fn = rcu_dereference_protected(rt->rt6i_node,
3703 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3704 iter = rcu_dereference_protected(fn->leaf,
3705 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3706 while (iter) {
3707 if (iter->rt6i_metric == rt->rt6i_metric &&
3708 rt6_qualify_for_ecmp(iter))
3709 return iter;
3710 iter = rcu_dereference_protected(iter->rt6_next,
3711 lockdep_is_held(&rt->rt6i_table->tb6_lock));
3712 }
3713
3714 return NULL;
3715}
3716
3717static bool rt6_is_dead(const struct rt6_info *rt)
3718{
3719 if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
3720 (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
3721 rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
3722 return true;
3723
3724 return false;
3725}
3726
3727static int rt6_multipath_total_weight(const struct rt6_info *rt)
3728{
3729 struct rt6_info *iter;
3730 int total = 0;
3731
3732 if (!rt6_is_dead(rt))
398958ae 3733 total += rt->rt6i_nh_weight;
d7dedee1
IS
3734
3735 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
3736 if (!rt6_is_dead(iter))
398958ae 3737 total += iter->rt6i_nh_weight;
d7dedee1
IS
3738 }
3739
3740 return total;
3741}
3742
3743static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
3744{
3745 int upper_bound = -1;
3746
3747 if (!rt6_is_dead(rt)) {
398958ae 3748 *weight += rt->rt6i_nh_weight;
d7dedee1
IS
3749 upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
3750 total) - 1;
3751 }
3752 atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
3753}
3754
3755static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
3756{
3757 struct rt6_info *iter;
3758 int weight = 0;
3759
3760 rt6_upper_bound_set(rt, &weight, total);
3761
3762 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3763 rt6_upper_bound_set(iter, &weight, total);
3764}
3765
3766void rt6_multipath_rebalance(struct rt6_info *rt)
3767{
3768 struct rt6_info *first;
3769 int total;
3770
3771 /* In case the entire multipath route was marked for flushing,
3772 * then there is no need to rebalance upon the removal of every
3773 * sibling route.
3774 */
3775 if (!rt->rt6i_nsiblings || rt->should_flush)
3776 return;
3777
3778 /* During lookup routes are evaluated in order, so we need to
3779 * make sure upper bounds are assigned from the first sibling
3780 * onwards.
3781 */
3782 first = rt6_multipath_first_sibling(rt);
3783 if (WARN_ON_ONCE(!first))
3784 return;
3785
3786 total = rt6_multipath_total_weight(first);
3787 rt6_multipath_upper_bound_set(first, total);
3788}
3789
2127d95a
IS
3790static int fib6_ifup(struct rt6_info *rt, void *p_arg)
3791{
3792 const struct arg_netdev_event *arg = p_arg;
3793 const struct net *net = dev_net(arg->dev);
3794
1de178ed 3795 if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
2127d95a 3796 rt->rt6i_nh_flags &= ~arg->nh_flags;
1de178ed 3797 fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
d7dedee1 3798 rt6_multipath_rebalance(rt);
1de178ed 3799 }
2127d95a
IS
3800
3801 return 0;
3802}
3803
3804void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
3805{
3806 struct arg_netdev_event arg = {
3807 .dev = dev,
6802f3ad
IS
3808 {
3809 .nh_flags = nh_flags,
3810 },
2127d95a
IS
3811 };
3812
3813 if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
3814 arg.nh_flags |= RTNH_F_LINKDOWN;
3815
3816 fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
3817}
3818
1de178ed
IS
3819static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
3820 const struct net_device *dev)
3821{
3822 struct rt6_info *iter;
3823
3824 if (rt->dst.dev == dev)
3825 return true;
3826 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3827 if (iter->dst.dev == dev)
3828 return true;
3829
3830 return false;
3831}
3832
3833static void rt6_multipath_flush(struct rt6_info *rt)
3834{
3835 struct rt6_info *iter;
3836
3837 rt->should_flush = 1;
3838 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3839 iter->should_flush = 1;
3840}
3841
3842static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
3843 const struct net_device *down_dev)
3844{
3845 struct rt6_info *iter;
3846 unsigned int dead = 0;
3847
3848 if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
3849 dead++;
3850 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3851 if (iter->dst.dev == down_dev ||
3852 iter->rt6i_nh_flags & RTNH_F_DEAD)
3853 dead++;
3854
3855 return dead;
3856}
3857
3858static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
3859 const struct net_device *dev,
3860 unsigned int nh_flags)
3861{
3862 struct rt6_info *iter;
3863
3864 if (rt->dst.dev == dev)
3865 rt->rt6i_nh_flags |= nh_flags;
3866 list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
3867 if (iter->dst.dev == dev)
3868 iter->rt6i_nh_flags |= nh_flags;
3869}
3870
a1a22c12 3871/* called with write lock held for table with rt */
4c981e28 3872static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
1da177e4 3873{
4c981e28
IS
3874 const struct arg_netdev_event *arg = p_arg;
3875 const struct net_device *dev = arg->dev;
3876 const struct net *net = dev_net(dev);
8ed67789 3877
1de178ed 3878 if (rt == net->ipv6.ip6_null_entry)
27c6fa73
IS
3879 return 0;
3880
3881 switch (arg->event) {
3882 case NETDEV_UNREGISTER:
1de178ed 3883 return rt->dst.dev == dev ? -1 : 0;
27c6fa73 3884 case NETDEV_DOWN:
1de178ed 3885 if (rt->should_flush)
27c6fa73 3886 return -1;
1de178ed
IS
3887 if (!rt->rt6i_nsiblings)
3888 return rt->dst.dev == dev ? -1 : 0;
3889 if (rt6_multipath_uses_dev(rt, dev)) {
3890 unsigned int count;
3891
3892 count = rt6_multipath_dead_count(rt, dev);
3893 if (rt->rt6i_nsiblings + 1 == count) {
3894 rt6_multipath_flush(rt);
3895 return -1;
3896 }
3897 rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
3898 RTNH_F_LINKDOWN);
3899 fib6_update_sernum(rt);
d7dedee1 3900 rt6_multipath_rebalance(rt);
1de178ed
IS
3901 }
3902 return -2;
27c6fa73 3903 case NETDEV_CHANGE:
1de178ed
IS
3904 if (rt->dst.dev != dev ||
3905 rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
27c6fa73
IS
3906 break;
3907 rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
d7dedee1 3908 rt6_multipath_rebalance(rt);
27c6fa73 3909 break;
2b241361 3910 }
c159d30c 3911
1da177e4
LT
3912 return 0;
3913}
3914
27c6fa73 3915void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
1da177e4 3916{
4c981e28 3917 struct arg_netdev_event arg = {
8ed67789 3918 .dev = dev,
6802f3ad
IS
3919 {
3920 .event = event,
3921 },
8ed67789
DL
3922 };
3923
4c981e28
IS
3924 fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
3925}
3926
3927void rt6_disable_ip(struct net_device *dev, unsigned long event)
3928{
3929 rt6_sync_down_dev(dev, event);
3930 rt6_uncached_list_flush_dev(dev_net(dev), dev);
3931 neigh_ifdown(&nd_tbl, dev);
1da177e4
LT
3932}
3933
/* Argument bundle handed to rt6_mtu_change_route() via fib6_clean_all(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU value */
};
3938
3939static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
3940{
3941 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
3942 struct inet6_dev *idev;
3943
3944 /* In IPv6 pmtu discovery is not optional,
3945 so that RTAX_MTU lock cannot disable it.
3946 We still use this lock to block changes
3947 caused by addrconf/ndisc.
3948 */
3949
3950 idev = __in6_dev_get(arg->dev);
38308473 3951 if (!idev)
1da177e4
LT
3952 return 0;
3953
3954 /* For administrative MTU increase, there is no way to discover
3955 IPv6 PMTU increase, so PMTU increase should be updated here.
3956 Since RFC 1981 doesn't include administrative MTU increase
3957 update PMTU increase is a MUST. (i.e. jumbo frame)
3958 */
d1918542 3959 if (rt->dst.dev == arg->dev &&
4b32b5ad 3960 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
f5bbe7ee 3961 spin_lock_bh(&rt6_exception_lock);
e9fa1495
SB
3962 if (dst_metric_raw(&rt->dst, RTAX_MTU) &&
3963 rt6_mtu_change_route_allowed(idev, rt, arg->mtu))
4b32b5ad 3964 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
e9fa1495 3965 rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
f5bbe7ee 3966 spin_unlock_bh(&rt6_exception_lock);
566cfd8f 3967 }
1da177e4
LT
3968 return 0;
3969}
3970
95c96174 3971void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 3972{
c71099ac
TG
3973 struct rt6_mtu_change_arg arg = {
3974 .dev = dev,
3975 .mtu = mtu,
3976 };
1da177e4 3977
0c3584d5 3978 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
3979}
3980
ef7c79ed 3981static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 3982 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
aa8f8778 3983 [RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
86872cb5 3984 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 3985 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
3986 [RTA_PRIORITY] = { .type = NLA_U32 },
3987 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 3988 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 3989 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
3990 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
3991 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 3992 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 3993 [RTA_UID] = { .type = NLA_U32 },
3b45a410 3994 [RTA_MARK] = { .type = NLA_U32 },
aa8f8778 3995 [RTA_TABLE] = { .type = NLA_U32 },
86872cb5
TG
3996};
3997
3998static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
3999 struct fib6_config *cfg,
4000 struct netlink_ext_ack *extack)
1da177e4 4001{
86872cb5
TG
4002 struct rtmsg *rtm;
4003 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 4004 unsigned int pref;
86872cb5 4005 int err;
1da177e4 4006
fceb6435
JB
4007 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
4008 NULL);
86872cb5
TG
4009 if (err < 0)
4010 goto errout;
1da177e4 4011
86872cb5
TG
4012 err = -EINVAL;
4013 rtm = nlmsg_data(nlh);
4014 memset(cfg, 0, sizeof(*cfg));
4015
4016 cfg->fc_table = rtm->rtm_table;
4017 cfg->fc_dst_len = rtm->rtm_dst_len;
4018 cfg->fc_src_len = rtm->rtm_src_len;
4019 cfg->fc_flags = RTF_UP;
4020 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 4021 cfg->fc_type = rtm->rtm_type;
86872cb5 4022
ef2c7d7b
ND
4023 if (rtm->rtm_type == RTN_UNREACHABLE ||
4024 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
4025 rtm->rtm_type == RTN_PROHIBIT ||
4026 rtm->rtm_type == RTN_THROW)
86872cb5
TG
4027 cfg->fc_flags |= RTF_REJECT;
4028
ab79ad14
4029 if (rtm->rtm_type == RTN_LOCAL)
4030 cfg->fc_flags |= RTF_LOCAL;
4031
1f56a01f
MKL
4032 if (rtm->rtm_flags & RTM_F_CLONED)
4033 cfg->fc_flags |= RTF_CACHE;
4034
fc1e64e1
DA
4035 cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
4036
15e47304 4037 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 4038 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 4039 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
4040
4041 if (tb[RTA_GATEWAY]) {
67b61f6c 4042 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 4043 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 4044 }
86872cb5
TG
4045
4046 if (tb[RTA_DST]) {
4047 int plen = (rtm->rtm_dst_len + 7) >> 3;
4048
4049 if (nla_len(tb[RTA_DST]) < plen)
4050 goto errout;
4051
4052 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 4053 }
86872cb5
TG
4054
4055 if (tb[RTA_SRC]) {
4056 int plen = (rtm->rtm_src_len + 7) >> 3;
4057
4058 if (nla_len(tb[RTA_SRC]) < plen)
4059 goto errout;
4060
4061 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 4062 }
86872cb5 4063
c3968a85 4064 if (tb[RTA_PREFSRC])
67b61f6c 4065 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 4066
86872cb5
TG
4067 if (tb[RTA_OIF])
4068 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
4069
4070 if (tb[RTA_PRIORITY])
4071 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
4072
4073 if (tb[RTA_METRICS]) {
4074 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
4075 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 4076 }
86872cb5
TG
4077
4078 if (tb[RTA_TABLE])
4079 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
4080
51ebd318
ND
4081 if (tb[RTA_MULTIPATH]) {
4082 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
4083 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
4084
4085 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 4086 cfg->fc_mp_len, extack);
9ed59592
DA
4087 if (err < 0)
4088 goto errout;
51ebd318
ND
4089 }
4090
c78ba6d6
LR
4091 if (tb[RTA_PREF]) {
4092 pref = nla_get_u8(tb[RTA_PREF]);
4093 if (pref != ICMPV6_ROUTER_PREF_LOW &&
4094 pref != ICMPV6_ROUTER_PREF_HIGH)
4095 pref = ICMPV6_ROUTER_PREF_MEDIUM;
4096 cfg->fc_flags |= RTF_PREF(pref);
4097 }
4098
19e42e45
RP
4099 if (tb[RTA_ENCAP])
4100 cfg->fc_encap = tb[RTA_ENCAP];
4101
9ed59592 4102 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
4103 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
4104
c255bd68 4105 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
4106 if (err < 0)
4107 goto errout;
4108 }
4109
32bc201e
XL
4110 if (tb[RTA_EXPIRES]) {
4111 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
4112
4113 if (addrconf_finite_timeout(timeout)) {
4114 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
4115 cfg->fc_flags |= RTF_EXPIRES;
4116 }
4117 }
4118
86872cb5
TG
4119 err = 0;
4120errout:
4121 return err;
1da177e4
LT
4122}
4123
6b9ea5a6
RP
/* One nexthop of a multipath request, queued on a local list by
 * ip6_route_info_append() until ip6_route_multipath_add() inserts it.
 */
4124struct rt6_nh {
/* route built for this nexthop; reset to NULL once insertion was attempted */
4125 struct rt6_info *rt6_info;
/* copy of the per-nexthop config, reused to delete the route on rollback */
4126 struct fib6_config r_cfg;
/* metrics filled by ip6_convert_metrics(); mxc.mx is kfree()d at cleanup */
4127 struct mx6_config mxc;
/* linkage on the caller's rt6_nh_list */
4128 struct list_head next;
4129};
4130
4131static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
4132{
4133 struct rt6_nh *nh;
4134
4135 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 4136 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
4137 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
4138 nh->r_cfg.fc_ifindex);
4139 }
4140}
4141
4142static int ip6_route_info_append(struct list_head *rt6_nh_list,
4143 struct rt6_info *rt, struct fib6_config *r_cfg)
4144{
4145 struct rt6_nh *nh;
6b9ea5a6
RP
4146 int err = -EEXIST;
4147
4148 list_for_each_entry(nh, rt6_nh_list, next) {
4149 /* check if rt6_info already exists */
f06b7549 4150 if (rt6_duplicate_nexthop(nh->rt6_info, rt))
6b9ea5a6
RP
4151 return err;
4152 }
4153
4154 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
4155 if (!nh)
4156 return -ENOMEM;
4157 nh->rt6_info = rt;
4158 err = ip6_convert_metrics(&nh->mxc, r_cfg);
4159 if (err) {
4160 kfree(nh);
4161 return err;
4162 }
4163 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
4164 list_add_tail(&nh->next, rt6_nh_list);
4165
4166 return 0;
4167}
4168
3b1137fe
DA
4169static void ip6_route_mpath_notify(struct rt6_info *rt,
4170 struct rt6_info *rt_last,
4171 struct nl_info *info,
4172 __u16 nlflags)
4173{
4174 /* if this is an APPEND route, then rt points to the first route
4175 * inserted and rt_last points to last route inserted. Userspace
4176 * wants a consistent dump of the route which starts at the first
4177 * nexthop. Since sibling routes are always added at the end of
4178 * the list, find the first sibling of the last route appended
4179 */
4180 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
4181 rt = list_first_entry(&rt_last->rt6i_siblings,
4182 struct rt6_info,
4183 rt6i_siblings);
4184 }
4185
4186 if (rt)
4187 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
4188}
4189
333c4301
DA
4190static int ip6_route_multipath_add(struct fib6_config *cfg,
4191 struct netlink_ext_ack *extack)
51ebd318 4192{
3b1137fe
DA
4193 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
4194 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
4195 struct fib6_config r_cfg;
4196 struct rtnexthop *rtnh;
6b9ea5a6
RP
4197 struct rt6_info *rt;
4198 struct rt6_nh *err_nh;
4199 struct rt6_nh *nh, *nh_safe;
3b1137fe 4200 __u16 nlflags;
51ebd318
ND
4201 int remaining;
4202 int attrlen;
6b9ea5a6
RP
4203 int err = 1;
4204 int nhn = 0;
4205 int replace = (cfg->fc_nlinfo.nlh &&
4206 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
4207 LIST_HEAD(rt6_nh_list);
51ebd318 4208
3b1137fe
DA
4209 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
4210 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
4211 nlflags |= NLM_F_APPEND;
4212
35f1b4e9 4213 remaining = cfg->fc_mp_len;
51ebd318 4214 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 4215
6b9ea5a6
RP
4216 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
4217 * rt6_info structs per nexthop
4218 */
51ebd318
ND
4219 while (rtnh_ok(rtnh, remaining)) {
4220 memcpy(&r_cfg, cfg, sizeof(*cfg));
4221 if (rtnh->rtnh_ifindex)
4222 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4223
4224 attrlen = rtnh_attrlen(rtnh);
4225 if (attrlen > 0) {
4226 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4227
4228 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4229 if (nla) {
67b61f6c 4230 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
4231 r_cfg.fc_flags |= RTF_GATEWAY;
4232 }
19e42e45
RP
4233 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
4234 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
4235 if (nla)
4236 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 4237 }
6b9ea5a6 4238
68e2ffde 4239 r_cfg.fc_flags |= (rtnh->rtnh_flags & RTNH_F_ONLINK);
333c4301 4240 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
4241 if (IS_ERR(rt)) {
4242 err = PTR_ERR(rt);
4243 rt = NULL;
6b9ea5a6 4244 goto cleanup;
8c5b83f0 4245 }
6b9ea5a6 4246
398958ae
IS
4247 rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
4248
6b9ea5a6 4249 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 4250 if (err) {
587fea74 4251 dst_release_immediate(&rt->dst);
6b9ea5a6
RP
4252 goto cleanup;
4253 }
4254
4255 rtnh = rtnh_next(rtnh, &remaining);
4256 }
4257
3b1137fe
DA
4258 /* for add and replace send one notification with all nexthops.
4259 * Skip the notification in fib6_add_rt2node and send one with
4260 * the full route when done
4261 */
4262 info->skip_notify = 1;
4263
6b9ea5a6
RP
4264 err_nh = NULL;
4265 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 4266 rt_last = nh->rt6_info;
333c4301 4267 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
4268 /* save reference to first route for notification */
4269 if (!rt_notif && !err)
4270 rt_notif = nh->rt6_info;
4271
6b9ea5a6
RP
4272 /* nh->rt6_info is used or freed at this point, reset to NULL*/
4273 nh->rt6_info = NULL;
4274 if (err) {
4275 if (replace && nhn)
4276 ip6_print_replace_route_err(&rt6_nh_list);
4277 err_nh = nh;
4278 goto add_errout;
51ebd318 4279 }
6b9ea5a6 4280
1a72418b 4281 /* Because each route is added like a single route we remove
27596472
MK
4282 * these flags after the first nexthop: if there is a collision,
4283 * we have already failed to add the first nexthop:
4284 * fib6_add_rt2node() has rejected it; when replacing, old
4285 * nexthops have been replaced by first new, the rest should
4286 * be added to it.
1a72418b 4287 */
27596472
MK
4288 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
4289 NLM_F_REPLACE);
6b9ea5a6
RP
4290 nhn++;
4291 }
4292
3b1137fe
DA
4293 /* success ... tell user about new route */
4294 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
4295 goto cleanup;
4296
4297add_errout:
3b1137fe
DA
4298 /* send notification for routes that were added so that
4299 * the delete notifications sent by ip6_route_del are
4300 * coherent
4301 */
4302 if (rt_notif)
4303 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
4304
6b9ea5a6
RP
4305 /* Delete routes that were already added */
4306 list_for_each_entry(nh, &rt6_nh_list, next) {
4307 if (err_nh == nh)
4308 break;
333c4301 4309 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
4310 }
4311
4312cleanup:
4313 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
587fea74
WW
4314 if (nh->rt6_info)
4315 dst_release_immediate(&nh->rt6_info->dst);
52fe51f8 4316 kfree(nh->mxc.mx);
6b9ea5a6
RP
4317 list_del(&nh->next);
4318 kfree(nh);
4319 }
4320
4321 return err;
4322}
4323
333c4301
DA
4324static int ip6_route_multipath_del(struct fib6_config *cfg,
4325 struct netlink_ext_ack *extack)
6b9ea5a6
RP
4326{
4327 struct fib6_config r_cfg;
4328 struct rtnexthop *rtnh;
4329 int remaining;
4330 int attrlen;
4331 int err = 1, last_err = 0;
4332
4333 remaining = cfg->fc_mp_len;
4334 rtnh = (struct rtnexthop *)cfg->fc_mp;
4335
4336 /* Parse a Multipath Entry */
4337 while (rtnh_ok(rtnh, remaining)) {
4338 memcpy(&r_cfg, cfg, sizeof(*cfg));
4339 if (rtnh->rtnh_ifindex)
4340 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
4341
4342 attrlen = rtnh_attrlen(rtnh);
4343 if (attrlen > 0) {
4344 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
4345
4346 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
4347 if (nla) {
4348 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
4349 r_cfg.fc_flags |= RTF_GATEWAY;
4350 }
4351 }
333c4301 4352 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
4353 if (err)
4354 last_err = err;
4355
51ebd318
ND
4356 rtnh = rtnh_next(rtnh, &remaining);
4357 }
4358
4359 return last_err;
4360}
4361
c21ef3e3
DA
4362static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4363 struct netlink_ext_ack *extack)
1da177e4 4364{
86872cb5
TG
4365 struct fib6_config cfg;
4366 int err;
1da177e4 4367
333c4301 4368 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4369 if (err < 0)
4370 return err;
4371
51ebd318 4372 if (cfg.fc_mp)
333c4301 4373 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
4374 else {
4375 cfg.fc_delete_all_nh = 1;
333c4301 4376 return ip6_route_del(&cfg, extack);
0ae81335 4377 }
1da177e4
LT
4378}
4379
c21ef3e3
DA
4380static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
4381 struct netlink_ext_ack *extack)
1da177e4 4382{
86872cb5
TG
4383 struct fib6_config cfg;
4384 int err;
1da177e4 4385
333c4301 4386 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
4387 if (err < 0)
4388 return err;
4389
51ebd318 4390 if (cfg.fc_mp)
333c4301 4391 return ip6_route_multipath_add(&cfg, extack);
51ebd318 4392 else
333c4301 4393 return ip6_route_add(&cfg, extack);
1da177e4
LT
4394}
4395
beb1afac 4396static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 4397{
beb1afac
DA
4398 int nexthop_len = 0;
4399
4400 if (rt->rt6i_nsiblings) {
4401 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
4402 + NLA_ALIGN(sizeof(struct rtnexthop))
4403 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
4404 + lwtunnel_get_encap_size(rt->dst.lwtstate);
4405
4406 nexthop_len *= rt->rt6i_nsiblings;
4407 }
4408
339bf98f
TG
4409 return NLMSG_ALIGN(sizeof(struct rtmsg))
4410 + nla_total_size(16) /* RTA_SRC */
4411 + nla_total_size(16) /* RTA_DST */
4412 + nla_total_size(16) /* RTA_GATEWAY */
4413 + nla_total_size(16) /* RTA_PREFSRC */
4414 + nla_total_size(4) /* RTA_TABLE */
4415 + nla_total_size(4) /* RTA_IIF */
4416 + nla_total_size(4) /* RTA_OIF */
4417 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 4418 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 4419 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 4420 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 4421 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
4422 + lwtunnel_get_encap_size(rt->dst.lwtstate)
4423 + nexthop_len;
4424}
4425
4426static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 4427 unsigned int *flags, bool skip_oif)
beb1afac 4428{
f9d882ea
IS
4429 if (rt->rt6i_nh_flags & RTNH_F_DEAD)
4430 *flags |= RTNH_F_DEAD;
4431
44c9f2f2 4432 if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
beb1afac
DA
4433 *flags |= RTNH_F_LINKDOWN;
4434 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
4435 *flags |= RTNH_F_DEAD;
4436 }
4437
4438 if (rt->rt6i_flags & RTF_GATEWAY) {
4439 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
4440 goto nla_put_failure;
4441 }
4442
fc1e64e1 4443 *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
fe400799 4444 if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
61e4d01e
IS
4445 *flags |= RTNH_F_OFFLOAD;
4446
5be083ce
DA
4447 /* not needed for multipath encoding b/c it has a rtnexthop struct */
4448 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
4449 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
4450 goto nla_put_failure;
4451
4452 if (rt->dst.lwtstate &&
4453 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
4454 goto nla_put_failure;
4455
4456 return 0;
4457
4458nla_put_failure:
4459 return -EMSGSIZE;
4460}
4461
5be083ce 4462/* add multipath next hop */
beb1afac
DA
4463static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
4464{
4465 struct rtnexthop *rtnh;
4466 unsigned int flags = 0;
4467
4468 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
4469 if (!rtnh)
4470 goto nla_put_failure;
4471
398958ae 4472 rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
beb1afac
DA
4473 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
4474
5be083ce 4475 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
4476 goto nla_put_failure;
4477
4478 rtnh->rtnh_flags = flags;
4479
4480 /* length of rtnetlink header + attributes */
4481 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
4482
4483 return 0;
4484
4485nla_put_failure:
4486 return -EMSGSIZE;
339bf98f
TG
4487}
4488
191cd582
BH
4489static int rt6_fill_node(struct net *net,
4490 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 4491 struct in6_addr *dst, struct in6_addr *src,
15e47304 4492 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 4493 unsigned int flags)
1da177e4 4494{
4b32b5ad 4495 u32 metrics[RTAX_MAX];
1da177e4 4496 struct rtmsg *rtm;
2d7202bf 4497 struct nlmsghdr *nlh;
e3703b3d 4498 long expires;
9e762a4a 4499 u32 table;
1da177e4 4500
15e47304 4501 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 4502 if (!nlh)
26932566 4503 return -EMSGSIZE;
2d7202bf
TG
4504
4505 rtm = nlmsg_data(nlh);
1da177e4
LT
4506 rtm->rtm_family = AF_INET6;
4507 rtm->rtm_dst_len = rt->rt6i_dst.plen;
4508 rtm->rtm_src_len = rt->rt6i_src.plen;
4509 rtm->rtm_tos = 0;
c71099ac 4510 if (rt->rt6i_table)
9e762a4a 4511 table = rt->rt6i_table->tb6_id;
c71099ac 4512 else
9e762a4a
PM
4513 table = RT6_TABLE_UNSPEC;
4514 rtm->rtm_table = table;
c78679e8
DM
4515 if (nla_put_u32(skb, RTA_TABLE, table))
4516 goto nla_put_failure;
ef2c7d7b
ND
4517 if (rt->rt6i_flags & RTF_REJECT) {
4518 switch (rt->dst.error) {
4519 case -EINVAL:
4520 rtm->rtm_type = RTN_BLACKHOLE;
4521 break;
4522 case -EACCES:
4523 rtm->rtm_type = RTN_PROHIBIT;
4524 break;
b4949ab2
ND
4525 case -EAGAIN:
4526 rtm->rtm_type = RTN_THROW;
4527 break;
ef2c7d7b
ND
4528 default:
4529 rtm->rtm_type = RTN_UNREACHABLE;
4530 break;
4531 }
4532 }
38308473 4533 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 4534 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
4535 else if (rt->rt6i_flags & RTF_ANYCAST)
4536 rtm->rtm_type = RTN_ANYCAST;
d1918542 4537 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
4538 rtm->rtm_type = RTN_LOCAL;
4539 else
4540 rtm->rtm_type = RTN_UNICAST;
4541 rtm->rtm_flags = 0;
4542 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
4543 rtm->rtm_protocol = rt->rt6i_protocol;
1da177e4 4544
38308473 4545 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
4546 rtm->rtm_flags |= RTM_F_CLONED;
4547
4548 if (dst) {
930345ea 4549 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 4550 goto nla_put_failure;
1ab1457c 4551 rtm->rtm_dst_len = 128;
1da177e4 4552 } else if (rtm->rtm_dst_len)
930345ea 4553 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 4554 goto nla_put_failure;
1da177e4
LT
4555#ifdef CONFIG_IPV6_SUBTREES
4556 if (src) {
930345ea 4557 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 4558 goto nla_put_failure;
1ab1457c 4559 rtm->rtm_src_len = 128;
c78679e8 4560 } else if (rtm->rtm_src_len &&
930345ea 4561 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 4562 goto nla_put_failure;
1da177e4 4563#endif
7bc570c8
YH
4564 if (iif) {
4565#ifdef CONFIG_IPV6_MROUTE
4566 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
4567 int err = ip6mr_get_route(net, skb, rtm, portid);
4568
4569 if (err == 0)
4570 return 0;
4571 if (err < 0)
4572 goto nla_put_failure;
7bc570c8
YH
4573 } else
4574#endif
c78679e8
DM
4575 if (nla_put_u32(skb, RTA_IIF, iif))
4576 goto nla_put_failure;
7bc570c8 4577 } else if (dst) {
1da177e4 4578 struct in6_addr saddr_buf;
c78679e8 4579 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 4580 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4581 goto nla_put_failure;
1da177e4 4582 }
2d7202bf 4583
c3968a85
DW
4584 if (rt->rt6i_prefsrc.plen) {
4585 struct in6_addr saddr_buf;
4e3fd7a0 4586 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 4587 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 4588 goto nla_put_failure;
c3968a85
DW
4589 }
4590
4b32b5ad
MKL
4591 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
4592 if (rt->rt6i_pmtu)
4593 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
4594 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
4595 goto nla_put_failure;
4596
c78679e8
DM
4597 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
4598 goto nla_put_failure;
8253947e 4599
beb1afac
DA
4600 /* For multipath routes, walk the siblings list and add
4601 * each as a nexthop within RTA_MULTIPATH.
4602 */
4603 if (rt->rt6i_nsiblings) {
4604 struct rt6_info *sibling, *next_sibling;
4605 struct nlattr *mp;
4606
4607 mp = nla_nest_start(skb, RTA_MULTIPATH);
4608 if (!mp)
4609 goto nla_put_failure;
4610
4611 if (rt6_add_nexthop(skb, rt) < 0)
4612 goto nla_put_failure;
4613
4614 list_for_each_entry_safe(sibling, next_sibling,
4615 &rt->rt6i_siblings, rt6i_siblings) {
4616 if (rt6_add_nexthop(skb, sibling) < 0)
4617 goto nla_put_failure;
4618 }
4619
4620 nla_nest_end(skb, mp);
4621 } else {
5be083ce 4622 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
4623 goto nla_put_failure;
4624 }
4625
8253947e 4626 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 4627
87a50699 4628 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 4629 goto nla_put_failure;
2d7202bf 4630
c78ba6d6
LR
4631 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
4632 goto nla_put_failure;
4633
19e42e45 4634
053c095a
JB
4635 nlmsg_end(skb, nlh);
4636 return 0;
2d7202bf
TG
4637
4638nla_put_failure:
26932566
PM
4639 nlmsg_cancel(skb, nlh);
4640 return -EMSGSIZE;
1da177e4
LT
4641}
4642
1b43af54 4643int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
4644{
4645 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
4646 struct net *net = arg->net;
4647
4648 if (rt == net->ipv6.ip6_null_entry)
4649 return 0;
1da177e4 4650
2d7202bf
TG
4651 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
4652 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
4653
4654 /* user wants prefix routes only */
4655 if (rtm->rtm_flags & RTM_F_PREFIX &&
4656 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
4657 /* success since this is not a prefix route */
4658 return 1;
4659 }
4660 }
1da177e4 4661
1f17e2f2 4662 return rt6_fill_node(net,
191cd582 4663 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 4664 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 4665 NLM_F_MULTI);
1da177e4
LT
4666}
4667
c21ef3e3
DA
4668static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
4669 struct netlink_ext_ack *extack)
1da177e4 4670{
3b1e0a65 4671 struct net *net = sock_net(in_skb->sk);
ab364a6f 4672 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
4673 int err, iif = 0, oif = 0;
4674 struct dst_entry *dst;
ab364a6f 4675 struct rt6_info *rt;
1da177e4 4676 struct sk_buff *skb;
ab364a6f 4677 struct rtmsg *rtm;
4c9483b2 4678 struct flowi6 fl6;
18c3a61c 4679 bool fibmatch;
1da177e4 4680
fceb6435 4681 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 4682 extack);
ab364a6f
TG
4683 if (err < 0)
4684 goto errout;
1da177e4 4685
ab364a6f 4686 err = -EINVAL;
4c9483b2 4687 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
4688 rtm = nlmsg_data(nlh);
4689 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 4690 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 4691
ab364a6f
TG
4692 if (tb[RTA_SRC]) {
4693 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
4694 goto errout;
4695
4e3fd7a0 4696 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
4697 }
4698
4699 if (tb[RTA_DST]) {
4700 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
4701 goto errout;
4702
4e3fd7a0 4703 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
4704 }
4705
4706 if (tb[RTA_IIF])
4707 iif = nla_get_u32(tb[RTA_IIF]);
4708
4709 if (tb[RTA_OIF])
72331bc0 4710 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 4711
2e47b291
LC
4712 if (tb[RTA_MARK])
4713 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
4714
622ec2c9
LC
4715 if (tb[RTA_UID])
4716 fl6.flowi6_uid = make_kuid(current_user_ns(),
4717 nla_get_u32(tb[RTA_UID]));
4718 else
4719 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
4720
1da177e4
LT
4721 if (iif) {
4722 struct net_device *dev;
72331bc0
SL
4723 int flags = 0;
4724
121622db
FW
4725 rcu_read_lock();
4726
4727 dev = dev_get_by_index_rcu(net, iif);
1da177e4 4728 if (!dev) {
121622db 4729 rcu_read_unlock();
1da177e4 4730 err = -ENODEV;
ab364a6f 4731 goto errout;
1da177e4 4732 }
72331bc0
SL
4733
4734 fl6.flowi6_iif = iif;
4735
4736 if (!ipv6_addr_any(&fl6.saddr))
4737 flags |= RT6_LOOKUP_F_HAS_SADDR;
4738
b75cc8f9 4739 dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
121622db
FW
4740
4741 rcu_read_unlock();
72331bc0
SL
4742 } else {
4743 fl6.flowi6_oif = oif;
4744
58acfd71 4745 dst = ip6_route_output(net, NULL, &fl6);
18c3a61c
RP
4746 }
4747
18c3a61c
RP
4748
4749 rt = container_of(dst, struct rt6_info, dst);
4750 if (rt->dst.error) {
4751 err = rt->dst.error;
4752 ip6_rt_put(rt);
4753 goto errout;
1da177e4
LT
4754 }
4755
9d6acb3b
WC
4756 if (rt == net->ipv6.ip6_null_entry) {
4757 err = rt->dst.error;
4758 ip6_rt_put(rt);
4759 goto errout;
4760 }
4761
fba961ab
DM
4762 if (fibmatch && rt->from) {
4763 struct rt6_info *ort = rt->from;
58acfd71
IS
4764
4765 dst_hold(&ort->dst);
4766 ip6_rt_put(rt);
4767 rt = ort;
4768 }
4769
ab364a6f 4770 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 4771 if (!skb) {
94e187c0 4772 ip6_rt_put(rt);
ab364a6f
TG
4773 err = -ENOBUFS;
4774 goto errout;
4775 }
1da177e4 4776
d8d1f30b 4777 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
4778 if (fibmatch)
4779 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
4780 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4781 nlh->nlmsg_seq, 0);
4782 else
4783 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
4784 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
4785 nlh->nlmsg_seq, 0);
1da177e4 4786 if (err < 0) {
ab364a6f
TG
4787 kfree_skb(skb);
4788 goto errout;
1da177e4
LT
4789 }
4790
15e47304 4791 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 4792errout:
1da177e4 4793 return err;
1da177e4
LT
4794}
4795
37a1d361
RP
4796void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
4797 unsigned int nlm_flags)
1da177e4
LT
4798{
4799 struct sk_buff *skb;
5578689a 4800 struct net *net = info->nl_net;
528c4ceb
DL
4801 u32 seq;
4802 int err;
4803
4804 err = -ENOBUFS;
38308473 4805 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 4806
19e42e45 4807 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 4808 if (!skb)
21713ebc
TG
4809 goto errout;
4810
191cd582 4811 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 4812 event, info->portid, seq, nlm_flags);
26932566
PM
4813 if (err < 0) {
4814 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
4815 WARN_ON(err == -EMSGSIZE);
4816 kfree_skb(skb);
4817 goto errout;
4818 }
15e47304 4819 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
4820 info->nlh, gfp_any());
4821 return;
21713ebc
TG
4822errout:
4823 if (err < 0)
5578689a 4824 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
4825}
4826
8ed67789 4827static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 4828 unsigned long event, void *ptr)
8ed67789 4829{
351638e7 4830 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 4831 struct net *net = dev_net(dev);
8ed67789 4832
242d3a49
WC
4833 if (!(dev->flags & IFF_LOOPBACK))
4834 return NOTIFY_OK;
4835
4836 if (event == NETDEV_REGISTER) {
d8d1f30b 4837 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
4838 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
4839#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 4840 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 4841 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 4842 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 4843 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49 4844#endif
76da0704
WC
4845 } else if (event == NETDEV_UNREGISTER &&
4846 dev->reg_state != NETREG_UNREGISTERED) {
4847 /* NETDEV_UNREGISTER could be fired for multiple times by
4848 * netdev_wait_allrefs(). Make sure we only call this once.
4849 */
12d94a80 4850 in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
242d3a49 4851#ifdef CONFIG_IPV6_MULTIPLE_TABLES
12d94a80
ED
4852 in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
4853 in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
4854#endif
4855 }
4856
4857 return NOTIFY_OK;
4858}
4859
1da177e4
LT
4860/*
4861 * /proc
4862 */
4863
4864#ifdef CONFIG_PROC_FS
4865
33120b30 4866static const struct file_operations ipv6_route_proc_fops = {
33120b30
AD
4867 .open = ipv6_route_open,
4868 .read = seq_read,
4869 .llseek = seq_lseek,
8d2ca1d7 4870 .release = seq_release_net,
33120b30
AD
4871};
4872
1da177e4
LT
4873static int rt6_stats_seq_show(struct seq_file *seq, void *v)
4874{
69ddb805 4875 struct net *net = (struct net *)seq->private;
1da177e4 4876 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
4877 net->ipv6.rt6_stats->fib_nodes,
4878 net->ipv6.rt6_stats->fib_route_nodes,
81eb8447 4879 atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
69ddb805
DL
4880 net->ipv6.rt6_stats->fib_rt_entries,
4881 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 4882 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 4883 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
4884
4885 return 0;
4886}
4887
4888static int rt6_stats_seq_open(struct inode *inode, struct file *file)
4889{
de05c557 4890 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
4891}
4892
9a32144e 4893static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
4894 .open = rt6_stats_seq_open,
4895 .read = seq_read,
4896 .llseek = seq_lseek,
b6fcbdb4 4897 .release = single_release_net,
1da177e4
LT
4898};
4899#endif /* CONFIG_PROC_FS */
4900
4901#ifdef CONFIG_SYSCTL
4902
1da177e4 4903static
fe2c6338 4904int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
4905 void __user *buffer, size_t *lenp, loff_t *ppos)
4906{
c486da34
LAG
4907 struct net *net;
4908 int delay;
4909 if (!write)
1da177e4 4910 return -EINVAL;
c486da34
LAG
4911
4912 net = (struct net *)ctl->extra1;
4913 delay = net->ipv6.sysctl.flush_delay;
4914 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 4915 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 4916 return 0;
1da177e4
LT
4917}
4918
fe2c6338 4919struct ctl_table ipv6_route_table_template[] = {
1ab1457c 4920 {
1da177e4 4921 .procname = "flush",
4990509f 4922 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 4923 .maxlen = sizeof(int),
89c8b3a1 4924 .mode = 0200,
6d9f239a 4925 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
4926 },
4927 {
1da177e4 4928 .procname = "gc_thresh",
9a7ec3a9 4929 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
4930 .maxlen = sizeof(int),
4931 .mode = 0644,
6d9f239a 4932 .proc_handler = proc_dointvec,
1da177e4
LT
4933 },
4934 {
1da177e4 4935 .procname = "max_size",
4990509f 4936 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
4937 .maxlen = sizeof(int),
4938 .mode = 0644,
6d9f239a 4939 .proc_handler = proc_dointvec,
1da177e4
LT
4940 },
4941 {
1da177e4 4942 .procname = "gc_min_interval",
4990509f 4943 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4944 .maxlen = sizeof(int),
4945 .mode = 0644,
6d9f239a 4946 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4947 },
4948 {
1da177e4 4949 .procname = "gc_timeout",
4990509f 4950 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
4951 .maxlen = sizeof(int),
4952 .mode = 0644,
6d9f239a 4953 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4954 },
4955 {
1da177e4 4956 .procname = "gc_interval",
4990509f 4957 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
4958 .maxlen = sizeof(int),
4959 .mode = 0644,
6d9f239a 4960 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4961 },
4962 {
1da177e4 4963 .procname = "gc_elasticity",
4990509f 4964 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
4965 .maxlen = sizeof(int),
4966 .mode = 0644,
f3d3f616 4967 .proc_handler = proc_dointvec,
1da177e4
LT
4968 },
4969 {
1da177e4 4970 .procname = "mtu_expires",
4990509f 4971 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
4972 .maxlen = sizeof(int),
4973 .mode = 0644,
6d9f239a 4974 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
4975 },
4976 {
1da177e4 4977 .procname = "min_adv_mss",
4990509f 4978 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
4979 .maxlen = sizeof(int),
4980 .mode = 0644,
f3d3f616 4981 .proc_handler = proc_dointvec,
1da177e4
LT
4982 },
4983 {
1da177e4 4984 .procname = "gc_min_interval_ms",
4990509f 4985 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
4986 .maxlen = sizeof(int),
4987 .mode = 0644,
6d9f239a 4988 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 4989 },
f8572d8f 4990 { }
1da177e4
LT
4991};
4992
2c8c1e72 4993struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
4994{
4995 struct ctl_table *table;
4996
4997 table = kmemdup(ipv6_route_table_template,
4998 sizeof(ipv6_route_table_template),
4999 GFP_KERNEL);
5ee09105
YH
5000
5001 if (table) {
5002 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 5003 table[0].extra1 = net;
86393e52 5004 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
5005 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
5006 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
5007 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
5008 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
5009 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
5010 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
5011 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 5012 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
5013
5014 /* Don't export sysctls to unprivileged users */
5015 if (net->user_ns != &init_user_ns)
5016 table[0].procname = NULL;
5ee09105
YH
5017 }
5018
760f2d01
DL
5019 return table;
5020}
1da177e4
LT
5021#endif
5022
2c8c1e72 5023static int __net_init ip6_route_net_init(struct net *net)
cdb18761 5024{
633d424b 5025 int ret = -ENOMEM;
8ed67789 5026
86393e52
AD
5027 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
5028 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 5029
fc66f95c
ED
5030 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
5031 goto out_ip6_dst_ops;
5032
8ed67789
DL
5033 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
5034 sizeof(*net->ipv6.ip6_null_entry),
5035 GFP_KERNEL);
5036 if (!net->ipv6.ip6_null_entry)
fc66f95c 5037 goto out_ip6_dst_entries;
d8d1f30b 5038 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5039 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
5040 ip6_template_metrics, true);
8ed67789
DL
5041
5042#ifdef CONFIG_IPV6_MULTIPLE_TABLES
feca7d8c 5043 net->ipv6.fib6_has_custom_rules = false;
8ed67789
DL
5044 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
5045 sizeof(*net->ipv6.ip6_prohibit_entry),
5046 GFP_KERNEL);
68fffc67
PZ
5047 if (!net->ipv6.ip6_prohibit_entry)
5048 goto out_ip6_null_entry;
d8d1f30b 5049 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5050 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
5051 ip6_template_metrics, true);
8ed67789
DL
5052
5053 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
5054 sizeof(*net->ipv6.ip6_blk_hole_entry),
5055 GFP_KERNEL);
68fffc67
PZ
5056 if (!net->ipv6.ip6_blk_hole_entry)
5057 goto out_ip6_prohibit_entry;
d8d1f30b 5058 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
5059 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
5060 ip6_template_metrics, true);
8ed67789
DL
5061#endif
5062
b339a47c
PZ
5063 net->ipv6.sysctl.flush_delay = 0;
5064 net->ipv6.sysctl.ip6_rt_max_size = 4096;
5065 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
5066 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
5067 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
5068 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
5069 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
5070 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
5071
6891a346
BT
5072 net->ipv6.ip6_rt_gc_expire = 30*HZ;
5073
8ed67789
DL
5074 ret = 0;
5075out:
5076 return ret;
f2fc6a54 5077
68fffc67
PZ
5078#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5079out_ip6_prohibit_entry:
5080 kfree(net->ipv6.ip6_prohibit_entry);
5081out_ip6_null_entry:
5082 kfree(net->ipv6.ip6_null_entry);
5083#endif
fc66f95c
ED
5084out_ip6_dst_entries:
5085 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 5086out_ip6_dst_ops:
f2fc6a54 5087 goto out;
cdb18761
DL
5088}
5089
2c8c1e72 5090static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 5091{
8ed67789
DL
5092 kfree(net->ipv6.ip6_null_entry);
5093#ifdef CONFIG_IPV6_MULTIPLE_TABLES
5094 kfree(net->ipv6.ip6_prohibit_entry);
5095 kfree(net->ipv6.ip6_blk_hole_entry);
5096#endif
41bb78b4 5097 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
5098}
5099
d189634e
TG
5100static int __net_init ip6_route_net_init_late(struct net *net)
5101{
5102#ifdef CONFIG_PROC_FS
d4beaa66 5103 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
d6444062 5104 proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
5105#endif
5106 return 0;
5107}
5108
5109static void __net_exit ip6_route_net_exit_late(struct net *net)
5110{
5111#ifdef CONFIG_PROC_FS
ece31ffd
G
5112 remove_proc_entry("ipv6_route", net->proc_net);
5113 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
5114#endif
5115}
5116
cdb18761
DL
5117static struct pernet_operations ip6_route_net_ops = {
5118 .init = ip6_route_net_init,
5119 .exit = ip6_route_net_exit,
5120};
5121
c3426b47
DM
5122static int __net_init ipv6_inetpeer_init(struct net *net)
5123{
5124 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
5125
5126 if (!bp)
5127 return -ENOMEM;
5128 inet_peer_base_init(bp);
5129 net->ipv6.peers = bp;
5130 return 0;
5131}
5132
5133static void __net_exit ipv6_inetpeer_exit(struct net *net)
5134{
5135 struct inet_peer_base *bp = net->ipv6.peers;
5136
5137 net->ipv6.peers = NULL;
56a6b248 5138 inetpeer_invalidate_tree(bp);
c3426b47
DM
5139 kfree(bp);
5140}
5141
2b823f72 5142static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
5143 .init = ipv6_inetpeer_init,
5144 .exit = ipv6_inetpeer_exit,
5145};
5146
d189634e
TG
5147static struct pernet_operations ip6_route_net_late_ops = {
5148 .init = ip6_route_net_init_late,
5149 .exit = ip6_route_net_exit_late,
5150};
5151
8ed67789
DL
5152static struct notifier_block ip6_route_dev_notifier = {
5153 .notifier_call = ip6_route_dev_notify,
242d3a49 5154 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
5155};
5156
2f460933
WC
5157void __init ip6_route_init_special_entries(void)
5158{
5159 /* Registering of the loopback is done before this portion of code,
5160 * the loopback reference in rt6_info will not be taken, do it
5161 * manually for init_net */
5162 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
5163 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5164 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
5165 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
5166 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5167 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
5168 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
5169 #endif
5170}
5171
433d49c3 5172int __init ip6_route_init(void)
1da177e4 5173{
433d49c3 5174 int ret;
8d0b94af 5175 int cpu;
433d49c3 5176
9a7ec3a9
DL
5177 ret = -ENOMEM;
5178 ip6_dst_ops_template.kmem_cachep =
e5d679f3 5179 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 5180 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 5181 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 5182 goto out;
14e50e57 5183
fc66f95c 5184 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 5185 if (ret)
bdb3289f 5186 goto out_kmem_cache;
bdb3289f 5187
c3426b47
DM
5188 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
5189 if (ret)
e8803b6c 5190 goto out_dst_entries;
2a0c451a 5191
7e52b33b
DM
5192 ret = register_pernet_subsys(&ip6_route_net_ops);
5193 if (ret)
5194 goto out_register_inetpeer;
c3426b47 5195
5dc121e9
AE
5196 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
5197
e8803b6c 5198 ret = fib6_init();
433d49c3 5199 if (ret)
8ed67789 5200 goto out_register_subsys;
433d49c3 5201
433d49c3
DL
5202 ret = xfrm6_init();
5203 if (ret)
e8803b6c 5204 goto out_fib6_init;
c35b7e72 5205
433d49c3
DL
5206 ret = fib6_rules_init();
5207 if (ret)
5208 goto xfrm6_init;
7e5449c2 5209
d189634e
TG
5210 ret = register_pernet_subsys(&ip6_route_net_late_ops);
5211 if (ret)
5212 goto fib6_rules_init;
5213
16feebcf
FW
5214 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
5215 inet6_rtm_newroute, NULL, 0);
5216 if (ret < 0)
5217 goto out_register_late_subsys;
5218
5219 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
5220 inet6_rtm_delroute, NULL, 0);
5221 if (ret < 0)
5222 goto out_register_late_subsys;
5223
5224 ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
5225 inet6_rtm_getroute, NULL,
5226 RTNL_FLAG_DOIT_UNLOCKED);
5227 if (ret < 0)
d189634e 5228 goto out_register_late_subsys;
c127ea2c 5229
8ed67789 5230 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 5231 if (ret)
d189634e 5232 goto out_register_late_subsys;
8ed67789 5233
8d0b94af
MKL
5234 for_each_possible_cpu(cpu) {
5235 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
5236
5237 INIT_LIST_HEAD(&ul->head);
5238 spin_lock_init(&ul->lock);
5239 }
5240
433d49c3
DL
5241out:
5242 return ret;
5243
d189634e 5244out_register_late_subsys:
16feebcf 5245 rtnl_unregister_all(PF_INET6);
d189634e 5246 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 5247fib6_rules_init:
433d49c3
DL
5248 fib6_rules_cleanup();
5249xfrm6_init:
433d49c3 5250 xfrm6_fini();
2a0c451a
TG
5251out_fib6_init:
5252 fib6_gc_cleanup();
8ed67789
DL
5253out_register_subsys:
5254 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
5255out_register_inetpeer:
5256 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
5257out_dst_entries:
5258 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 5259out_kmem_cache:
f2fc6a54 5260 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 5261 goto out;
1da177e4
LT
5262}
5263
5264void ip6_route_cleanup(void)
5265{
8ed67789 5266 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 5267 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 5268 fib6_rules_cleanup();
1da177e4 5269 xfrm6_fini();
1da177e4 5270 fib6_gc_cleanup();
c3426b47 5271 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 5272 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 5273 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 5274 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 5275}