]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv6/route.c
ipv6: call dst_hold_safe() properly
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / route.c
CommitLineData
1da177e4
LT
1/*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
1ab1457c 6 * Pedro Roque <roque@di.fc.ul.pt>
1da177e4 7 *
1da177e4
LT
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14/* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
c0bece9f
YH
23 * Ville Nuorvala
24 * Fixed routing subtrees.
1da177e4
LT
25 */
26
f3213831
JP
27#define pr_fmt(fmt) "IPv6: " fmt
28
4fc268d2 29#include <linux/capability.h>
1da177e4 30#include <linux/errno.h>
bc3b2d7f 31#include <linux/export.h>
1da177e4
LT
32#include <linux/types.h>
33#include <linux/times.h>
34#include <linux/socket.h>
35#include <linux/sockios.h>
36#include <linux/net.h>
37#include <linux/route.h>
38#include <linux/netdevice.h>
39#include <linux/in6.h>
7bc570c8 40#include <linux/mroute6.h>
1da177e4 41#include <linux/init.h>
1da177e4 42#include <linux/if_arp.h>
1da177e4
LT
43#include <linux/proc_fs.h>
44#include <linux/seq_file.h>
5b7c931d 45#include <linux/nsproxy.h>
5a0e3ad6 46#include <linux/slab.h>
457c4cbc 47#include <net/net_namespace.h>
1da177e4
LT
48#include <net/snmp.h>
49#include <net/ipv6.h>
50#include <net/ip6_fib.h>
51#include <net/ip6_route.h>
52#include <net/ndisc.h>
53#include <net/addrconf.h>
54#include <net/tcp.h>
55#include <linux/rtnetlink.h>
56#include <net/dst.h>
904af04d 57#include <net/dst_metadata.h>
1da177e4 58#include <net/xfrm.h>
8d71740c 59#include <net/netevent.h>
21713ebc 60#include <net/netlink.h>
51ebd318 61#include <net/nexthop.h>
19e42e45 62#include <net/lwtunnel.h>
904af04d 63#include <net/ip_tunnels.h>
ca254490 64#include <net/l3mdev.h>
b811580d 65#include <trace/events/fib6.h>
1da177e4 66
7c0f6ba6 67#include <linux/uaccess.h>
1da177e4
LT
68
69#ifdef CONFIG_SYSCTL
70#include <linux/sysctl.h>
71#endif
72
/*
 * Result of checking the next-hop neighbour of a route.
 *
 * Negative values are failures.  They are returned through the same int
 * channel as route scores, so their numeric values must stay negative.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* route must not be used */
	RT6_NUD_FAIL_PROBE	= -2,	/* neighbour is in NUD_FAILED state */
	RT6_NUD_FAIL_DO_RR	= -1,	/* no neighbour entry: ask for round-robin */
	RT6_NUD_SUCCEED		= 1	/* neighbour is usable (or not needed) */
};
79
83a09abd 80static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
1da177e4 81static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
0dbaee3b 82static unsigned int ip6_default_advmss(const struct dst_entry *dst);
ebb762f2 83static unsigned int ip6_mtu(const struct dst_entry *dst);
1da177e4
LT
84static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85static void ip6_dst_destroy(struct dst_entry *);
86static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
569d3645 88static int ip6_dst_gc(struct dst_ops *ops);
1da177e4
LT
89
90static int ip6_pkt_discard(struct sk_buff *skb);
ede2059d 91static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
7150aede 92static int ip6_pkt_prohibit(struct sk_buff *skb);
ede2059d 93static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
1da177e4 94static void ip6_link_failure(struct sk_buff *skb);
6700c270
DM
95static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
4b32b5ad 99static void rt6_dst_from_metrics_check(struct rt6_info *rt);
52bd4c0c 100static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
16a16cd3
DA
101static size_t rt6_nlmsg_size(struct rt6_info *rt);
102static int rt6_fill_node(struct net *net,
103 struct sk_buff *skb, struct rt6_info *rt,
104 struct in6_addr *dst, struct in6_addr *src,
105 int iif, int type, u32 portid, u32 seq,
106 unsigned int flags);
1da177e4 107
70ceb4f5 108#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 109static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 110 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
111 const struct in6_addr *gwaddr,
112 struct net_device *dev,
95c96174 113 unsigned int pref);
efa2cea0 114static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 115 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
116 const struct in6_addr *gwaddr,
117 struct net_device *dev);
70ceb4f5
YH
118#endif
119
8d0b94af
MKL
120struct uncached_list {
121 spinlock_t lock;
122 struct list_head head;
123};
124
125static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
126
127static void rt6_uncached_list_add(struct rt6_info *rt)
128{
129 struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);
130
131 rt->dst.flags |= DST_NOCACHE;
132 rt->rt6i_uncached_list = ul;
133
134 spin_lock_bh(&ul->lock);
135 list_add_tail(&rt->rt6i_uncached, &ul->head);
136 spin_unlock_bh(&ul->lock);
137}
138
139static void rt6_uncached_list_del(struct rt6_info *rt)
140{
141 if (!list_empty(&rt->rt6i_uncached)) {
142 struct uncached_list *ul = rt->rt6i_uncached_list;
143
144 spin_lock_bh(&ul->lock);
145 list_del(&rt->rt6i_uncached);
146 spin_unlock_bh(&ul->lock);
147 }
148}
149
150static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
151{
152 struct net_device *loopback_dev = net->loopback_dev;
153 int cpu;
154
e332bc67
EB
155 if (dev == loopback_dev)
156 return;
157
8d0b94af
MKL
158 for_each_possible_cpu(cpu) {
159 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
160 struct rt6_info *rt;
161
162 spin_lock_bh(&ul->lock);
163 list_for_each_entry(rt, &ul->head, rt6i_uncached) {
164 struct inet6_dev *rt_idev = rt->rt6i_idev;
165 struct net_device *rt_dev = rt->dst.dev;
166
e332bc67 167 if (rt_idev->dev == dev) {
8d0b94af
MKL
168 rt->rt6i_idev = in6_dev_get(loopback_dev);
169 in6_dev_put(rt_idev);
170 }
171
e332bc67 172 if (rt_dev == dev) {
8d0b94af
MKL
173 rt->dst.dev = loopback_dev;
174 dev_hold(rt->dst.dev);
175 dev_put(rt_dev);
176 }
177 }
178 spin_unlock_bh(&ul->lock);
179 }
180}
181
d52d3997
MKL
182static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
183{
184 return dst_metrics_write_ptr(rt->dst.from);
185}
186
06582540
DM
187static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
188{
4b32b5ad 189 struct rt6_info *rt = (struct rt6_info *)dst;
06582540 190
d52d3997
MKL
191 if (rt->rt6i_flags & RTF_PCPU)
192 return rt6_pcpu_cow_metrics(rt);
193 else if (rt->rt6i_flags & RTF_CACHE)
4b32b5ad
MKL
194 return NULL;
195 else
3b471175 196 return dst_cow_metrics_generic(dst, old);
06582540
DM
197}
198
f894cbf8
DM
199static inline const void *choose_neigh_daddr(struct rt6_info *rt,
200 struct sk_buff *skb,
201 const void *daddr)
39232973
DM
202{
203 struct in6_addr *p = &rt->rt6i_gateway;
204
a7563f34 205 if (!ipv6_addr_any(p))
39232973 206 return (const void *) p;
f894cbf8
DM
207 else if (skb)
208 return &ipv6_hdr(skb)->daddr;
39232973
DM
209 return daddr;
210}
211
f894cbf8
DM
212static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
213 struct sk_buff *skb,
214 const void *daddr)
d3aaeb38 215{
39232973
DM
216 struct rt6_info *rt = (struct rt6_info *) dst;
217 struct neighbour *n;
218
f894cbf8 219 daddr = choose_neigh_daddr(rt, skb, daddr);
8e022ee6 220 n = __ipv6_neigh_lookup(dst->dev, daddr);
f83c7790
DM
221 if (n)
222 return n;
223 return neigh_create(&nd_tbl, daddr, dst->dev);
224}
225
63fca65d
JA
226static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
227{
228 struct net_device *dev = dst->dev;
229 struct rt6_info *rt = (struct rt6_info *)dst;
230
231 daddr = choose_neigh_daddr(rt, NULL, daddr);
232 if (!daddr)
233 return;
234 if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
235 return;
236 if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
237 return;
238 __ipv6_confirm_neigh(dev, daddr);
239}
240
9a7ec3a9 241static struct dst_ops ip6_dst_ops_template = {
1da177e4 242 .family = AF_INET6,
1da177e4
LT
243 .gc = ip6_dst_gc,
244 .gc_thresh = 1024,
245 .check = ip6_dst_check,
0dbaee3b 246 .default_advmss = ip6_default_advmss,
ebb762f2 247 .mtu = ip6_mtu,
06582540 248 .cow_metrics = ipv6_cow_metrics,
1da177e4
LT
249 .destroy = ip6_dst_destroy,
250 .ifdown = ip6_dst_ifdown,
251 .negative_advice = ip6_negative_advice,
252 .link_failure = ip6_link_failure,
253 .update_pmtu = ip6_rt_update_pmtu,
6e157b6a 254 .redirect = rt6_do_redirect,
9f8955cc 255 .local_out = __ip6_local_out,
d3aaeb38 256 .neigh_lookup = ip6_neigh_lookup,
63fca65d 257 .confirm_neigh = ip6_confirm_neigh,
1da177e4
LT
258};
259
ebb762f2 260static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
ec831ea7 261{
618f9bc7
SK
262 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
263
264 return mtu ? : dst->dev->mtu;
ec831ea7
RD
265}
266
6700c270
DM
267static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
268 struct sk_buff *skb, u32 mtu)
14e50e57
DM
269{
270}
271
6700c270
DM
/* Blackhole routes ignore redirects: intentionally empty. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst,
				      struct sock *sk,
				      struct sk_buff *skb)
{
}
276
14e50e57
DM
277static struct dst_ops ip6_dst_blackhole_ops = {
278 .family = AF_INET6,
14e50e57
DM
279 .destroy = ip6_dst_destroy,
280 .check = ip6_dst_check,
ebb762f2 281 .mtu = ip6_blackhole_mtu,
214f45c9 282 .default_advmss = ip6_default_advmss,
14e50e57 283 .update_pmtu = ip6_rt_blackhole_update_pmtu,
b587ee3b 284 .redirect = ip6_rt_blackhole_redirect,
0a1f5962 285 .cow_metrics = dst_cow_metrics_generic,
d3aaeb38 286 .neigh_lookup = ip6_neigh_lookup,
14e50e57
DM
287};
288
62fa8a84 289static const u32 ip6_template_metrics[RTAX_MAX] = {
14edd87d 290 [RTAX_HOPLIMIT - 1] = 0,
62fa8a84
DM
291};
292
fb0af4c7 293static const struct rt6_info ip6_null_entry_template = {
d8d1f30b
CG
294 .dst = {
295 .__refcnt = ATOMIC_INIT(1),
296 .__use = 1,
2c20cbd7 297 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 298 .error = -ENETUNREACH,
d8d1f30b
CG
299 .input = ip6_pkt_discard,
300 .output = ip6_pkt_discard_out,
1da177e4
LT
301 },
302 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 303 .rt6i_protocol = RTPROT_KERNEL,
1da177e4
LT
304 .rt6i_metric = ~(u32) 0,
305 .rt6i_ref = ATOMIC_INIT(1),
306};
307
101367c2
TG
308#ifdef CONFIG_IPV6_MULTIPLE_TABLES
309
fb0af4c7 310static const struct rt6_info ip6_prohibit_entry_template = {
d8d1f30b
CG
311 .dst = {
312 .__refcnt = ATOMIC_INIT(1),
313 .__use = 1,
2c20cbd7 314 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 315 .error = -EACCES,
d8d1f30b
CG
316 .input = ip6_pkt_prohibit,
317 .output = ip6_pkt_prohibit_out,
101367c2
TG
318 },
319 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 320 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
321 .rt6i_metric = ~(u32) 0,
322 .rt6i_ref = ATOMIC_INIT(1),
323};
324
fb0af4c7 325static const struct rt6_info ip6_blk_hole_entry_template = {
d8d1f30b
CG
326 .dst = {
327 .__refcnt = ATOMIC_INIT(1),
328 .__use = 1,
2c20cbd7 329 .obsolete = DST_OBSOLETE_FORCE_CHK,
d8d1f30b 330 .error = -EINVAL,
d8d1f30b 331 .input = dst_discard,
ede2059d 332 .output = dst_discard_out,
101367c2
TG
333 },
334 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
4f724279 335 .rt6i_protocol = RTPROT_KERNEL,
101367c2
TG
336 .rt6i_metric = ~(u32) 0,
337 .rt6i_ref = ATOMIC_INIT(1),
338};
339
340#endif
341
ebfa45f0
MKL
342static void rt6_info_init(struct rt6_info *rt)
343{
344 struct dst_entry *dst = &rt->dst;
345
346 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
347 INIT_LIST_HEAD(&rt->rt6i_siblings);
348 INIT_LIST_HEAD(&rt->rt6i_uncached);
349}
350
1da177e4 351/* allocate dst with ip6_dst_ops */
d52d3997
MKL
352static struct rt6_info *__ip6_dst_alloc(struct net *net,
353 struct net_device *dev,
ad706862 354 int flags)
1da177e4 355{
97bab73f 356 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
1cfb71ee 357 1, DST_OBSOLETE_FORCE_CHK, flags);
cf911662 358
ebfa45f0
MKL
359 if (rt)
360 rt6_info_init(rt);
8104891b 361
cf911662 362 return rt;
1da177e4
LT
363}
364
9ab179d8
DA
365struct rt6_info *ip6_dst_alloc(struct net *net,
366 struct net_device *dev,
367 int flags)
d52d3997 368{
ad706862 369 struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
d52d3997
MKL
370
371 if (rt) {
372 rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
373 if (rt->rt6i_pcpu) {
374 int cpu;
375
376 for_each_possible_cpu(cpu) {
377 struct rt6_info **p;
378
379 p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
380 /* no one shares rt */
381 *p = NULL;
382 }
383 } else {
1cfb71ee
WW
384 dst_release(&rt->dst);
385 if (!(flags & DST_NOCACHE))
386 dst_destroy((struct dst_entry *)rt);
d52d3997
MKL
387 return NULL;
388 }
389 }
390
391 return rt;
392}
9ab179d8 393EXPORT_SYMBOL(ip6_dst_alloc);
d52d3997 394
1da177e4
LT
395static void ip6_dst_destroy(struct dst_entry *dst)
396{
397 struct rt6_info *rt = (struct rt6_info *)dst;
ecd98837 398 struct dst_entry *from = dst->from;
8d0b94af 399 struct inet6_dev *idev;
1da177e4 400
4b32b5ad 401 dst_destroy_metrics_generic(dst);
87775312 402 free_percpu(rt->rt6i_pcpu);
8d0b94af
MKL
403 rt6_uncached_list_del(rt);
404
405 idev = rt->rt6i_idev;
38308473 406 if (idev) {
1da177e4
LT
407 rt->rt6i_idev = NULL;
408 in6_dev_put(idev);
1ab1457c 409 }
1716a961 410
ecd98837
YH
411 dst->from = NULL;
412 dst_release(from);
b3419363
DM
413}
414
1da177e4
LT
415static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
416 int how)
417{
418 struct rt6_info *rt = (struct rt6_info *)dst;
419 struct inet6_dev *idev = rt->rt6i_idev;
5a3e55d6 420 struct net_device *loopback_dev =
c346dca1 421 dev_net(dev)->loopback_dev;
1da177e4 422
97cac082
DM
423 if (dev != loopback_dev) {
424 if (idev && idev->dev == dev) {
425 struct inet6_dev *loopback_idev =
426 in6_dev_get(loopback_dev);
427 if (loopback_idev) {
428 rt->rt6i_idev = loopback_idev;
429 in6_dev_put(idev);
430 }
431 }
1da177e4
LT
432 }
433}
434
5973fb1e
MKL
435static bool __rt6_check_expired(const struct rt6_info *rt)
436{
437 if (rt->rt6i_flags & RTF_EXPIRES)
438 return time_after(jiffies, rt->dst.expires);
439 else
440 return false;
441}
442
a50feda5 443static bool rt6_check_expired(const struct rt6_info *rt)
1da177e4 444{
1716a961
G
445 if (rt->rt6i_flags & RTF_EXPIRES) {
446 if (time_after(jiffies, rt->dst.expires))
a50feda5 447 return true;
1716a961 448 } else if (rt->dst.from) {
3fd91fb3 449 return rt6_check_expired((struct rt6_info *) rt->dst.from);
1716a961 450 }
a50feda5 451 return false;
1da177e4
LT
452}
453
51ebd318
ND
454/* Multipath route selection:
455 * Hash based function using packet header and flowlabel.
456 * Adapted from fib_info_hashfn()
457 */
458static int rt6_info_hash_nhsfn(unsigned int candidate_count,
459 const struct flowi6 *fl6)
460{
644d0e65 461 return get_hash_from_flowi6(fl6) % candidate_count;
51ebd318
ND
462}
463
464static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
52bd4c0c
ND
465 struct flowi6 *fl6, int oif,
466 int strict)
51ebd318
ND
467{
468 struct rt6_info *sibling, *next_sibling;
469 int route_choosen;
470
471 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
472 /* Don't change the route, if route_choosen == 0
473 * (siblings does not include ourself)
474 */
475 if (route_choosen)
476 list_for_each_entry_safe(sibling, next_sibling,
477 &match->rt6i_siblings, rt6i_siblings) {
478 route_choosen--;
479 if (route_choosen == 0) {
52bd4c0c
ND
480 if (rt6_score_route(sibling, oif, strict) < 0)
481 break;
51ebd318
ND
482 match = sibling;
483 break;
484 }
485 }
486 return match;
487}
488
1da177e4 489/*
c71099ac 490 * Route lookup. Any table->tb6_lock is implied.
1da177e4
LT
491 */
492
8ed67789
DL
493static inline struct rt6_info *rt6_device_match(struct net *net,
494 struct rt6_info *rt,
b71d1d42 495 const struct in6_addr *saddr,
1da177e4 496 int oif,
d420895e 497 int flags)
1da177e4
LT
498{
499 struct rt6_info *local = NULL;
500 struct rt6_info *sprt;
501
dd3abc4e
YH
502 if (!oif && ipv6_addr_any(saddr))
503 goto out;
504
d8d1f30b 505 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
d1918542 506 struct net_device *dev = sprt->dst.dev;
dd3abc4e
YH
507
508 if (oif) {
1da177e4
LT
509 if (dev->ifindex == oif)
510 return sprt;
511 if (dev->flags & IFF_LOOPBACK) {
38308473 512 if (!sprt->rt6i_idev ||
1da177e4 513 sprt->rt6i_idev->dev->ifindex != oif) {
17fb0b2b 514 if (flags & RT6_LOOKUP_F_IFACE)
1da177e4 515 continue;
17fb0b2b
DA
516 if (local &&
517 local->rt6i_idev->dev->ifindex == oif)
1da177e4
LT
518 continue;
519 }
520 local = sprt;
521 }
dd3abc4e
YH
522 } else {
523 if (ipv6_chk_addr(net, saddr, dev,
524 flags & RT6_LOOKUP_F_IFACE))
525 return sprt;
1da177e4 526 }
dd3abc4e 527 }
1da177e4 528
dd3abc4e 529 if (oif) {
1da177e4
LT
530 if (local)
531 return local;
532
d420895e 533 if (flags & RT6_LOOKUP_F_IFACE)
8ed67789 534 return net->ipv6.ip6_null_entry;
1da177e4 535 }
dd3abc4e 536out:
1da177e4
LT
537 return rt;
538}
539
27097255 540#ifdef CONFIG_IPV6_ROUTER_PREF
c2f17e82
HFS
541struct __rt6_probe_work {
542 struct work_struct work;
543 struct in6_addr target;
544 struct net_device *dev;
545};
546
547static void rt6_probe_deferred(struct work_struct *w)
548{
549 struct in6_addr mcaddr;
550 struct __rt6_probe_work *work =
551 container_of(w, struct __rt6_probe_work, work);
552
553 addrconf_addr_solict_mult(&work->target, &mcaddr);
adc176c5 554 ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
c2f17e82 555 dev_put(work->dev);
662f5533 556 kfree(work);
c2f17e82
HFS
557}
558
27097255
YH
559static void rt6_probe(struct rt6_info *rt)
560{
990edb42 561 struct __rt6_probe_work *work;
f2c31e32 562 struct neighbour *neigh;
27097255
YH
563 /*
564 * Okay, this does not seem to be appropriate
565 * for now, however, we need to check if it
566 * is really so; aka Router Reachability Probing.
567 *
568 * Router Reachability Probe MUST be rate-limited
569 * to no more than one per minute.
570 */
2152caea 571 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
7ff74a59 572 return;
2152caea
YH
573 rcu_read_lock_bh();
574 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
575 if (neigh) {
8d6c31bf
MKL
576 if (neigh->nud_state & NUD_VALID)
577 goto out;
578
990edb42 579 work = NULL;
2152caea 580 write_lock(&neigh->lock);
990edb42
MKL
581 if (!(neigh->nud_state & NUD_VALID) &&
582 time_after(jiffies,
583 neigh->updated +
584 rt->rt6i_idev->cnf.rtr_probe_interval)) {
585 work = kmalloc(sizeof(*work), GFP_ATOMIC);
586 if (work)
587 __neigh_set_probe_once(neigh);
c2f17e82 588 }
2152caea 589 write_unlock(&neigh->lock);
990edb42
MKL
590 } else {
591 work = kmalloc(sizeof(*work), GFP_ATOMIC);
f2c31e32 592 }
990edb42
MKL
593
594 if (work) {
595 INIT_WORK(&work->work, rt6_probe_deferred);
596 work->target = rt->rt6i_gateway;
597 dev_hold(rt->dst.dev);
598 work->dev = rt->dst.dev;
599 schedule_work(&work->work);
600 }
601
8d6c31bf 602out:
2152caea 603 rcu_read_unlock_bh();
27097255
YH
604}
605#else
/* Without CONFIG_IPV6_ROUTER_PREF there is no router probing: no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
609#endif
610
1da177e4 611/*
554cfb7e 612 * Default Router Selection (RFC 2461 6.3.6)
1da177e4 613 */
b6f99a21 614static inline int rt6_check_dev(struct rt6_info *rt, int oif)
554cfb7e 615{
d1918542 616 struct net_device *dev = rt->dst.dev;
161980f4 617 if (!oif || dev->ifindex == oif)
554cfb7e 618 return 2;
161980f4
DM
619 if ((dev->flags & IFF_LOOPBACK) &&
620 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
621 return 1;
622 return 0;
554cfb7e 623}
1da177e4 624
afc154e9 625static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
1da177e4 626{
f2c31e32 627 struct neighbour *neigh;
afc154e9 628 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
f2c31e32 629
4d0c5911
YH
630 if (rt->rt6i_flags & RTF_NONEXTHOP ||
631 !(rt->rt6i_flags & RTF_GATEWAY))
afc154e9 632 return RT6_NUD_SUCCEED;
145a3621
YH
633
634 rcu_read_lock_bh();
635 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
636 if (neigh) {
637 read_lock(&neigh->lock);
554cfb7e 638 if (neigh->nud_state & NUD_VALID)
afc154e9 639 ret = RT6_NUD_SUCCEED;
398bcbeb 640#ifdef CONFIG_IPV6_ROUTER_PREF
a5a81f0b 641 else if (!(neigh->nud_state & NUD_FAILED))
afc154e9 642 ret = RT6_NUD_SUCCEED;
7e980569
JB
643 else
644 ret = RT6_NUD_FAIL_PROBE;
398bcbeb 645#endif
145a3621 646 read_unlock(&neigh->lock);
afc154e9
HFS
647 } else {
648 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
7e980569 649 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
a5a81f0b 650 }
145a3621
YH
651 rcu_read_unlock_bh();
652
a5a81f0b 653 return ret;
1da177e4
LT
654}
655
554cfb7e
YH
656static int rt6_score_route(struct rt6_info *rt, int oif,
657 int strict)
1da177e4 658{
a5a81f0b 659 int m;
1ab1457c 660
4d0c5911 661 m = rt6_check_dev(rt, oif);
77d16f45 662 if (!m && (strict & RT6_LOOKUP_F_IFACE))
afc154e9 663 return RT6_NUD_FAIL_HARD;
ebacaaa0
YH
664#ifdef CONFIG_IPV6_ROUTER_PREF
665 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
666#endif
afc154e9
HFS
667 if (strict & RT6_LOOKUP_F_REACHABLE) {
668 int n = rt6_check_neigh(rt);
669 if (n < 0)
670 return n;
671 }
554cfb7e
YH
672 return m;
673}
674
f11e6659 675static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
afc154e9
HFS
676 int *mpri, struct rt6_info *match,
677 bool *do_rr)
554cfb7e 678{
f11e6659 679 int m;
afc154e9 680 bool match_do_rr = false;
35103d11
AG
681 struct inet6_dev *idev = rt->rt6i_idev;
682 struct net_device *dev = rt->dst.dev;
683
684 if (dev && !netif_carrier_ok(dev) &&
d5d32e4b
DA
685 idev->cnf.ignore_routes_with_linkdown &&
686 !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
35103d11 687 goto out;
f11e6659
DM
688
689 if (rt6_check_expired(rt))
690 goto out;
691
692 m = rt6_score_route(rt, oif, strict);
7e980569 693 if (m == RT6_NUD_FAIL_DO_RR) {
afc154e9
HFS
694 match_do_rr = true;
695 m = 0; /* lowest valid score */
7e980569 696 } else if (m == RT6_NUD_FAIL_HARD) {
f11e6659 697 goto out;
afc154e9
HFS
698 }
699
700 if (strict & RT6_LOOKUP_F_REACHABLE)
701 rt6_probe(rt);
f11e6659 702
7e980569 703 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
f11e6659 704 if (m > *mpri) {
afc154e9 705 *do_rr = match_do_rr;
f11e6659
DM
706 *mpri = m;
707 match = rt;
f11e6659 708 }
f11e6659
DM
709out:
710 return match;
711}
712
713static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
714 struct rt6_info *rr_head,
afc154e9
HFS
715 u32 metric, int oif, int strict,
716 bool *do_rr)
f11e6659 717{
9fbdcfaf 718 struct rt6_info *rt, *match, *cont;
554cfb7e 719 int mpri = -1;
1da177e4 720
f11e6659 721 match = NULL;
9fbdcfaf
SK
722 cont = NULL;
723 for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
724 if (rt->rt6i_metric != metric) {
725 cont = rt;
726 break;
727 }
728
729 match = find_match(rt, oif, strict, &mpri, match, do_rr);
730 }
731
732 for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
733 if (rt->rt6i_metric != metric) {
734 cont = rt;
735 break;
736 }
737
afc154e9 738 match = find_match(rt, oif, strict, &mpri, match, do_rr);
9fbdcfaf
SK
739 }
740
741 if (match || !cont)
742 return match;
743
744 for (rt = cont; rt; rt = rt->dst.rt6_next)
afc154e9 745 match = find_match(rt, oif, strict, &mpri, match, do_rr);
1da177e4 746
f11e6659
DM
747 return match;
748}
1da177e4 749
f11e6659
DM
750static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
751{
752 struct rt6_info *match, *rt0;
8ed67789 753 struct net *net;
afc154e9 754 bool do_rr = false;
1da177e4 755
f11e6659
DM
756 rt0 = fn->rr_ptr;
757 if (!rt0)
758 fn->rr_ptr = rt0 = fn->leaf;
1da177e4 759
afc154e9
HFS
760 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
761 &do_rr);
1da177e4 762
afc154e9 763 if (do_rr) {
d8d1f30b 764 struct rt6_info *next = rt0->dst.rt6_next;
f11e6659 765
554cfb7e 766 /* no entries matched; do round-robin */
f11e6659
DM
767 if (!next || next->rt6i_metric != rt0->rt6i_metric)
768 next = fn->leaf;
769
770 if (next != rt0)
771 fn->rr_ptr = next;
1da177e4 772 }
1da177e4 773
d1918542 774 net = dev_net(rt0->dst.dev);
a02cec21 775 return match ? match : net->ipv6.ip6_null_entry;
1da177e4
LT
776}
777
8b9df265
MKL
778static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
779{
780 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
781}
782
70ceb4f5
YH
783#ifdef CONFIG_IPV6_ROUTE_INFO
784int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
b71d1d42 785 const struct in6_addr *gwaddr)
70ceb4f5 786{
c346dca1 787 struct net *net = dev_net(dev);
70ceb4f5
YH
788 struct route_info *rinfo = (struct route_info *) opt;
789 struct in6_addr prefix_buf, *prefix;
790 unsigned int pref;
4bed72e4 791 unsigned long lifetime;
70ceb4f5
YH
792 struct rt6_info *rt;
793
794 if (len < sizeof(struct route_info)) {
795 return -EINVAL;
796 }
797
798 /* Sanity check for prefix_len and length */
799 if (rinfo->length > 3) {
800 return -EINVAL;
801 } else if (rinfo->prefix_len > 128) {
802 return -EINVAL;
803 } else if (rinfo->prefix_len > 64) {
804 if (rinfo->length < 2) {
805 return -EINVAL;
806 }
807 } else if (rinfo->prefix_len > 0) {
808 if (rinfo->length < 1) {
809 return -EINVAL;
810 }
811 }
812
813 pref = rinfo->route_pref;
814 if (pref == ICMPV6_ROUTER_PREF_INVALID)
3933fc95 815 return -EINVAL;
70ceb4f5 816
4bed72e4 817 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
70ceb4f5
YH
818
819 if (rinfo->length == 3)
820 prefix = (struct in6_addr *)rinfo->prefix;
821 else {
822 /* this function is safe */
823 ipv6_addr_prefix(&prefix_buf,
824 (struct in6_addr *)rinfo->prefix,
825 rinfo->prefix_len);
826 prefix = &prefix_buf;
827 }
828
f104a567
DJ
829 if (rinfo->prefix_len == 0)
830 rt = rt6_get_dflt_router(gwaddr, dev);
831 else
832 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
830218c1 833 gwaddr, dev);
70ceb4f5
YH
834
835 if (rt && !lifetime) {
e0a1ad73 836 ip6_del_rt(rt);
70ceb4f5
YH
837 rt = NULL;
838 }
839
840 if (!rt && lifetime)
830218c1
DA
841 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
842 dev, pref);
70ceb4f5
YH
843 else if (rt)
844 rt->rt6i_flags = RTF_ROUTEINFO |
845 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
846
847 if (rt) {
1716a961
G
848 if (!addrconf_finite_timeout(lifetime))
849 rt6_clean_expires(rt);
850 else
851 rt6_set_expires(rt, jiffies + HZ * lifetime);
852
94e187c0 853 ip6_rt_put(rt);
70ceb4f5
YH
854 }
855 return 0;
856}
857#endif
858
a3c00e46
MKL
859static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
860 struct in6_addr *saddr)
861{
862 struct fib6_node *pn;
863 while (1) {
864 if (fn->fn_flags & RTN_TL_ROOT)
865 return NULL;
866 pn = fn->parent;
867 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
868 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
869 else
870 fn = pn;
871 if (fn->fn_flags & RTN_RTINFO)
872 return fn;
873 }
874}
c71099ac 875
8ed67789
DL
876static struct rt6_info *ip6_pol_route_lookup(struct net *net,
877 struct fib6_table *table,
4c9483b2 878 struct flowi6 *fl6, int flags)
1da177e4
LT
879{
880 struct fib6_node *fn;
881 struct rt6_info *rt;
882
c71099ac 883 read_lock_bh(&table->tb6_lock);
4c9483b2 884 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
c71099ac
TG
885restart:
886 rt = fn->leaf;
4c9483b2 887 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
51ebd318 888 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
52bd4c0c 889 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
a3c00e46
MKL
890 if (rt == net->ipv6.ip6_null_entry) {
891 fn = fib6_backtrack(fn, &fl6->saddr);
892 if (fn)
893 goto restart;
894 }
d8d1f30b 895 dst_use(&rt->dst, jiffies);
c71099ac 896 read_unlock_bh(&table->tb6_lock);
b811580d
DA
897
898 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
899
c71099ac
TG
900 return rt;
901
902}
903
67ba4152 904struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
ea6e574e
FW
905 int flags)
906{
907 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
908}
909EXPORT_SYMBOL_GPL(ip6_route_lookup);
910
9acd9f3a
YH
911struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
912 const struct in6_addr *saddr, int oif, int strict)
c71099ac 913{
4c9483b2
DM
914 struct flowi6 fl6 = {
915 .flowi6_oif = oif,
916 .daddr = *daddr,
c71099ac
TG
917 };
918 struct dst_entry *dst;
77d16f45 919 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
c71099ac 920
adaa70bb 921 if (saddr) {
4c9483b2 922 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
adaa70bb
TG
923 flags |= RT6_LOOKUP_F_HAS_SADDR;
924 }
925
4c9483b2 926 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
c71099ac
TG
927 if (dst->error == 0)
928 return (struct rt6_info *) dst;
929
930 dst_release(dst);
931
1da177e4
LT
932 return NULL;
933}
7159039a
YH
934EXPORT_SYMBOL(rt6_lookup);
935
/* ip6_ins_rt is called with table->tb6_lock NOT held.
 * It takes a new route entry; if the addition fails for any reason,
 * the route is released.
 * The caller must hold a reference on the dst before calling it.
 */
941
e5fd387a 942static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
333c4301
DA
943 struct mx6_config *mxc,
944 struct netlink_ext_ack *extack)
1da177e4
LT
945{
946 int err;
c71099ac 947 struct fib6_table *table;
1da177e4 948
c71099ac
TG
949 table = rt->rt6i_table;
950 write_lock_bh(&table->tb6_lock);
333c4301 951 err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
c71099ac 952 write_unlock_bh(&table->tb6_lock);
1da177e4
LT
953
954 return err;
955}
956
40e22e8f
TG
957int ip6_ins_rt(struct rt6_info *rt)
958{
e715b6d3
FW
959 struct nl_info info = { .nl_net = dev_net(rt->dst.dev), };
960 struct mx6_config mxc = { .mx = NULL, };
961
1cfb71ee
WW
962 /* Hold dst to account for the reference from the fib6 tree */
963 dst_hold(&rt->dst);
333c4301 964 return __ip6_ins_rt(rt, &info, &mxc, NULL);
40e22e8f
TG
965}
966
8b9df265
MKL
967static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
968 const struct in6_addr *daddr,
969 const struct in6_addr *saddr)
1da177e4 970{
1da177e4
LT
971 struct rt6_info *rt;
972
973 /*
974 * Clone the route.
975 */
976
d52d3997 977 if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
83a09abd 978 ort = (struct rt6_info *)ort->dst.from;
1da177e4 979
ad706862 980 rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
83a09abd
MKL
981
982 if (!rt)
983 return NULL;
984
985 ip6_rt_copy_init(rt, ort);
986 rt->rt6i_flags |= RTF_CACHE;
987 rt->rt6i_metric = 0;
988 rt->dst.flags |= DST_HOST;
989 rt->rt6i_dst.addr = *daddr;
990 rt->rt6i_dst.plen = 128;
1da177e4 991
83a09abd
MKL
992 if (!rt6_is_gw_or_nonexthop(ort)) {
993 if (ort->rt6i_dst.plen != 128 &&
994 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
995 rt->rt6i_flags |= RTF_ANYCAST;
1da177e4 996#ifdef CONFIG_IPV6_SUBTREES
83a09abd
MKL
997 if (rt->rt6i_src.plen && saddr) {
998 rt->rt6i_src.addr = *saddr;
999 rt->rt6i_src.plen = 128;
8b9df265 1000 }
83a09abd 1001#endif
95a9a5ba 1002 }
1da177e4 1003
95a9a5ba
YH
1004 return rt;
1005}
1da177e4 1006
d52d3997
MKL
1007static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
1008{
1009 struct rt6_info *pcpu_rt;
1010
1011 pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
ad706862 1012 rt->dst.dev, rt->dst.flags);
d52d3997
MKL
1013
1014 if (!pcpu_rt)
1015 return NULL;
1016 ip6_rt_copy_init(pcpu_rt, rt);
1017 pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
1018 pcpu_rt->rt6i_flags |= RTF_PCPU;
1019 return pcpu_rt;
1020}
1021
1022/* It should be called with read_lock_bh(&tb6_lock) acquired */
1023static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
1024{
a73e4195 1025 struct rt6_info *pcpu_rt, **p;
d52d3997
MKL
1026
1027 p = this_cpu_ptr(rt->rt6i_pcpu);
1028 pcpu_rt = *p;
1029
a73e4195
MKL
1030 if (pcpu_rt) {
1031 dst_hold(&pcpu_rt->dst);
1032 rt6_dst_from_metrics_check(pcpu_rt);
1033 }
1034 return pcpu_rt;
1035}
1036
1037static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
1038{
9c7370a1 1039 struct fib6_table *table = rt->rt6i_table;
a73e4195 1040 struct rt6_info *pcpu_rt, *prev, **p;
d52d3997
MKL
1041
1042 pcpu_rt = ip6_rt_pcpu_alloc(rt);
1043 if (!pcpu_rt) {
1044 struct net *net = dev_net(rt->dst.dev);
1045
9c7370a1
MKL
1046 dst_hold(&net->ipv6.ip6_null_entry->dst);
1047 return net->ipv6.ip6_null_entry;
d52d3997
MKL
1048 }
1049
9c7370a1
MKL
1050 read_lock_bh(&table->tb6_lock);
1051 if (rt->rt6i_pcpu) {
1052 p = this_cpu_ptr(rt->rt6i_pcpu);
1053 prev = cmpxchg(p, NULL, pcpu_rt);
1054 if (prev) {
1055 /* If someone did it before us, return prev instead */
1cfb71ee 1056 dst_release(&pcpu_rt->dst);
9c7370a1
MKL
1057 dst_destroy(&pcpu_rt->dst);
1058 pcpu_rt = prev;
1059 }
1060 } else {
1061 /* rt has been removed from the fib6 tree
1062 * before we have a chance to acquire the read_lock.
1063 * In this case, don't brother to create a pcpu rt
1064 * since rt is going away anyway. The next
1065 * dst_check() will trigger a re-lookup.
1066 */
1cfb71ee 1067 dst_release(&pcpu_rt->dst);
d52d3997 1068 dst_destroy(&pcpu_rt->dst);
9c7370a1 1069 pcpu_rt = rt;
d52d3997 1070 }
d52d3997
MKL
1071 dst_hold(&pcpu_rt->dst);
1072 rt6_dst_from_metrics_check(pcpu_rt);
9c7370a1 1073 read_unlock_bh(&table->tb6_lock);
d52d3997
MKL
1074 return pcpu_rt;
1075}
1076
9ff74384
DA
1077struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
1078 int oif, struct flowi6 *fl6, int flags)
1da177e4 1079{
367efcb9 1080 struct fib6_node *fn, *saved_fn;
45e4fd26 1081 struct rt6_info *rt;
c71099ac 1082 int strict = 0;
1da177e4 1083
77d16f45 1084 strict |= flags & RT6_LOOKUP_F_IFACE;
d5d32e4b 1085 strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
367efcb9
MKL
1086 if (net->ipv6.devconf_all->forwarding == 0)
1087 strict |= RT6_LOOKUP_F_REACHABLE;
1da177e4 1088
c71099ac 1089 read_lock_bh(&table->tb6_lock);
1da177e4 1090
4c9483b2 1091 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
367efcb9 1092 saved_fn = fn;
1da177e4 1093
ca254490
DA
1094 if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
1095 oif = 0;
1096
a3c00e46 1097redo_rt6_select:
367efcb9 1098 rt = rt6_select(fn, oif, strict);
52bd4c0c 1099 if (rt->rt6i_nsiblings)
367efcb9 1100 rt = rt6_multipath_select(rt, fl6, oif, strict);
a3c00e46
MKL
1101 if (rt == net->ipv6.ip6_null_entry) {
1102 fn = fib6_backtrack(fn, &fl6->saddr);
1103 if (fn)
1104 goto redo_rt6_select;
367efcb9
MKL
1105 else if (strict & RT6_LOOKUP_F_REACHABLE) {
1106 /* also consider unreachable route */
1107 strict &= ~RT6_LOOKUP_F_REACHABLE;
1108 fn = saved_fn;
1109 goto redo_rt6_select;
367efcb9 1110 }
a3c00e46
MKL
1111 }
1112
fb9de91e 1113
3da59bd9 1114 if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
d52d3997
MKL
1115 dst_use(&rt->dst, jiffies);
1116 read_unlock_bh(&table->tb6_lock);
1117
1118 rt6_dst_from_metrics_check(rt);
b811580d
DA
1119
1120 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
d52d3997 1121 return rt;
3da59bd9
MKL
1122 } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
1123 !(rt->rt6i_flags & RTF_GATEWAY))) {
1124 /* Create a RTF_CACHE clone which will not be
1125 * owned by the fib6 tree. It is for the special case where
1126 * the daddr in the skb during the neighbor look-up is different
1127 * from the fl6->daddr used to look-up route here.
1128 */
1129
1130 struct rt6_info *uncached_rt;
1131
d52d3997
MKL
1132 dst_use(&rt->dst, jiffies);
1133 read_unlock_bh(&table->tb6_lock);
1134
3da59bd9
MKL
1135 uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
1136 dst_release(&rt->dst);
c71099ac 1137
1cfb71ee
WW
1138 if (uncached_rt) {
1139 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc()
1140 * No need for another dst_hold()
1141 */
8d0b94af 1142 rt6_uncached_list_add(uncached_rt);
1cfb71ee 1143 } else {
3da59bd9 1144 uncached_rt = net->ipv6.ip6_null_entry;
1cfb71ee
WW
1145 dst_hold(&uncached_rt->dst);
1146 }
b811580d
DA
1147
1148 trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
3da59bd9 1149 return uncached_rt;
3da59bd9 1150
d52d3997
MKL
1151 } else {
1152 /* Get a percpu copy */
1153
1154 struct rt6_info *pcpu_rt;
1155
1156 rt->dst.lastuse = jiffies;
1157 rt->dst.__use++;
1158 pcpu_rt = rt6_get_pcpu_route(rt);
d52d3997 1159
9c7370a1
MKL
1160 if (pcpu_rt) {
1161 read_unlock_bh(&table->tb6_lock);
1162 } else {
1163 /* We have to do the read_unlock first
1164 * because rt6_make_pcpu_route() may trigger
1165 * ip6_dst_gc() which will take the write_lock.
1166 */
1167 dst_hold(&rt->dst);
1168 read_unlock_bh(&table->tb6_lock);
a73e4195 1169 pcpu_rt = rt6_make_pcpu_route(rt);
9c7370a1
MKL
1170 dst_release(&rt->dst);
1171 }
d52d3997 1172
b811580d 1173 trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
d52d3997 1174 return pcpu_rt;
9c7370a1 1175
d52d3997 1176 }
1da177e4 1177}
9ff74384 1178EXPORT_SYMBOL_GPL(ip6_pol_route);
1da177e4 1179
8ed67789 1180static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
4c9483b2 1181 struct flowi6 *fl6, int flags)
4acad72d 1182{
4c9483b2 1183 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
4acad72d
PE
1184}
1185
d409b847
MB
1186struct dst_entry *ip6_route_input_lookup(struct net *net,
1187 struct net_device *dev,
1188 struct flowi6 *fl6, int flags)
72331bc0
SL
1189{
1190 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1191 flags |= RT6_LOOKUP_F_IFACE;
1192
1193 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1194}
d409b847 1195EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
72331bc0 1196
c71099ac
TG
1197void ip6_route_input(struct sk_buff *skb)
1198{
b71d1d42 1199 const struct ipv6hdr *iph = ipv6_hdr(skb);
c346dca1 1200 struct net *net = dev_net(skb->dev);
adaa70bb 1201 int flags = RT6_LOOKUP_F_HAS_SADDR;
904af04d 1202 struct ip_tunnel_info *tun_info;
4c9483b2 1203 struct flowi6 fl6 = {
e0d56fdd 1204 .flowi6_iif = skb->dev->ifindex,
4c9483b2
DM
1205 .daddr = iph->daddr,
1206 .saddr = iph->saddr,
6502ca52 1207 .flowlabel = ip6_flowinfo(iph),
4c9483b2
DM
1208 .flowi6_mark = skb->mark,
1209 .flowi6_proto = iph->nexthdr,
c71099ac 1210 };
adaa70bb 1211
904af04d 1212 tun_info = skb_tunnel_info(skb);
46fa062a 1213 if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
904af04d 1214 fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
06e9d040 1215 skb_dst_drop(skb);
72331bc0 1216 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
c71099ac
TG
1217}
1218
8ed67789 1219static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
4c9483b2 1220 struct flowi6 *fl6, int flags)
1da177e4 1221{
4c9483b2 1222 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
c71099ac
TG
1223}
1224
6f21c96a
PA
1225struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
1226 struct flowi6 *fl6, int flags)
c71099ac 1227{
d46a9d67 1228 bool any_src;
c71099ac 1229
4c1feac5
DA
1230 if (rt6_need_strict(&fl6->daddr)) {
1231 struct dst_entry *dst;
1232
1233 dst = l3mdev_link_scope_lookup(net, fl6);
1234 if (dst)
1235 return dst;
1236 }
ca254490 1237
1fb9489b 1238 fl6->flowi6_iif = LOOPBACK_IFINDEX;
4dc27d1c 1239
d46a9d67 1240 any_src = ipv6_addr_any(&fl6->saddr);
741a11d9 1241 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
d46a9d67 1242 (fl6->flowi6_oif && any_src))
77d16f45 1243 flags |= RT6_LOOKUP_F_IFACE;
c71099ac 1244
d46a9d67 1245 if (!any_src)
adaa70bb 1246 flags |= RT6_LOOKUP_F_HAS_SADDR;
0c9a2ac1
YH
1247 else if (sk)
1248 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
adaa70bb 1249
4c9483b2 1250 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1da177e4 1251}
6f21c96a 1252EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1da177e4 1253
2774c131 1254struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
14e50e57 1255{
5c1e6aa3 1256 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1dbe3252 1257 struct net_device *loopback_dev = net->loopback_dev;
14e50e57
DM
1258 struct dst_entry *new = NULL;
1259
1dbe3252
WW
1260
1261 rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
1262 DST_OBSOLETE_NONE, 0);
14e50e57 1263 if (rt) {
0a1f5962 1264 rt6_info_init(rt);
8104891b 1265
0a1f5962 1266 new = &rt->dst;
14e50e57 1267 new->__use = 1;
352e512c 1268 new->input = dst_discard;
ede2059d 1269 new->output = dst_discard_out;
14e50e57 1270
0a1f5962 1271 dst_copy_metrics(new, &ort->dst);
14e50e57 1272
1dbe3252 1273 rt->rt6i_idev = in6_dev_get(loopback_dev);
4e3fd7a0 1274 rt->rt6i_gateway = ort->rt6i_gateway;
0a1f5962 1275 rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
14e50e57
DM
1276 rt->rt6i_metric = 0;
1277
1278 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1279#ifdef CONFIG_IPV6_SUBTREES
1280 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1281#endif
1282
1283 dst_free(new);
1284 }
1285
69ead7af
DM
1286 dst_release(dst_orig);
1287 return new ? new : ERR_PTR(-ENOMEM);
14e50e57 1288}
14e50e57 1289
1da177e4
LT
1290/*
1291 * Destination cache support functions
1292 */
1293
4b32b5ad
MKL
1294static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1295{
1296 if (rt->dst.from &&
1297 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1298 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1299}
1300
3da59bd9
MKL
1301static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1302{
1303 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1304 return NULL;
1305
1306 if (rt6_check_expired(rt))
1307 return NULL;
1308
1309 return &rt->dst;
1310}
1311
1312static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1313{
5973fb1e
MKL
1314 if (!__rt6_check_expired(rt) &&
1315 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
3da59bd9
MKL
1316 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1317 return &rt->dst;
1318 else
1319 return NULL;
1320}
1321
1da177e4
LT
1322static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1323{
1324 struct rt6_info *rt;
1325
1326 rt = (struct rt6_info *) dst;
1327
6f3118b5
ND
1328 /* All IPV6 dsts are created with ->obsolete set to the value
1329 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1330 * into this function always.
1331 */
e3bc10bd 1332
4b32b5ad
MKL
1333 rt6_dst_from_metrics_check(rt);
1334
02bcf4e0
MKL
1335 if (rt->rt6i_flags & RTF_PCPU ||
1336 (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
3da59bd9
MKL
1337 return rt6_dst_from_check(rt, cookie);
1338 else
1339 return rt6_check(rt, cookie);
1da177e4
LT
1340}
1341
1342static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1343{
1344 struct rt6_info *rt = (struct rt6_info *) dst;
1345
1346 if (rt) {
54c1a859
YH
1347 if (rt->rt6i_flags & RTF_CACHE) {
1348 if (rt6_check_expired(rt)) {
1349 ip6_del_rt(rt);
1350 dst = NULL;
1351 }
1352 } else {
1da177e4 1353 dst_release(dst);
54c1a859
YH
1354 dst = NULL;
1355 }
1da177e4 1356 }
54c1a859 1357 return dst;
1da177e4
LT
1358}
1359
1360static void ip6_link_failure(struct sk_buff *skb)
1361{
1362 struct rt6_info *rt;
1363
3ffe533c 1364 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1da177e4 1365
adf30907 1366 rt = (struct rt6_info *) skb_dst(skb);
1da177e4 1367 if (rt) {
1eb4f758 1368 if (rt->rt6i_flags & RTF_CACHE) {
ad65a2f0
WW
1369 if (dst_hold_safe(&rt->dst))
1370 ip6_del_rt(rt);
1eb4f758 1371 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1da177e4 1372 rt->rt6i_node->fn_sernum = -1;
1eb4f758 1373 }
1da177e4
LT
1374 }
1375}
1376
45e4fd26
MKL
1377static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
1378{
1379 struct net *net = dev_net(rt->dst.dev);
1380
1381 rt->rt6i_flags |= RTF_MODIFIED;
1382 rt->rt6i_pmtu = mtu;
1383 rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
1384}
1385
0d3f6d29
MKL
1386static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1387{
1388 return !(rt->rt6i_flags & RTF_CACHE) &&
1389 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1390}
1391
45e4fd26
MKL
1392static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
1393 const struct ipv6hdr *iph, u32 mtu)
1da177e4 1394{
0dec879f 1395 const struct in6_addr *daddr, *saddr;
67ba4152 1396 struct rt6_info *rt6 = (struct rt6_info *)dst;
1da177e4 1397
45e4fd26
MKL
1398 if (rt6->rt6i_flags & RTF_LOCAL)
1399 return;
81aded24 1400
19bda36c
XL
1401 if (dst_metric_locked(dst, RTAX_MTU))
1402 return;
1403
0dec879f
JA
1404 if (iph) {
1405 daddr = &iph->daddr;
1406 saddr = &iph->saddr;
1407 } else if (sk) {
1408 daddr = &sk->sk_v6_daddr;
1409 saddr = &inet6_sk(sk)->saddr;
1410 } else {
1411 daddr = NULL;
1412 saddr = NULL;
1413 }
1414 dst_confirm_neigh(dst, daddr);
45e4fd26
MKL
1415 mtu = max_t(u32, mtu, IPV6_MIN_MTU);
1416 if (mtu >= dst_mtu(dst))
1417 return;
9d289715 1418
0d3f6d29 1419 if (!rt6_cache_allowed_for_pmtu(rt6)) {
45e4fd26 1420 rt6_do_update_pmtu(rt6, mtu);
0dec879f 1421 } else if (daddr) {
45e4fd26
MKL
1422 struct rt6_info *nrt6;
1423
45e4fd26
MKL
1424 nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
1425 if (nrt6) {
1426 rt6_do_update_pmtu(nrt6, mtu);
1427
1428 /* ip6_ins_rt(nrt6) will bump the
1429 * rt6->rt6i_node->fn_sernum
1430 * which will fail the next rt6_check() and
1431 * invalidate the sk->sk_dst_cache.
1432 */
1433 ip6_ins_rt(nrt6);
1cfb71ee
WW
1434 /* Release the reference taken in
1435 * ip6_rt_cache_alloc()
1436 */
1437 dst_release(&nrt6->dst);
45e4fd26 1438 }
1da177e4
LT
1439 }
1440}
1441
45e4fd26
MKL
1442static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1443 struct sk_buff *skb, u32 mtu)
1444{
1445 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1446}
1447
42ae66c8 1448void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
e2d118a1 1449 int oif, u32 mark, kuid_t uid)
81aded24
DM
1450{
1451 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1452 struct dst_entry *dst;
1453 struct flowi6 fl6;
1454
1455 memset(&fl6, 0, sizeof(fl6));
1456 fl6.flowi6_oif = oif;
1b3c61dc 1457 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
81aded24
DM
1458 fl6.daddr = iph->daddr;
1459 fl6.saddr = iph->saddr;
6502ca52 1460 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1461 fl6.flowi6_uid = uid;
81aded24
DM
1462
1463 dst = ip6_route_output(net, NULL, &fl6);
1464 if (!dst->error)
45e4fd26 1465 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
81aded24
DM
1466 dst_release(dst);
1467}
1468EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1469
1470void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1471{
33c162a9
MKL
1472 struct dst_entry *dst;
1473
81aded24 1474 ip6_update_pmtu(skb, sock_net(sk), mtu,
e2d118a1 1475 sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);
33c162a9
MKL
1476
1477 dst = __sk_dst_get(sk);
1478 if (!dst || !dst->obsolete ||
1479 dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
1480 return;
1481
1482 bh_lock_sock(sk);
1483 if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
1484 ip6_datagram_dst_update(sk, false);
1485 bh_unlock_sock(sk);
81aded24
DM
1486}
1487EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1488
b55b76b2
DJ
1489/* Handle redirects */
/*
 * Flow key used for redirect lookups: a flowi6 extended with the address
 * of the router that sent the redirect.
 *
 * fl6 must stay the first member: __ip6_route_redirect() receives a
 * struct flowi6 pointer and casts it back to struct ip6rd_flowi.
 */
1490struct ip6rd_flowi {
1491 struct flowi6 fl6;
/* compared against each candidate route's rt6i_gateway */
1492 struct in6_addr gateway;
1493};
1494
1495static struct rt6_info *__ip6_route_redirect(struct net *net,
1496 struct fib6_table *table,
1497 struct flowi6 *fl6,
1498 int flags)
1499{
1500 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1501 struct rt6_info *rt;
1502 struct fib6_node *fn;
1503
1504 /* Get the "current" route for this destination and
67c408cf 1505 * check if the redirect has come from appropriate router.
b55b76b2
DJ
1506 *
1507 * RFC 4861 specifies that redirects should only be
1508 * accepted if they come from the nexthop to the target.
1509 * Due to the way the routes are chosen, this notion
1510 * is a bit fuzzy and one might need to check all possible
1511 * routes.
1512 */
1513
1514 read_lock_bh(&table->tb6_lock);
1515 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1516restart:
1517 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1518 if (rt6_check_expired(rt))
1519 continue;
1520 if (rt->dst.error)
1521 break;
1522 if (!(rt->rt6i_flags & RTF_GATEWAY))
1523 continue;
1524 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1525 continue;
1526 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1527 continue;
1528 break;
1529 }
1530
1531 if (!rt)
1532 rt = net->ipv6.ip6_null_entry;
1533 else if (rt->dst.error) {
1534 rt = net->ipv6.ip6_null_entry;
b0a1ba59
MKL
1535 goto out;
1536 }
1537
1538 if (rt == net->ipv6.ip6_null_entry) {
a3c00e46
MKL
1539 fn = fib6_backtrack(fn, &fl6->saddr);
1540 if (fn)
1541 goto restart;
b55b76b2 1542 }
a3c00e46 1543
b0a1ba59 1544out:
b55b76b2
DJ
1545 dst_hold(&rt->dst);
1546
1547 read_unlock_bh(&table->tb6_lock);
1548
b811580d 1549 trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
b55b76b2
DJ
1550 return rt;
1551};
1552
1553static struct dst_entry *ip6_route_redirect(struct net *net,
1554 const struct flowi6 *fl6,
1555 const struct in6_addr *gateway)
1556{
1557 int flags = RT6_LOOKUP_F_HAS_SADDR;
1558 struct ip6rd_flowi rdfl;
1559
1560 rdfl.fl6 = *fl6;
1561 rdfl.gateway = *gateway;
1562
1563 return fib6_rule_lookup(net, &rdfl.fl6,
1564 flags, __ip6_route_redirect);
1565}
1566
e2d118a1
LC
1567void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1568 kuid_t uid)
3a5ad2ee
DM
1569{
1570 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1571 struct dst_entry *dst;
1572 struct flowi6 fl6;
1573
1574 memset(&fl6, 0, sizeof(fl6));
e374c618 1575 fl6.flowi6_iif = LOOPBACK_IFINDEX;
3a5ad2ee
DM
1576 fl6.flowi6_oif = oif;
1577 fl6.flowi6_mark = mark;
3a5ad2ee
DM
1578 fl6.daddr = iph->daddr;
1579 fl6.saddr = iph->saddr;
6502ca52 1580 fl6.flowlabel = ip6_flowinfo(iph);
e2d118a1 1581 fl6.flowi6_uid = uid;
3a5ad2ee 1582
b55b76b2
DJ
1583 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1584 rt6_do_redirect(dst, NULL, skb);
3a5ad2ee
DM
1585 dst_release(dst);
1586}
1587EXPORT_SYMBOL_GPL(ip6_redirect);
1588
c92a59ec
DJ
1589void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1590 u32 mark)
1591{
1592 const struct ipv6hdr *iph = ipv6_hdr(skb);
1593 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1594 struct dst_entry *dst;
1595 struct flowi6 fl6;
1596
1597 memset(&fl6, 0, sizeof(fl6));
e374c618 1598 fl6.flowi6_iif = LOOPBACK_IFINDEX;
c92a59ec
DJ
1599 fl6.flowi6_oif = oif;
1600 fl6.flowi6_mark = mark;
c92a59ec
DJ
1601 fl6.daddr = msg->dest;
1602 fl6.saddr = iph->daddr;
e2d118a1 1603 fl6.flowi6_uid = sock_net_uid(net, NULL);
c92a59ec 1604
b55b76b2
DJ
1605 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1606 rt6_do_redirect(dst, NULL, skb);
c92a59ec
DJ
1607 dst_release(dst);
1608}
1609
3a5ad2ee
DM
1610void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1611{
e2d118a1
LC
1612 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
1613 sk->sk_uid);
3a5ad2ee
DM
1614}
1615EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1616
0dbaee3b 1617static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1da177e4 1618{
0dbaee3b
DM
1619 struct net_device *dev = dst->dev;
1620 unsigned int mtu = dst_mtu(dst);
1621 struct net *net = dev_net(dev);
1622
1da177e4
LT
1623 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1624
5578689a
DL
1625 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1626 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1da177e4
LT
1627
1628 /*
1ab1457c
YH
1629 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1630 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1631 * IPV6_MAXPLEN is also valid and means: "any MSS,
1da177e4
LT
1632 * rely only on pmtu discovery"
1633 */
1634 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1635 mtu = IPV6_MAXPLEN;
1636 return mtu;
1637}
1638
ebb762f2 1639static unsigned int ip6_mtu(const struct dst_entry *dst)
d33e4553 1640{
4b32b5ad
MKL
1641 const struct rt6_info *rt = (const struct rt6_info *)dst;
1642 unsigned int mtu = rt->rt6i_pmtu;
d33e4553 1643 struct inet6_dev *idev;
618f9bc7 1644
4b32b5ad
MKL
1645 if (mtu)
1646 goto out;
1647
1648 mtu = dst_metric_raw(dst, RTAX_MTU);
618f9bc7 1649 if (mtu)
30f78d8e 1650 goto out;
618f9bc7
SK
1651
1652 mtu = IPV6_MIN_MTU;
d33e4553
DM
1653
1654 rcu_read_lock();
1655 idev = __in6_dev_get(dst->dev);
1656 if (idev)
1657 mtu = idev->cnf.mtu6;
1658 rcu_read_unlock();
1659
30f78d8e 1660out:
14972cbd
RP
1661 mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
1662
1663 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
d33e4553
DM
1664}
1665
3b00944c
YH
/*
 * Singly linked list (through dst->next) of the dsts handed out by
 * icmp6_dst_alloc(); walked by icmp6_dst_gc() and icmp6_clean_all().
 */
1666static struct dst_entry *icmp6_dst_gc_list;
/* Taken with spin_lock_bh() around every access to icmp6_dst_gc_list. */
1667static DEFINE_SPINLOCK(icmp6_dst_lock);
5d0bbeeb 1668
3b00944c 1669struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
87a11578 1670 struct flowi6 *fl6)
1da177e4 1671{
87a11578 1672 struct dst_entry *dst;
1da177e4
LT
1673 struct rt6_info *rt;
1674 struct inet6_dev *idev = in6_dev_get(dev);
c346dca1 1675 struct net *net = dev_net(dev);
1da177e4 1676
38308473 1677 if (unlikely(!idev))
122bdf67 1678 return ERR_PTR(-ENODEV);
1da177e4 1679
ad706862 1680 rt = ip6_dst_alloc(net, dev, 0);
38308473 1681 if (unlikely(!rt)) {
1da177e4 1682 in6_dev_put(idev);
87a11578 1683 dst = ERR_PTR(-ENOMEM);
1da177e4
LT
1684 goto out;
1685 }
1686
8e2ec639
YZ
1687 rt->dst.flags |= DST_HOST;
1688 rt->dst.output = ip6_output;
550bab42 1689 rt->rt6i_gateway = fl6->daddr;
87a11578 1690 rt->rt6i_dst.addr = fl6->daddr;
8e2ec639
YZ
1691 rt->rt6i_dst.plen = 128;
1692 rt->rt6i_idev = idev;
14edd87d 1693 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1da177e4 1694
3b00944c 1695 spin_lock_bh(&icmp6_dst_lock);
d8d1f30b
CG
1696 rt->dst.next = icmp6_dst_gc_list;
1697 icmp6_dst_gc_list = &rt->dst;
3b00944c 1698 spin_unlock_bh(&icmp6_dst_lock);
1da177e4 1699
5578689a 1700 fib6_force_start_gc(net);
1da177e4 1701
87a11578
DM
1702 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1703
1da177e4 1704out:
87a11578 1705 return dst;
1da177e4
LT
1706}
1707
3d0f24a7 1708int icmp6_dst_gc(void)
1da177e4 1709{
e9476e95 1710 struct dst_entry *dst, **pprev;
3d0f24a7 1711 int more = 0;
1da177e4 1712
3b00944c
YH
1713 spin_lock_bh(&icmp6_dst_lock);
1714 pprev = &icmp6_dst_gc_list;
5d0bbeeb 1715
1da177e4
LT
1716 while ((dst = *pprev) != NULL) {
1717 if (!atomic_read(&dst->__refcnt)) {
1718 *pprev = dst->next;
1719 dst_free(dst);
1da177e4
LT
1720 } else {
1721 pprev = &dst->next;
3d0f24a7 1722 ++more;
1da177e4
LT
1723 }
1724 }
1725
3b00944c 1726 spin_unlock_bh(&icmp6_dst_lock);
5d0bbeeb 1727
3d0f24a7 1728 return more;
1da177e4
LT
1729}
1730
1e493d19
DM
1731static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1732 void *arg)
1733{
1734 struct dst_entry *dst, **pprev;
1735
1736 spin_lock_bh(&icmp6_dst_lock);
1737 pprev = &icmp6_dst_gc_list;
1738 while ((dst = *pprev) != NULL) {
1739 struct rt6_info *rt = (struct rt6_info *) dst;
1740 if (func(rt, arg)) {
1741 *pprev = dst->next;
1742 dst_free(dst);
1743 } else {
1744 pprev = &dst->next;
1745 }
1746 }
1747 spin_unlock_bh(&icmp6_dst_lock);
1748}
1749
569d3645 1750static int ip6_dst_gc(struct dst_ops *ops)
1da177e4 1751{
86393e52 1752 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
7019b78e
DL
1753 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1754 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1755 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1756 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1757 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
fc66f95c 1758 int entries;
7019b78e 1759
fc66f95c 1760 entries = dst_entries_get_fast(ops);
49a18d86 1761 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
fc66f95c 1762 entries <= rt_max_size)
1da177e4
LT
1763 goto out;
1764
6891a346 1765 net->ipv6.ip6_rt_gc_expire++;
14956643 1766 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
fc66f95c
ED
1767 entries = dst_entries_get_slow(ops);
1768 if (entries < ops->gc_thresh)
7019b78e 1769 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1da177e4 1770out:
7019b78e 1771 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
fc66f95c 1772 return entries > rt_max_size;
1da177e4
LT
1773}
1774
e715b6d3
FW
1775static int ip6_convert_metrics(struct mx6_config *mxc,
1776 const struct fib6_config *cfg)
1777{
c3a8d947 1778 bool ecn_ca = false;
e715b6d3
FW
1779 struct nlattr *nla;
1780 int remaining;
1781 u32 *mp;
1782
63159f29 1783 if (!cfg->fc_mx)
e715b6d3
FW
1784 return 0;
1785
1786 mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1787 if (unlikely(!mp))
1788 return -ENOMEM;
1789
1790 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1791 int type = nla_type(nla);
1bb14807 1792 u32 val;
e715b6d3 1793
1bb14807
DB
1794 if (!type)
1795 continue;
1796 if (unlikely(type > RTAX_MAX))
1797 goto err;
ea697639 1798
1bb14807
DB
1799 if (type == RTAX_CC_ALGO) {
1800 char tmp[TCP_CA_NAME_MAX];
e715b6d3 1801
1bb14807 1802 nla_strlcpy(tmp, nla, sizeof(tmp));
c3a8d947 1803 val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
1bb14807
DB
1804 if (val == TCP_CA_UNSPEC)
1805 goto err;
1806 } else {
1807 val = nla_get_u32(nla);
e715b6d3 1808 }
626abd59
PA
1809 if (type == RTAX_HOPLIMIT && val > 255)
1810 val = 255;
b8d3e416
DB
1811 if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
1812 goto err;
1bb14807
DB
1813
1814 mp[type - 1] = val;
1815 __set_bit(type - 1, mxc->mx_valid);
e715b6d3
FW
1816 }
1817
c3a8d947
DB
1818 if (ecn_ca) {
1819 __set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
1820 mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
1821 }
e715b6d3 1822
c3a8d947 1823 mxc->mx = mp;
e715b6d3
FW
1824 return 0;
1825 err:
1826 kfree(mp);
1827 return -EINVAL;
1828}
1da177e4 1829
8c14586f
DA
1830static struct rt6_info *ip6_nh_lookup_table(struct net *net,
1831 struct fib6_config *cfg,
1832 const struct in6_addr *gw_addr)
1833{
1834 struct flowi6 fl6 = {
1835 .flowi6_oif = cfg->fc_ifindex,
1836 .daddr = *gw_addr,
1837 .saddr = cfg->fc_prefsrc,
1838 };
1839 struct fib6_table *table;
1840 struct rt6_info *rt;
d5d32e4b 1841 int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
8c14586f
DA
1842
1843 table = fib6_get_table(net, cfg->fc_table);
1844 if (!table)
1845 return NULL;
1846
1847 if (!ipv6_addr_any(&cfg->fc_prefsrc))
1848 flags |= RT6_LOOKUP_F_HAS_SADDR;
1849
1850 rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
1851
1852 /* if table lookup failed, fall back to full lookup */
1853 if (rt == net->ipv6.ip6_null_entry) {
1854 ip6_rt_put(rt);
1855 rt = NULL;
1856 }
1857
1858 return rt;
1859}
1860
333c4301
DA
1861static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
1862 struct netlink_ext_ack *extack)
1da177e4 1863{
5578689a 1864 struct net *net = cfg->fc_nlinfo.nl_net;
1da177e4
LT
1865 struct rt6_info *rt = NULL;
1866 struct net_device *dev = NULL;
1867 struct inet6_dev *idev = NULL;
c71099ac 1868 struct fib6_table *table;
1da177e4 1869 int addr_type;
8c5b83f0 1870 int err = -EINVAL;
1da177e4 1871
557c44be 1872 /* RTF_PCPU is an internal flag; can not be set by userspace */
d5d531cb
DA
1873 if (cfg->fc_flags & RTF_PCPU) {
1874 NL_SET_ERR_MSG(extack, "Userspace can not set RTF_PCPU");
557c44be 1875 goto out;
d5d531cb 1876 }
557c44be 1877
d5d531cb
DA
1878 if (cfg->fc_dst_len > 128) {
1879 NL_SET_ERR_MSG(extack, "Invalid prefix length");
1880 goto out;
1881 }
1882 if (cfg->fc_src_len > 128) {
1883 NL_SET_ERR_MSG(extack, "Invalid source address length");
8c5b83f0 1884 goto out;
d5d531cb 1885 }
1da177e4 1886#ifndef CONFIG_IPV6_SUBTREES
d5d531cb
DA
1887 if (cfg->fc_src_len) {
1888 NL_SET_ERR_MSG(extack,
1889 "Specifying source address requires IPV6_SUBTREES to be enabled");
8c5b83f0 1890 goto out;
d5d531cb 1891 }
1da177e4 1892#endif
86872cb5 1893 if (cfg->fc_ifindex) {
1da177e4 1894 err = -ENODEV;
5578689a 1895 dev = dev_get_by_index(net, cfg->fc_ifindex);
1da177e4
LT
1896 if (!dev)
1897 goto out;
1898 idev = in6_dev_get(dev);
1899 if (!idev)
1900 goto out;
1901 }
1902
86872cb5
TG
1903 if (cfg->fc_metric == 0)
1904 cfg->fc_metric = IP6_RT_PRIO_USER;
1da177e4 1905
d71314b4 1906 err = -ENOBUFS;
38308473
DM
1907 if (cfg->fc_nlinfo.nlh &&
1908 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
d71314b4 1909 table = fib6_get_table(net, cfg->fc_table);
38308473 1910 if (!table) {
f3213831 1911 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
d71314b4
MV
1912 table = fib6_new_table(net, cfg->fc_table);
1913 }
1914 } else {
1915 table = fib6_new_table(net, cfg->fc_table);
1916 }
38308473
DM
1917
1918 if (!table)
c71099ac 1919 goto out;
c71099ac 1920
ad706862
MKL
1921 rt = ip6_dst_alloc(net, NULL,
1922 (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
1da177e4 1923
38308473 1924 if (!rt) {
1da177e4
LT
1925 err = -ENOMEM;
1926 goto out;
1927 }
1928
1716a961
G
1929 if (cfg->fc_flags & RTF_EXPIRES)
1930 rt6_set_expires(rt, jiffies +
1931 clock_t_to_jiffies(cfg->fc_expires));
1932 else
1933 rt6_clean_expires(rt);
1da177e4 1934
86872cb5
TG
1935 if (cfg->fc_protocol == RTPROT_UNSPEC)
1936 cfg->fc_protocol = RTPROT_BOOT;
1937 rt->rt6i_protocol = cfg->fc_protocol;
1938
1939 addr_type = ipv6_addr_type(&cfg->fc_dst);
1da177e4
LT
1940
1941 if (addr_type & IPV6_ADDR_MULTICAST)
d8d1f30b 1942 rt->dst.input = ip6_mc_input;
ab79ad14
1943 else if (cfg->fc_flags & RTF_LOCAL)
1944 rt->dst.input = ip6_input;
1da177e4 1945 else
d8d1f30b 1946 rt->dst.input = ip6_forward;
1da177e4 1947
d8d1f30b 1948 rt->dst.output = ip6_output;
1da177e4 1949
19e42e45
RP
1950 if (cfg->fc_encap) {
1951 struct lwtunnel_state *lwtstate;
1952
30357d7d 1953 err = lwtunnel_build_state(cfg->fc_encap_type,
127eb7cd 1954 cfg->fc_encap, AF_INET6, cfg,
9ae28727 1955 &lwtstate, extack);
19e42e45
RP
1956 if (err)
1957 goto out;
61adedf3
JB
1958 rt->dst.lwtstate = lwtstate_get(lwtstate);
1959 if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
1960 rt->dst.lwtstate->orig_output = rt->dst.output;
1961 rt->dst.output = lwtunnel_output;
25368623 1962 }
61adedf3
JB
1963 if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
1964 rt->dst.lwtstate->orig_input = rt->dst.input;
1965 rt->dst.input = lwtunnel_input;
25368623 1966 }
19e42e45
RP
1967 }
1968
86872cb5
TG
1969 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1970 rt->rt6i_dst.plen = cfg->fc_dst_len;
afc4eef8 1971 if (rt->rt6i_dst.plen == 128)
e5fd387a 1972 rt->dst.flags |= DST_HOST;
e5fd387a 1973
1da177e4 1974#ifdef CONFIG_IPV6_SUBTREES
86872cb5
TG
1975 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1976 rt->rt6i_src.plen = cfg->fc_src_len;
1da177e4
LT
1977#endif
1978
86872cb5 1979 rt->rt6i_metric = cfg->fc_metric;
1da177e4
LT
1980
1981 /* We cannot add true routes via loopback here,
1982 they would result in kernel looping; promote them to reject routes
1983 */
86872cb5 1984 if ((cfg->fc_flags & RTF_REJECT) ||
38308473
DM
1985 (dev && (dev->flags & IFF_LOOPBACK) &&
1986 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1987 !(cfg->fc_flags & RTF_LOCAL))) {
1da177e4 1988 /* hold loopback dev/idev if we haven't done so. */
5578689a 1989 if (dev != net->loopback_dev) {
1da177e4
LT
1990 if (dev) {
1991 dev_put(dev);
1992 in6_dev_put(idev);
1993 }
5578689a 1994 dev = net->loopback_dev;
1da177e4
LT
1995 dev_hold(dev);
1996 idev = in6_dev_get(dev);
1997 if (!idev) {
1998 err = -ENODEV;
1999 goto out;
2000 }
2001 }
1da177e4 2002 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
ef2c7d7b
ND
2003 switch (cfg->fc_type) {
2004 case RTN_BLACKHOLE:
2005 rt->dst.error = -EINVAL;
ede2059d 2006 rt->dst.output = dst_discard_out;
7150aede 2007 rt->dst.input = dst_discard;
ef2c7d7b
ND
2008 break;
2009 case RTN_PROHIBIT:
2010 rt->dst.error = -EACCES;
7150aede
K
2011 rt->dst.output = ip6_pkt_prohibit_out;
2012 rt->dst.input = ip6_pkt_prohibit;
ef2c7d7b 2013 break;
b4949ab2 2014 case RTN_THROW:
0315e382 2015 case RTN_UNREACHABLE:
ef2c7d7b 2016 default:
7150aede 2017 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
0315e382
NF
2018 : (cfg->fc_type == RTN_UNREACHABLE)
2019 ? -EHOSTUNREACH : -ENETUNREACH;
7150aede
K
2020 rt->dst.output = ip6_pkt_discard_out;
2021 rt->dst.input = ip6_pkt_discard;
ef2c7d7b
ND
2022 break;
2023 }
1da177e4
LT
2024 goto install_route;
2025 }
2026
86872cb5 2027 if (cfg->fc_flags & RTF_GATEWAY) {
b71d1d42 2028 const struct in6_addr *gw_addr;
1da177e4
LT
2029 int gwa_type;
2030
86872cb5 2031 gw_addr = &cfg->fc_gateway;
330567b7 2032 gwa_type = ipv6_addr_type(gw_addr);
48ed7b26
FW
2033
2034 /* if gw_addr is local we will fail to detect this in case
2035 * address is still TENTATIVE (DAD in progress). rt6_lookup()
2036 * will return already-added prefix route via interface that
2037 * prefix route was assigned to, which might be non-loopback.
2038 */
2039 err = -EINVAL;
330567b7
FW
2040 if (ipv6_chk_addr_and_flags(net, gw_addr,
2041 gwa_type & IPV6_ADDR_LINKLOCAL ?
d5d531cb
DA
2042 dev : NULL, 0, 0)) {
2043 NL_SET_ERR_MSG(extack, "Invalid gateway address");
48ed7b26 2044 goto out;
d5d531cb 2045 }
4e3fd7a0 2046 rt->rt6i_gateway = *gw_addr;
1da177e4
LT
2047
2048 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
8c14586f 2049 struct rt6_info *grt = NULL;
1da177e4
LT
2050
2051 /* IPv6 strictly inhibits using not link-local
2052 addresses as nexthop address.
2053 Otherwise, router will not able to send redirects.
2054 It is very good, but in some (rare!) circumstances
2055 (SIT, PtP, NBMA NOARP links) it is handy to allow
2056 some exceptions. --ANK
96d5822c
EN
2057 We allow IPv4-mapped nexthops to support RFC4798-type
2058 addressing
1da177e4 2059 */
96d5822c 2060 if (!(gwa_type & (IPV6_ADDR_UNICAST |
d5d531cb
DA
2061 IPV6_ADDR_MAPPED))) {
2062 NL_SET_ERR_MSG(extack,
2063 "Invalid gateway address");
1da177e4 2064 goto out;
d5d531cb 2065 }
1da177e4 2066
a435a07f 2067 if (cfg->fc_table) {
8c14586f
DA
2068 grt = ip6_nh_lookup_table(net, cfg, gw_addr);
2069
a435a07f
VB
2070 if (grt) {
2071 if (grt->rt6i_flags & RTF_GATEWAY ||
2072 (dev && dev != grt->dst.dev)) {
2073 ip6_rt_put(grt);
2074 grt = NULL;
2075 }
2076 }
2077 }
2078
8c14586f
DA
2079 if (!grt)
2080 grt = rt6_lookup(net, gw_addr, NULL,
2081 cfg->fc_ifindex, 1);
1da177e4
LT
2082
2083 err = -EHOSTUNREACH;
38308473 2084 if (!grt)
1da177e4
LT
2085 goto out;
2086 if (dev) {
d1918542 2087 if (dev != grt->dst.dev) {
94e187c0 2088 ip6_rt_put(grt);
1da177e4
LT
2089 goto out;
2090 }
2091 } else {
d1918542 2092 dev = grt->dst.dev;
1da177e4
LT
2093 idev = grt->rt6i_idev;
2094 dev_hold(dev);
2095 in6_dev_hold(grt->rt6i_idev);
2096 }
38308473 2097 if (!(grt->rt6i_flags & RTF_GATEWAY))
1da177e4 2098 err = 0;
94e187c0 2099 ip6_rt_put(grt);
1da177e4
LT
2100
2101 if (err)
2102 goto out;
2103 }
2104 err = -EINVAL;
d5d531cb
DA
2105 if (!dev) {
2106 NL_SET_ERR_MSG(extack, "Egress device not specified");
2107 goto out;
2108 } else if (dev->flags & IFF_LOOPBACK) {
2109 NL_SET_ERR_MSG(extack,
2110 "Egress device can not be loopback device for this route");
1da177e4 2111 goto out;
d5d531cb 2112 }
1da177e4
LT
2113 }
2114
2115 err = -ENODEV;
38308473 2116 if (!dev)
1da177e4
LT
2117 goto out;
2118
c3968a85
DW
2119 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
2120 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
d5d531cb 2121 NL_SET_ERR_MSG(extack, "Invalid source address");
c3968a85
DW
2122 err = -EINVAL;
2123 goto out;
2124 }
4e3fd7a0 2125 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
c3968a85
DW
2126 rt->rt6i_prefsrc.plen = 128;
2127 } else
2128 rt->rt6i_prefsrc.plen = 0;
2129
86872cb5 2130 rt->rt6i_flags = cfg->fc_flags;
1da177e4
LT
2131
2132install_route:
d8d1f30b 2133 rt->dst.dev = dev;
1da177e4 2134 rt->rt6i_idev = idev;
c71099ac 2135 rt->rt6i_table = table;
63152fc0 2136
c346dca1 2137 cfg->fc_nlinfo.nl_net = dev_net(dev);
63152fc0 2138
8c5b83f0 2139 return rt;
6b9ea5a6
RP
2140out:
2141 if (dev)
2142 dev_put(dev);
2143 if (idev)
2144 in6_dev_put(idev);
1cfb71ee
WW
2145 if (rt) {
2146 dst_release(&rt->dst);
6b9ea5a6 2147 dst_free(&rt->dst);
1cfb71ee 2148 }
6b9ea5a6 2149
8c5b83f0 2150 return ERR_PTR(err);
6b9ea5a6
RP
2151}
2152
333c4301
DA
2153int ip6_route_add(struct fib6_config *cfg,
2154 struct netlink_ext_ack *extack)
6b9ea5a6
RP
2155{
2156 struct mx6_config mxc = { .mx = NULL, };
8c5b83f0 2157 struct rt6_info *rt;
6b9ea5a6
RP
2158 int err;
2159
333c4301 2160 rt = ip6_route_info_create(cfg, extack);
8c5b83f0
RP
2161 if (IS_ERR(rt)) {
2162 err = PTR_ERR(rt);
2163 rt = NULL;
6b9ea5a6 2164 goto out;
8c5b83f0 2165 }
6b9ea5a6 2166
e715b6d3
FW
2167 err = ip6_convert_metrics(&mxc, cfg);
2168 if (err)
2169 goto out;
1da177e4 2170
333c4301 2171 err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc, extack);
e715b6d3
FW
2172
2173 kfree(mxc.mx);
6b9ea5a6 2174
e715b6d3 2175 return err;
1da177e4 2176out:
1cfb71ee
WW
2177 if (rt) {
2178 dst_release(&rt->dst);
d8d1f30b 2179 dst_free(&rt->dst);
1cfb71ee 2180 }
6b9ea5a6 2181
1da177e4
LT
2182 return err;
2183}
2184
86872cb5 2185static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1da177e4
LT
2186{
2187 int err;
c71099ac 2188 struct fib6_table *table;
d1918542 2189 struct net *net = dev_net(rt->dst.dev);
1da177e4 2190
8e3d5be7
MKL
2191 if (rt == net->ipv6.ip6_null_entry ||
2192 rt->dst.flags & DST_NOCACHE) {
6825a26c
G
2193 err = -ENOENT;
2194 goto out;
2195 }
6c813a72 2196
c71099ac
TG
2197 table = rt->rt6i_table;
2198 write_lock_bh(&table->tb6_lock);
86872cb5 2199 err = fib6_del(rt, info);
c71099ac 2200 write_unlock_bh(&table->tb6_lock);
1da177e4 2201
6825a26c 2202out:
94e187c0 2203 ip6_rt_put(rt);
1da177e4
LT
2204 return err;
2205}
2206
e0a1ad73
TG
2207int ip6_del_rt(struct rt6_info *rt)
2208{
4d1169c1 2209 struct nl_info info = {
d1918542 2210 .nl_net = dev_net(rt->dst.dev),
4d1169c1 2211 };
528c4ceb 2212 return __ip6_del_rt(rt, &info);
e0a1ad73
TG
2213}
2214
0ae81335
DA
2215static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
2216{
2217 struct nl_info *info = &cfg->fc_nlinfo;
e3330039 2218 struct net *net = info->nl_net;
16a16cd3 2219 struct sk_buff *skb = NULL;
0ae81335 2220 struct fib6_table *table;
e3330039 2221 int err = -ENOENT;
0ae81335 2222
e3330039
WC
2223 if (rt == net->ipv6.ip6_null_entry)
2224 goto out_put;
0ae81335
DA
2225 table = rt->rt6i_table;
2226 write_lock_bh(&table->tb6_lock);
2227
2228 if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
2229 struct rt6_info *sibling, *next_sibling;
2230
16a16cd3
DA
2231 /* prefer to send a single notification with all hops */
2232 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
2233 if (skb) {
2234 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2235
e3330039 2236 if (rt6_fill_node(net, skb, rt,
16a16cd3
DA
2237 NULL, NULL, 0, RTM_DELROUTE,
2238 info->portid, seq, 0) < 0) {
2239 kfree_skb(skb);
2240 skb = NULL;
2241 } else
2242 info->skip_notify = 1;
2243 }
2244
0ae81335
DA
2245 list_for_each_entry_safe(sibling, next_sibling,
2246 &rt->rt6i_siblings,
2247 rt6i_siblings) {
2248 err = fib6_del(sibling, info);
2249 if (err)
e3330039 2250 goto out_unlock;
0ae81335
DA
2251 }
2252 }
2253
2254 err = fib6_del(rt, info);
e3330039 2255out_unlock:
0ae81335 2256 write_unlock_bh(&table->tb6_lock);
e3330039 2257out_put:
0ae81335 2258 ip6_rt_put(rt);
16a16cd3
DA
2259
2260 if (skb) {
e3330039 2261 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
16a16cd3
DA
2262 info->nlh, gfp_any());
2263 }
0ae81335
DA
2264 return err;
2265}
2266
333c4301
DA
2267static int ip6_route_del(struct fib6_config *cfg,
2268 struct netlink_ext_ack *extack)
1da177e4 2269{
c71099ac 2270 struct fib6_table *table;
1da177e4
LT
2271 struct fib6_node *fn;
2272 struct rt6_info *rt;
2273 int err = -ESRCH;
2274
5578689a 2275 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
d5d531cb
DA
2276 if (!table) {
2277 NL_SET_ERR_MSG(extack, "FIB table does not exist");
c71099ac 2278 return err;
d5d531cb 2279 }
c71099ac
TG
2280
2281 read_lock_bh(&table->tb6_lock);
1da177e4 2282
c71099ac 2283 fn = fib6_locate(&table->tb6_root,
86872cb5
TG
2284 &cfg->fc_dst, cfg->fc_dst_len,
2285 &cfg->fc_src, cfg->fc_src_len);
1ab1457c 2286
1da177e4 2287 if (fn) {
d8d1f30b 2288 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1f56a01f
MKL
2289 if ((rt->rt6i_flags & RTF_CACHE) &&
2290 !(cfg->fc_flags & RTF_CACHE))
2291 continue;
86872cb5 2292 if (cfg->fc_ifindex &&
d1918542
DM
2293 (!rt->dst.dev ||
2294 rt->dst.dev->ifindex != cfg->fc_ifindex))
1da177e4 2295 continue;
86872cb5
TG
2296 if (cfg->fc_flags & RTF_GATEWAY &&
2297 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1da177e4 2298 continue;
86872cb5 2299 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1da177e4 2300 continue;
c2ed1880
M
2301 if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
2302 continue;
d8d1f30b 2303 dst_hold(&rt->dst);
c71099ac 2304 read_unlock_bh(&table->tb6_lock);
1da177e4 2305
0ae81335
DA
2306 /* if gateway was specified only delete the one hop */
2307 if (cfg->fc_flags & RTF_GATEWAY)
2308 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
2309
2310 return __ip6_del_rt_siblings(rt, cfg);
1da177e4
LT
2311 }
2312 }
c71099ac 2313 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2314
2315 return err;
2316}
2317
6700c270 2318static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
a6279458 2319{
a6279458 2320 struct netevent_redirect netevent;
e8599ff4 2321 struct rt6_info *rt, *nrt = NULL;
e8599ff4
DM
2322 struct ndisc_options ndopts;
2323 struct inet6_dev *in6_dev;
2324 struct neighbour *neigh;
71bcdba0 2325 struct rd_msg *msg;
6e157b6a
DM
2326 int optlen, on_link;
2327 u8 *lladdr;
e8599ff4 2328
29a3cad5 2329 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
71bcdba0 2330 optlen -= sizeof(*msg);
e8599ff4
DM
2331
2332 if (optlen < 0) {
6e157b6a 2333 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
e8599ff4
DM
2334 return;
2335 }
2336
71bcdba0 2337 msg = (struct rd_msg *)icmp6_hdr(skb);
e8599ff4 2338
71bcdba0 2339 if (ipv6_addr_is_multicast(&msg->dest)) {
6e157b6a 2340 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
e8599ff4
DM
2341 return;
2342 }
2343
6e157b6a 2344 on_link = 0;
71bcdba0 2345 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
e8599ff4 2346 on_link = 1;
71bcdba0 2347 } else if (ipv6_addr_type(&msg->target) !=
e8599ff4 2348 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
6e157b6a 2349 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
e8599ff4
DM
2350 return;
2351 }
2352
2353 in6_dev = __in6_dev_get(skb->dev);
2354 if (!in6_dev)
2355 return;
2356 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
2357 return;
2358
2359 /* RFC2461 8.1:
2360 * The IP source address of the Redirect MUST be the same as the current
2361 * first-hop router for the specified ICMP Destination Address.
2362 */
2363
f997c55c 2364 if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
e8599ff4
DM
2365 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
2366 return;
2367 }
6e157b6a
DM
2368
2369 lladdr = NULL;
e8599ff4
DM
2370 if (ndopts.nd_opts_tgt_lladdr) {
2371 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
2372 skb->dev);
2373 if (!lladdr) {
2374 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
2375 return;
2376 }
2377 }
2378
6e157b6a 2379 rt = (struct rt6_info *) dst;
ec13ad1d 2380 if (rt->rt6i_flags & RTF_REJECT) {
6e157b6a 2381 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
e8599ff4 2382 return;
6e157b6a 2383 }
e8599ff4 2384
6e157b6a
DM
2385 /* Redirect received -> path was valid.
2386 * Look, redirects are sent only in response to data packets,
2387 * so that this nexthop apparently is reachable. --ANK
2388 */
0dec879f 2389 dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);
a6279458 2390
71bcdba0 2391 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
6e157b6a
DM
2392 if (!neigh)
2393 return;
a6279458 2394
1da177e4
LT
2395 /*
2396 * We have finally decided to accept it.
2397 */
2398
f997c55c 2399 ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
1da177e4
LT
2400 NEIGH_UPDATE_F_WEAK_OVERRIDE|
2401 NEIGH_UPDATE_F_OVERRIDE|
2402 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
f997c55c
AA
2403 NEIGH_UPDATE_F_ISROUTER)),
2404 NDISC_REDIRECT, &ndopts);
1da177e4 2405
83a09abd 2406 nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
38308473 2407 if (!nrt)
1da177e4
LT
2408 goto out;
2409
2410 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
2411 if (on_link)
2412 nrt->rt6i_flags &= ~RTF_GATEWAY;
2413
4e3fd7a0 2414 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1da177e4 2415
40e22e8f 2416 if (ip6_ins_rt(nrt))
1cfb71ee 2417 goto out_release;
1da177e4 2418
d8d1f30b
CG
2419 netevent.old = &rt->dst;
2420 netevent.new = &nrt->dst;
71bcdba0 2421 netevent.daddr = &msg->dest;
60592833 2422 netevent.neigh = neigh;
8d71740c
TT
2423 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
2424
38308473 2425 if (rt->rt6i_flags & RTF_CACHE) {
6e157b6a 2426 rt = (struct rt6_info *) dst_clone(&rt->dst);
e0a1ad73 2427 ip6_del_rt(rt);
1da177e4
LT
2428 }
2429
1cfb71ee
WW
2430out_release:
2431 /* Release the reference taken in
2432 * ip6_rt_cache_alloc()
2433 */
2434 dst_release(&nrt->dst);
2435
1da177e4 2436out:
e8599ff4 2437 neigh_release(neigh);
6e157b6a
DM
2438}
2439
1da177e4
LT
2440/*
2441 * Misc support functions
2442 */
2443
4b32b5ad
MKL
2444static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
2445{
2446 BUG_ON(from->dst.from);
2447
2448 rt->rt6i_flags &= ~RTF_EXPIRES;
2449 dst_hold(&from->dst);
2450 rt->dst.from = &from->dst;
2451 dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
2452}
2453
83a09abd
MKL
2454static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
2455{
2456 rt->dst.input = ort->dst.input;
2457 rt->dst.output = ort->dst.output;
2458 rt->rt6i_dst = ort->rt6i_dst;
2459 rt->dst.error = ort->dst.error;
2460 rt->rt6i_idev = ort->rt6i_idev;
2461 if (rt->rt6i_idev)
2462 in6_dev_hold(rt->rt6i_idev);
2463 rt->dst.lastuse = jiffies;
2464 rt->rt6i_gateway = ort->rt6i_gateway;
2465 rt->rt6i_flags = ort->rt6i_flags;
2466 rt6_set_from(rt, ort);
2467 rt->rt6i_metric = ort->rt6i_metric;
1da177e4 2468#ifdef CONFIG_IPV6_SUBTREES
83a09abd 2469 rt->rt6i_src = ort->rt6i_src;
1da177e4 2470#endif
83a09abd
MKL
2471 rt->rt6i_prefsrc = ort->rt6i_prefsrc;
2472 rt->rt6i_table = ort->rt6i_table;
61adedf3 2473 rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
1da177e4
LT
2474}
2475
70ceb4f5 2476#ifdef CONFIG_IPV6_ROUTE_INFO
efa2cea0 2477static struct rt6_info *rt6_get_route_info(struct net *net,
b71d1d42 2478 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2479 const struct in6_addr *gwaddr,
2480 struct net_device *dev)
70ceb4f5 2481{
830218c1
DA
2482 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
2483 int ifindex = dev->ifindex;
70ceb4f5
YH
2484 struct fib6_node *fn;
2485 struct rt6_info *rt = NULL;
c71099ac
TG
2486 struct fib6_table *table;
2487
830218c1 2488 table = fib6_get_table(net, tb_id);
38308473 2489 if (!table)
c71099ac 2490 return NULL;
70ceb4f5 2491
5744dd9b 2492 read_lock_bh(&table->tb6_lock);
67ba4152 2493 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
70ceb4f5
YH
2494 if (!fn)
2495 goto out;
2496
d8d1f30b 2497 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2498 if (rt->dst.dev->ifindex != ifindex)
70ceb4f5
YH
2499 continue;
2500 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
2501 continue;
2502 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
2503 continue;
d8d1f30b 2504 dst_hold(&rt->dst);
70ceb4f5
YH
2505 break;
2506 }
2507out:
5744dd9b 2508 read_unlock_bh(&table->tb6_lock);
70ceb4f5
YH
2509 return rt;
2510}
2511
efa2cea0 2512static struct rt6_info *rt6_add_route_info(struct net *net,
b71d1d42 2513 const struct in6_addr *prefix, int prefixlen,
830218c1
DA
2514 const struct in6_addr *gwaddr,
2515 struct net_device *dev,
95c96174 2516 unsigned int pref)
70ceb4f5 2517{
86872cb5 2518 struct fib6_config cfg = {
238fc7ea 2519 .fc_metric = IP6_RT_PRIO_USER,
830218c1 2520 .fc_ifindex = dev->ifindex,
86872cb5
TG
2521 .fc_dst_len = prefixlen,
2522 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2523 RTF_UP | RTF_PREF(pref),
15e47304 2524 .fc_nlinfo.portid = 0,
efa2cea0
DL
2525 .fc_nlinfo.nlh = NULL,
2526 .fc_nlinfo.nl_net = net,
86872cb5
TG
2527 };
2528
830218c1 2529 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
4e3fd7a0
AD
2530 cfg.fc_dst = *prefix;
2531 cfg.fc_gateway = *gwaddr;
70ceb4f5 2532
e317da96
YH
2533 /* We should treat it as a default route if prefix length is 0. */
2534 if (!prefixlen)
86872cb5 2535 cfg.fc_flags |= RTF_DEFAULT;
70ceb4f5 2536
333c4301 2537 ip6_route_add(&cfg, NULL);
70ceb4f5 2538
830218c1 2539 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
70ceb4f5
YH
2540}
2541#endif
2542
b71d1d42 2543struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1ab1457c 2544{
830218c1 2545 u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
1da177e4 2546 struct rt6_info *rt;
c71099ac 2547 struct fib6_table *table;
1da177e4 2548
830218c1 2549 table = fib6_get_table(dev_net(dev), tb_id);
38308473 2550 if (!table)
c71099ac 2551 return NULL;
1da177e4 2552
5744dd9b 2553 read_lock_bh(&table->tb6_lock);
67ba4152 2554 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
d1918542 2555 if (dev == rt->dst.dev &&
045927ff 2556 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1da177e4
LT
2557 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2558 break;
2559 }
2560 if (rt)
d8d1f30b 2561 dst_hold(&rt->dst);
5744dd9b 2562 read_unlock_bh(&table->tb6_lock);
1da177e4
LT
2563 return rt;
2564}
2565
b71d1d42 2566struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
ebacaaa0
YH
2567 struct net_device *dev,
2568 unsigned int pref)
1da177e4 2569{
86872cb5 2570 struct fib6_config cfg = {
ca254490 2571 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
238fc7ea 2572 .fc_metric = IP6_RT_PRIO_USER,
86872cb5
TG
2573 .fc_ifindex = dev->ifindex,
2574 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2575 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
15e47304 2576 .fc_nlinfo.portid = 0,
5578689a 2577 .fc_nlinfo.nlh = NULL,
c346dca1 2578 .fc_nlinfo.nl_net = dev_net(dev),
86872cb5 2579 };
1da177e4 2580
4e3fd7a0 2581 cfg.fc_gateway = *gwaddr;
1da177e4 2582
333c4301 2583 if (!ip6_route_add(&cfg, NULL)) {
830218c1
DA
2584 struct fib6_table *table;
2585
2586 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2587 if (table)
2588 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2589 }
1da177e4 2590
1da177e4
LT
2591 return rt6_get_dflt_router(gwaddr, dev);
2592}
2593
830218c1 2594static void __rt6_purge_dflt_routers(struct fib6_table *table)
1da177e4
LT
2595{
2596 struct rt6_info *rt;
2597
2598restart:
c71099ac 2599 read_lock_bh(&table->tb6_lock);
d8d1f30b 2600 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
3e8b0ac3
LC
2601 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2602 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
d8d1f30b 2603 dst_hold(&rt->dst);
c71099ac 2604 read_unlock_bh(&table->tb6_lock);
e0a1ad73 2605 ip6_del_rt(rt);
1da177e4
LT
2606 goto restart;
2607 }
2608 }
c71099ac 2609 read_unlock_bh(&table->tb6_lock);
830218c1
DA
2610
2611 table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
2612}
2613
2614void rt6_purge_dflt_routers(struct net *net)
2615{
2616 struct fib6_table *table;
2617 struct hlist_head *head;
2618 unsigned int h;
2619
2620 rcu_read_lock();
2621
2622 for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
2623 head = &net->ipv6.fib_table_hash[h];
2624 hlist_for_each_entry_rcu(table, head, tb6_hlist) {
2625 if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
2626 __rt6_purge_dflt_routers(table);
2627 }
2628 }
2629
2630 rcu_read_unlock();
1da177e4
LT
2631}
2632
5578689a
DL
2633static void rtmsg_to_fib6_config(struct net *net,
2634 struct in6_rtmsg *rtmsg,
86872cb5
TG
2635 struct fib6_config *cfg)
2636{
2637 memset(cfg, 0, sizeof(*cfg));
2638
ca254490
DA
2639 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2640 : RT6_TABLE_MAIN;
86872cb5
TG
2641 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2642 cfg->fc_metric = rtmsg->rtmsg_metric;
2643 cfg->fc_expires = rtmsg->rtmsg_info;
2644 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2645 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2646 cfg->fc_flags = rtmsg->rtmsg_flags;
2647
5578689a 2648 cfg->fc_nlinfo.nl_net = net;
f1243c2d 2649
4e3fd7a0
AD
2650 cfg->fc_dst = rtmsg->rtmsg_dst;
2651 cfg->fc_src = rtmsg->rtmsg_src;
2652 cfg->fc_gateway = rtmsg->rtmsg_gateway;
86872cb5
TG
2653}
2654
5578689a 2655int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1da177e4 2656{
86872cb5 2657 struct fib6_config cfg;
1da177e4
LT
2658 struct in6_rtmsg rtmsg;
2659 int err;
2660
67ba4152 2661 switch (cmd) {
1da177e4
LT
2662 case SIOCADDRT: /* Add a route */
2663 case SIOCDELRT: /* Delete a route */
af31f412 2664 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
1da177e4
LT
2665 return -EPERM;
2666 err = copy_from_user(&rtmsg, arg,
2667 sizeof(struct in6_rtmsg));
2668 if (err)
2669 return -EFAULT;
86872cb5 2670
5578689a 2671 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
86872cb5 2672
1da177e4
LT
2673 rtnl_lock();
2674 switch (cmd) {
2675 case SIOCADDRT:
333c4301 2676 err = ip6_route_add(&cfg, NULL);
1da177e4
LT
2677 break;
2678 case SIOCDELRT:
333c4301 2679 err = ip6_route_del(&cfg, NULL);
1da177e4
LT
2680 break;
2681 default:
2682 err = -EINVAL;
2683 }
2684 rtnl_unlock();
2685
2686 return err;
3ff50b79 2687 }
1da177e4
LT
2688
2689 return -EINVAL;
2690}
2691
2692/*
2693 * Drop the packet on the floor
2694 */
2695
d5fdd6ba 2696static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1da177e4 2697{
612f09e8 2698 int type;
adf30907 2699 struct dst_entry *dst = skb_dst(skb);
612f09e8
YH
2700 switch (ipstats_mib_noroutes) {
2701 case IPSTATS_MIB_INNOROUTES:
0660e03f 2702 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
45bb0060 2703 if (type == IPV6_ADDR_ANY) {
3bd653c8
DL
2704 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2705 IPSTATS_MIB_INADDRERRORS);
612f09e8
YH
2706 break;
2707 }
2708 /* FALLTHROUGH */
2709 case IPSTATS_MIB_OUTNOROUTES:
3bd653c8
DL
2710 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2711 ipstats_mib_noroutes);
612f09e8
YH
2712 break;
2713 }
3ffe533c 2714 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1da177e4
LT
2715 kfree_skb(skb);
2716 return 0;
2717}
2718
9ce8ade0
TG
2719static int ip6_pkt_discard(struct sk_buff *skb)
2720{
612f09e8 2721 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2722}
2723
ede2059d 2724static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
1da177e4 2725{
adf30907 2726 skb->dev = skb_dst(skb)->dev;
612f09e8 2727 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1da177e4
LT
2728}
2729
9ce8ade0
TG
2730static int ip6_pkt_prohibit(struct sk_buff *skb)
2731{
612f09e8 2732 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
9ce8ade0
TG
2733}
2734
ede2059d 2735static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
9ce8ade0 2736{
adf30907 2737 skb->dev = skb_dst(skb)->dev;
612f09e8 2738 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
9ce8ade0
TG
2739}
2740
1da177e4
LT
2741/*
2742 * Allocate a dst for local (unicast / anycast) address.
2743 */
2744
2745struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2746 const struct in6_addr *addr,
8f031519 2747 bool anycast)
1da177e4 2748{
ca254490 2749 u32 tb_id;
c346dca1 2750 struct net *net = dev_net(idev->dev);
5f02ce24
DA
2751 struct net_device *dev = net->loopback_dev;
2752 struct rt6_info *rt;
2753
2754 /* use L3 Master device as loopback for host routes if device
2755 * is enslaved and address is not link local or multicast
2756 */
2757 if (!rt6_need_strict(addr))
2758 dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
2759
2760 rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
a3300ef4 2761 if (!rt)
1da177e4
LT
2762 return ERR_PTR(-ENOMEM);
2763
1da177e4
LT
2764 in6_dev_hold(idev);
2765
11d53b49 2766 rt->dst.flags |= DST_HOST;
d8d1f30b
CG
2767 rt->dst.input = ip6_input;
2768 rt->dst.output = ip6_output;
1da177e4 2769 rt->rt6i_idev = idev;
1da177e4 2770
94b5e0f9 2771 rt->rt6i_protocol = RTPROT_KERNEL;
1da177e4 2772 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
58c4fb86
YH
2773 if (anycast)
2774 rt->rt6i_flags |= RTF_ANYCAST;
2775 else
1da177e4 2776 rt->rt6i_flags |= RTF_LOCAL;
1da177e4 2777
550bab42 2778 rt->rt6i_gateway = *addr;
4e3fd7a0 2779 rt->rt6i_dst.addr = *addr;
1da177e4 2780 rt->rt6i_dst.plen = 128;
ca254490
DA
2781 tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
2782 rt->rt6i_table = fib6_get_table(net, tb_id);
8e3d5be7 2783 rt->dst.flags |= DST_NOCACHE;
1da177e4 2784
1da177e4
LT
2785 return rt;
2786}
2787
c3968a85
DW
2788/* remove deleted ip from prefsrc entries */
/* Argument bundle passed through fib6_clean_all() to fib6_remove_prefsrc(). */
struct arg_dev_net_ip {
	struct net_device *dev;	/* only routes on this device; NULL = any */
	struct net *net;	/* namespace, used to skip its null entry */
	struct in6_addr *addr;	/* address compared with rt6i_prefsrc.addr */
};
2794
2795static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2796{
2797 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2798 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2799 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2800
d1918542 2801 if (((void *)rt->dst.dev == dev || !dev) &&
c3968a85
DW
2802 rt != net->ipv6.ip6_null_entry &&
2803 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2804 /* remove prefsrc entry */
2805 rt->rt6i_prefsrc.plen = 0;
2806 }
2807 return 0;
2808}
2809
2810void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2811{
2812 struct net *net = dev_net(ifp->idev->dev);
2813 struct arg_dev_net_ip adni = {
2814 .dev = ifp->idev->dev,
2815 .net = net,
2816 .addr = &ifp->addr,
2817 };
0c3584d5 2818 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
c3968a85
DW
2819}
2820
be7a010d
DJ
2821#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2822#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2823
2824/* Remove routers and update dst entries when gateway turn into host. */
2825static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2826{
2827 struct in6_addr *gateway = (struct in6_addr *)arg;
2828
2829 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2830 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2831 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2832 return -1;
2833 }
2834 return 0;
2835}
2836
2837void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
2838{
2839 fib6_clean_all(net, fib6_clean_tohost, gateway);
2840}
2841
8ed67789
DL
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device to match; NULL matches any device */
	struct net *net;	/* namespace, used to skip its null entry */
};
2846
a1a22c12 2847/* called with write lock held for table with rt */
1da177e4
LT
2848static int fib6_ifdown(struct rt6_info *rt, void *arg)
2849{
bc3ef660 2850 const struct arg_dev_net *adn = arg;
2851 const struct net_device *dev = adn->dev;
8ed67789 2852
d1918542 2853 if ((rt->dst.dev == dev || !dev) &&
a1a22c12
DA
2854 rt != adn->net->ipv6.ip6_null_entry &&
2855 (rt->rt6i_nsiblings == 0 ||
8397ed36 2856 (dev && netdev_unregistering(dev)) ||
a1a22c12 2857 !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
1da177e4 2858 return -1;
c159d30c 2859
1da177e4
LT
2860 return 0;
2861}
2862
f3db4851 2863void rt6_ifdown(struct net *net, struct net_device *dev)
1da177e4 2864{
8ed67789
DL
2865 struct arg_dev_net adn = {
2866 .dev = dev,
2867 .net = net,
2868 };
2869
0c3584d5 2870 fib6_clean_all(net, fib6_ifdown, &adn);
1e493d19 2871 icmp6_clean_all(fib6_ifdown, &adn);
e332bc67
EB
2872 if (dev)
2873 rt6_uncached_list_flush_dev(net, dev);
1da177e4
LT
2874}
2875
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new MTU */
};
2880
2881static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2882{
2883 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2884 struct inet6_dev *idev;
2885
2886 /* In IPv6 pmtu discovery is not optional,
2887 so that RTAX_MTU lock cannot disable it.
2888 We still use this lock to block changes
2889 caused by addrconf/ndisc.
2890 */
2891
2892 idev = __in6_dev_get(arg->dev);
38308473 2893 if (!idev)
1da177e4
LT
2894 return 0;
2895
2896 /* For administrative MTU increase, there is no way to discover
2897 IPv6 PMTU increase, so PMTU increase should be updated here.
2898 Since RFC 1981 doesn't include administrative MTU increase
2899 update PMTU increase is a MUST. (i.e. jumbo frame)
2900 */
2901 /*
2902 If new MTU is less than route PMTU, this new MTU will be the
2903 lowest MTU in the path, update the route PMTU to reflect PMTU
2904 decreases; if new MTU is greater than route PMTU, and the
2905 old MTU is the lowest MTU in the path, update the route PMTU
2906 to reflect the increase. In this case if the other nodes' MTU
2907 also have the lowest MTU, TOO BIG MESSAGE will be lead to
67c408cf 2908 PMTU discovery.
1da177e4 2909 */
d1918542 2910 if (rt->dst.dev == arg->dev &&
fb56be83 2911 dst_metric_raw(&rt->dst, RTAX_MTU) &&
4b32b5ad
MKL
2912 !dst_metric_locked(&rt->dst, RTAX_MTU)) {
2913 if (rt->rt6i_flags & RTF_CACHE) {
2914 /* For RTF_CACHE with rt6i_pmtu == 0
2915 * (i.e. a redirected route),
2916 * the metrics of its rt->dst.from has already
2917 * been updated.
2918 */
2919 if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
2920 rt->rt6i_pmtu = arg->mtu;
2921 } else if (dst_mtu(&rt->dst) >= arg->mtu ||
2922 (dst_mtu(&rt->dst) < arg->mtu &&
2923 dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
2924 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2925 }
566cfd8f 2926 }
1da177e4
LT
2927 return 0;
2928}
2929
95c96174 2930void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
1da177e4 2931{
c71099ac
TG
2932 struct rt6_mtu_change_arg arg = {
2933 .dev = dev,
2934 .mtu = mtu,
2935 };
1da177e4 2936
0c3584d5 2937 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
1da177e4
LT
2938}
2939
ef7c79ed 2940static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
5176f91e 2941 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
86872cb5 2942 [RTA_OIF] = { .type = NLA_U32 },
ab364a6f 2943 [RTA_IIF] = { .type = NLA_U32 },
86872cb5
TG
2944 [RTA_PRIORITY] = { .type = NLA_U32 },
2945 [RTA_METRICS] = { .type = NLA_NESTED },
51ebd318 2946 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
c78ba6d6 2947 [RTA_PREF] = { .type = NLA_U8 },
19e42e45
RP
2948 [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
2949 [RTA_ENCAP] = { .type = NLA_NESTED },
32bc201e 2950 [RTA_EXPIRES] = { .type = NLA_U32 },
622ec2c9 2951 [RTA_UID] = { .type = NLA_U32 },
3b45a410 2952 [RTA_MARK] = { .type = NLA_U32 },
86872cb5
TG
2953};
2954
2955static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
333c4301
DA
2956 struct fib6_config *cfg,
2957 struct netlink_ext_ack *extack)
1da177e4 2958{
86872cb5
TG
2959 struct rtmsg *rtm;
2960 struct nlattr *tb[RTA_MAX+1];
c78ba6d6 2961 unsigned int pref;
86872cb5 2962 int err;
1da177e4 2963
fceb6435
JB
2964 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
2965 NULL);
86872cb5
TG
2966 if (err < 0)
2967 goto errout;
1da177e4 2968
86872cb5
TG
2969 err = -EINVAL;
2970 rtm = nlmsg_data(nlh);
2971 memset(cfg, 0, sizeof(*cfg));
2972
2973 cfg->fc_table = rtm->rtm_table;
2974 cfg->fc_dst_len = rtm->rtm_dst_len;
2975 cfg->fc_src_len = rtm->rtm_src_len;
2976 cfg->fc_flags = RTF_UP;
2977 cfg->fc_protocol = rtm->rtm_protocol;
ef2c7d7b 2978 cfg->fc_type = rtm->rtm_type;
86872cb5 2979
ef2c7d7b
ND
2980 if (rtm->rtm_type == RTN_UNREACHABLE ||
2981 rtm->rtm_type == RTN_BLACKHOLE ||
b4949ab2
ND
2982 rtm->rtm_type == RTN_PROHIBIT ||
2983 rtm->rtm_type == RTN_THROW)
86872cb5
TG
2984 cfg->fc_flags |= RTF_REJECT;
2985
ab79ad14
2986 if (rtm->rtm_type == RTN_LOCAL)
2987 cfg->fc_flags |= RTF_LOCAL;
2988
1f56a01f
MKL
2989 if (rtm->rtm_flags & RTM_F_CLONED)
2990 cfg->fc_flags |= RTF_CACHE;
2991
15e47304 2992 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
86872cb5 2993 cfg->fc_nlinfo.nlh = nlh;
3b1e0a65 2994 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
86872cb5
TG
2995
2996 if (tb[RTA_GATEWAY]) {
67b61f6c 2997 cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
86872cb5 2998 cfg->fc_flags |= RTF_GATEWAY;
1da177e4 2999 }
86872cb5
TG
3000
3001 if (tb[RTA_DST]) {
3002 int plen = (rtm->rtm_dst_len + 7) >> 3;
3003
3004 if (nla_len(tb[RTA_DST]) < plen)
3005 goto errout;
3006
3007 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1da177e4 3008 }
86872cb5
TG
3009
3010 if (tb[RTA_SRC]) {
3011 int plen = (rtm->rtm_src_len + 7) >> 3;
3012
3013 if (nla_len(tb[RTA_SRC]) < plen)
3014 goto errout;
3015
3016 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1da177e4 3017 }
86872cb5 3018
c3968a85 3019 if (tb[RTA_PREFSRC])
67b61f6c 3020 cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);
c3968a85 3021
86872cb5
TG
3022 if (tb[RTA_OIF])
3023 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
3024
3025 if (tb[RTA_PRIORITY])
3026 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
3027
3028 if (tb[RTA_METRICS]) {
3029 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
3030 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
1da177e4 3031 }
86872cb5
TG
3032
3033 if (tb[RTA_TABLE])
3034 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
3035
51ebd318
ND
3036 if (tb[RTA_MULTIPATH]) {
3037 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
3038 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
9ed59592
DA
3039
3040 err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
c255bd68 3041 cfg->fc_mp_len, extack);
9ed59592
DA
3042 if (err < 0)
3043 goto errout;
51ebd318
ND
3044 }
3045
c78ba6d6
LR
3046 if (tb[RTA_PREF]) {
3047 pref = nla_get_u8(tb[RTA_PREF]);
3048 if (pref != ICMPV6_ROUTER_PREF_LOW &&
3049 pref != ICMPV6_ROUTER_PREF_HIGH)
3050 pref = ICMPV6_ROUTER_PREF_MEDIUM;
3051 cfg->fc_flags |= RTF_PREF(pref);
3052 }
3053
19e42e45
RP
3054 if (tb[RTA_ENCAP])
3055 cfg->fc_encap = tb[RTA_ENCAP];
3056
9ed59592 3057 if (tb[RTA_ENCAP_TYPE]) {
19e42e45
RP
3058 cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
3059
c255bd68 3060 err = lwtunnel_valid_encap_type(cfg->fc_encap_type, extack);
9ed59592
DA
3061 if (err < 0)
3062 goto errout;
3063 }
3064
32bc201e
XL
3065 if (tb[RTA_EXPIRES]) {
3066 unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);
3067
3068 if (addrconf_finite_timeout(timeout)) {
3069 cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
3070 cfg->fc_flags |= RTF_EXPIRES;
3071 }
3072 }
3073
86872cb5
TG
3074 err = 0;
3075errout:
3076 return err;
1da177e4
LT
3077}
3078
6b9ea5a6
RP
3079struct rt6_nh {
3080 struct rt6_info *rt6_info;
3081 struct fib6_config r_cfg;
3082 struct mx6_config mxc;
3083 struct list_head next;
3084};
3085
3086static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
3087{
3088 struct rt6_nh *nh;
3089
3090 list_for_each_entry(nh, rt6_nh_list, next) {
7d4d5065 3091 pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
6b9ea5a6
RP
3092 &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
3093 nh->r_cfg.fc_ifindex);
3094 }
3095}
3096
3097static int ip6_route_info_append(struct list_head *rt6_nh_list,
3098 struct rt6_info *rt, struct fib6_config *r_cfg)
3099{
3100 struct rt6_nh *nh;
3101 struct rt6_info *rtnh;
3102 int err = -EEXIST;
3103
3104 list_for_each_entry(nh, rt6_nh_list, next) {
3105 /* check if rt6_info already exists */
3106 rtnh = nh->rt6_info;
3107
3108 if (rtnh->dst.dev == rt->dst.dev &&
3109 rtnh->rt6i_idev == rt->rt6i_idev &&
3110 ipv6_addr_equal(&rtnh->rt6i_gateway,
3111 &rt->rt6i_gateway))
3112 return err;
3113 }
3114
3115 nh = kzalloc(sizeof(*nh), GFP_KERNEL);
3116 if (!nh)
3117 return -ENOMEM;
3118 nh->rt6_info = rt;
3119 err = ip6_convert_metrics(&nh->mxc, r_cfg);
3120 if (err) {
3121 kfree(nh);
3122 return err;
3123 }
3124 memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
3125 list_add_tail(&nh->next, rt6_nh_list);
3126
3127 return 0;
3128}
3129
3b1137fe
DA
3130static void ip6_route_mpath_notify(struct rt6_info *rt,
3131 struct rt6_info *rt_last,
3132 struct nl_info *info,
3133 __u16 nlflags)
3134{
3135 /* if this is an APPEND route, then rt points to the first route
3136 * inserted and rt_last points to last route inserted. Userspace
3137 * wants a consistent dump of the route which starts at the first
3138 * nexthop. Since sibling routes are always added at the end of
3139 * the list, find the first sibling of the last route appended
3140 */
3141 if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->rt6i_nsiblings) {
3142 rt = list_first_entry(&rt_last->rt6i_siblings,
3143 struct rt6_info,
3144 rt6i_siblings);
3145 }
3146
3147 if (rt)
3148 inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
3149}
3150
333c4301
DA
3151static int ip6_route_multipath_add(struct fib6_config *cfg,
3152 struct netlink_ext_ack *extack)
51ebd318 3153{
3b1137fe
DA
3154 struct rt6_info *rt_notif = NULL, *rt_last = NULL;
3155 struct nl_info *info = &cfg->fc_nlinfo;
51ebd318
ND
3156 struct fib6_config r_cfg;
3157 struct rtnexthop *rtnh;
6b9ea5a6
RP
3158 struct rt6_info *rt;
3159 struct rt6_nh *err_nh;
3160 struct rt6_nh *nh, *nh_safe;
3b1137fe 3161 __u16 nlflags;
51ebd318
ND
3162 int remaining;
3163 int attrlen;
6b9ea5a6
RP
3164 int err = 1;
3165 int nhn = 0;
3166 int replace = (cfg->fc_nlinfo.nlh &&
3167 (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
3168 LIST_HEAD(rt6_nh_list);
51ebd318 3169
3b1137fe
DA
3170 nlflags = replace ? NLM_F_REPLACE : NLM_F_CREATE;
3171 if (info->nlh && info->nlh->nlmsg_flags & NLM_F_APPEND)
3172 nlflags |= NLM_F_APPEND;
3173
35f1b4e9 3174 remaining = cfg->fc_mp_len;
51ebd318 3175 rtnh = (struct rtnexthop *)cfg->fc_mp;
51ebd318 3176
6b9ea5a6
RP
3177 /* Parse a Multipath Entry and build a list (rt6_nh_list) of
3178 * rt6_info structs per nexthop
3179 */
51ebd318
ND
3180 while (rtnh_ok(rtnh, remaining)) {
3181 memcpy(&r_cfg, cfg, sizeof(*cfg));
3182 if (rtnh->rtnh_ifindex)
3183 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3184
3185 attrlen = rtnh_attrlen(rtnh);
3186 if (attrlen > 0) {
3187 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3188
3189 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3190 if (nla) {
67b61f6c 3191 r_cfg.fc_gateway = nla_get_in6_addr(nla);
51ebd318
ND
3192 r_cfg.fc_flags |= RTF_GATEWAY;
3193 }
19e42e45
RP
3194 r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
3195 nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
3196 if (nla)
3197 r_cfg.fc_encap_type = nla_get_u16(nla);
51ebd318 3198 }
6b9ea5a6 3199
333c4301 3200 rt = ip6_route_info_create(&r_cfg, extack);
8c5b83f0
RP
3201 if (IS_ERR(rt)) {
3202 err = PTR_ERR(rt);
3203 rt = NULL;
6b9ea5a6 3204 goto cleanup;
8c5b83f0 3205 }
6b9ea5a6
RP
3206
3207 err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
51ebd318 3208 if (err) {
1cfb71ee 3209 dst_release(&rt->dst);
6b9ea5a6
RP
3210 dst_free(&rt->dst);
3211 goto cleanup;
3212 }
3213
3214 rtnh = rtnh_next(rtnh, &remaining);
3215 }
3216
3b1137fe
DA
3217 /* for add and replace send one notification with all nexthops.
3218 * Skip the notification in fib6_add_rt2node and send one with
3219 * the full route when done
3220 */
3221 info->skip_notify = 1;
3222
6b9ea5a6
RP
3223 err_nh = NULL;
3224 list_for_each_entry(nh, &rt6_nh_list, next) {
3b1137fe 3225 rt_last = nh->rt6_info;
333c4301 3226 err = __ip6_ins_rt(nh->rt6_info, info, &nh->mxc, extack);
3b1137fe
DA
3227 /* save reference to first route for notification */
3228 if (!rt_notif && !err)
3229 rt_notif = nh->rt6_info;
3230
6b9ea5a6
RP
3231 /* nh->rt6_info is used or freed at this point, reset to NULL*/
3232 nh->rt6_info = NULL;
3233 if (err) {
3234 if (replace && nhn)
3235 ip6_print_replace_route_err(&rt6_nh_list);
3236 err_nh = nh;
3237 goto add_errout;
51ebd318 3238 }
6b9ea5a6 3239
1a72418b 3240 /* Because each route is added like a single route we remove
27596472
MK
3241 * these flags after the first nexthop: if there is a collision,
3242 * we have already failed to add the first nexthop:
3243 * fib6_add_rt2node() has rejected it; when replacing, old
3244 * nexthops have been replaced by first new, the rest should
3245 * be added to it.
1a72418b 3246 */
27596472
MK
3247 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
3248 NLM_F_REPLACE);
6b9ea5a6
RP
3249 nhn++;
3250 }
3251
3b1137fe
DA
3252 /* success ... tell user about new route */
3253 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
6b9ea5a6
RP
3254 goto cleanup;
3255
3256add_errout:
3b1137fe
DA
3257 /* send notification for routes that were added so that
3258 * the delete notifications sent by ip6_route_del are
3259 * coherent
3260 */
3261 if (rt_notif)
3262 ip6_route_mpath_notify(rt_notif, rt_last, info, nlflags);
3263
6b9ea5a6
RP
3264 /* Delete routes that were already added */
3265 list_for_each_entry(nh, &rt6_nh_list, next) {
3266 if (err_nh == nh)
3267 break;
333c4301 3268 ip6_route_del(&nh->r_cfg, extack);
6b9ea5a6
RP
3269 }
3270
3271cleanup:
3272 list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
1cfb71ee
WW
3273 if (nh->rt6_info) {
3274 dst_release(&nh->rt6_info->dst);
6b9ea5a6 3275 dst_free(&nh->rt6_info->dst);
1cfb71ee 3276 }
52fe51f8 3277 kfree(nh->mxc.mx);
6b9ea5a6
RP
3278 list_del(&nh->next);
3279 kfree(nh);
3280 }
3281
3282 return err;
3283}
3284
333c4301
DA
3285static int ip6_route_multipath_del(struct fib6_config *cfg,
3286 struct netlink_ext_ack *extack)
6b9ea5a6
RP
3287{
3288 struct fib6_config r_cfg;
3289 struct rtnexthop *rtnh;
3290 int remaining;
3291 int attrlen;
3292 int err = 1, last_err = 0;
3293
3294 remaining = cfg->fc_mp_len;
3295 rtnh = (struct rtnexthop *)cfg->fc_mp;
3296
3297 /* Parse a Multipath Entry */
3298 while (rtnh_ok(rtnh, remaining)) {
3299 memcpy(&r_cfg, cfg, sizeof(*cfg));
3300 if (rtnh->rtnh_ifindex)
3301 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
3302
3303 attrlen = rtnh_attrlen(rtnh);
3304 if (attrlen > 0) {
3305 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
3306
3307 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
3308 if (nla) {
3309 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
3310 r_cfg.fc_flags |= RTF_GATEWAY;
3311 }
3312 }
333c4301 3313 err = ip6_route_del(&r_cfg, extack);
6b9ea5a6
RP
3314 if (err)
3315 last_err = err;
3316
51ebd318
ND
3317 rtnh = rtnh_next(rtnh, &remaining);
3318 }
3319
3320 return last_err;
3321}
3322
c21ef3e3
DA
3323static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3324 struct netlink_ext_ack *extack)
1da177e4 3325{
86872cb5
TG
3326 struct fib6_config cfg;
3327 int err;
1da177e4 3328
333c4301 3329 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3330 if (err < 0)
3331 return err;
3332
51ebd318 3333 if (cfg.fc_mp)
333c4301 3334 return ip6_route_multipath_del(&cfg, extack);
0ae81335
DA
3335 else {
3336 cfg.fc_delete_all_nh = 1;
333c4301 3337 return ip6_route_del(&cfg, extack);
0ae81335 3338 }
1da177e4
LT
3339}
3340
c21ef3e3
DA
3341static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
3342 struct netlink_ext_ack *extack)
1da177e4 3343{
86872cb5
TG
3344 struct fib6_config cfg;
3345 int err;
1da177e4 3346
333c4301 3347 err = rtm_to_fib6_config(skb, nlh, &cfg, extack);
86872cb5
TG
3348 if (err < 0)
3349 return err;
3350
51ebd318 3351 if (cfg.fc_mp)
333c4301 3352 return ip6_route_multipath_add(&cfg, extack);
51ebd318 3353 else
333c4301 3354 return ip6_route_add(&cfg, extack);
1da177e4
LT
3355}
3356
beb1afac 3357static size_t rt6_nlmsg_size(struct rt6_info *rt)
339bf98f 3358{
beb1afac
DA
3359 int nexthop_len = 0;
3360
3361 if (rt->rt6i_nsiblings) {
3362 nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
3363 + NLA_ALIGN(sizeof(struct rtnexthop))
3364 + nla_total_size(16) /* RTA_GATEWAY */
beb1afac
DA
3365 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3366
3367 nexthop_len *= rt->rt6i_nsiblings;
3368 }
3369
339bf98f
TG
3370 return NLMSG_ALIGN(sizeof(struct rtmsg))
3371 + nla_total_size(16) /* RTA_SRC */
3372 + nla_total_size(16) /* RTA_DST */
3373 + nla_total_size(16) /* RTA_GATEWAY */
3374 + nla_total_size(16) /* RTA_PREFSRC */
3375 + nla_total_size(4) /* RTA_TABLE */
3376 + nla_total_size(4) /* RTA_IIF */
3377 + nla_total_size(4) /* RTA_OIF */
3378 + nla_total_size(4) /* RTA_PRIORITY */
6a2b9ce0 3379 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
ea697639 3380 + nla_total_size(sizeof(struct rta_cacheinfo))
c78ba6d6 3381 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
19e42e45 3382 + nla_total_size(1) /* RTA_PREF */
beb1afac
DA
3383 + lwtunnel_get_encap_size(rt->dst.lwtstate)
3384 + nexthop_len;
3385}
3386
3387static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
5be083ce 3388 unsigned int *flags, bool skip_oif)
beb1afac
DA
3389{
3390 if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
3391 *flags |= RTNH_F_LINKDOWN;
3392 if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
3393 *flags |= RTNH_F_DEAD;
3394 }
3395
3396 if (rt->rt6i_flags & RTF_GATEWAY) {
3397 if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
3398 goto nla_put_failure;
3399 }
3400
5be083ce
DA
3401 /* not needed for multipath encoding b/c it has a rtnexthop struct */
3402 if (!skip_oif && rt->dst.dev &&
beb1afac
DA
3403 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
3404 goto nla_put_failure;
3405
3406 if (rt->dst.lwtstate &&
3407 lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
3408 goto nla_put_failure;
3409
3410 return 0;
3411
3412nla_put_failure:
3413 return -EMSGSIZE;
3414}
3415
5be083ce 3416/* add multipath next hop */
beb1afac
DA
3417static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
3418{
3419 struct rtnexthop *rtnh;
3420 unsigned int flags = 0;
3421
3422 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
3423 if (!rtnh)
3424 goto nla_put_failure;
3425
3426 rtnh->rtnh_hops = 0;
3427 rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
3428
5be083ce 3429 if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
beb1afac
DA
3430 goto nla_put_failure;
3431
3432 rtnh->rtnh_flags = flags;
3433
3434 /* length of rtnetlink header + attributes */
3435 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
3436
3437 return 0;
3438
3439nla_put_failure:
3440 return -EMSGSIZE;
339bf98f
TG
3441}
3442
191cd582
BH
3443static int rt6_fill_node(struct net *net,
3444 struct sk_buff *skb, struct rt6_info *rt,
0d51aa80 3445 struct in6_addr *dst, struct in6_addr *src,
15e47304 3446 int iif, int type, u32 portid, u32 seq,
f8cfe2ce 3447 unsigned int flags)
1da177e4 3448{
4b32b5ad 3449 u32 metrics[RTAX_MAX];
1da177e4 3450 struct rtmsg *rtm;
2d7202bf 3451 struct nlmsghdr *nlh;
e3703b3d 3452 long expires;
9e762a4a 3453 u32 table;
1da177e4 3454
15e47304 3455 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
38308473 3456 if (!nlh)
26932566 3457 return -EMSGSIZE;
2d7202bf
TG
3458
3459 rtm = nlmsg_data(nlh);
1da177e4
LT
3460 rtm->rtm_family = AF_INET6;
3461 rtm->rtm_dst_len = rt->rt6i_dst.plen;
3462 rtm->rtm_src_len = rt->rt6i_src.plen;
3463 rtm->rtm_tos = 0;
c71099ac 3464 if (rt->rt6i_table)
9e762a4a 3465 table = rt->rt6i_table->tb6_id;
c71099ac 3466 else
9e762a4a
PM
3467 table = RT6_TABLE_UNSPEC;
3468 rtm->rtm_table = table;
c78679e8
DM
3469 if (nla_put_u32(skb, RTA_TABLE, table))
3470 goto nla_put_failure;
ef2c7d7b
ND
3471 if (rt->rt6i_flags & RTF_REJECT) {
3472 switch (rt->dst.error) {
3473 case -EINVAL:
3474 rtm->rtm_type = RTN_BLACKHOLE;
3475 break;
3476 case -EACCES:
3477 rtm->rtm_type = RTN_PROHIBIT;
3478 break;
b4949ab2
ND
3479 case -EAGAIN:
3480 rtm->rtm_type = RTN_THROW;
3481 break;
ef2c7d7b
ND
3482 default:
3483 rtm->rtm_type = RTN_UNREACHABLE;
3484 break;
3485 }
3486 }
38308473 3487 else if (rt->rt6i_flags & RTF_LOCAL)
ab79ad14 3488 rtm->rtm_type = RTN_LOCAL;
4ee39733
DA
3489 else if (rt->rt6i_flags & RTF_ANYCAST)
3490 rtm->rtm_type = RTN_ANYCAST;
d1918542 3491 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
1da177e4
LT
3492 rtm->rtm_type = RTN_LOCAL;
3493 else
3494 rtm->rtm_type = RTN_UNICAST;
3495 rtm->rtm_flags = 0;
3496 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
3497 rtm->rtm_protocol = rt->rt6i_protocol;
38308473 3498 if (rt->rt6i_flags & RTF_DYNAMIC)
1da177e4 3499 rtm->rtm_protocol = RTPROT_REDIRECT;
f0396f60
DO
3500 else if (rt->rt6i_flags & RTF_ADDRCONF) {
3501 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
3502 rtm->rtm_protocol = RTPROT_RA;
3503 else
3504 rtm->rtm_protocol = RTPROT_KERNEL;
3505 }
1da177e4 3506
38308473 3507 if (rt->rt6i_flags & RTF_CACHE)
1da177e4
LT
3508 rtm->rtm_flags |= RTM_F_CLONED;
3509
3510 if (dst) {
930345ea 3511 if (nla_put_in6_addr(skb, RTA_DST, dst))
c78679e8 3512 goto nla_put_failure;
1ab1457c 3513 rtm->rtm_dst_len = 128;
1da177e4 3514 } else if (rtm->rtm_dst_len)
930345ea 3515 if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
c78679e8 3516 goto nla_put_failure;
1da177e4
LT
3517#ifdef CONFIG_IPV6_SUBTREES
3518 if (src) {
930345ea 3519 if (nla_put_in6_addr(skb, RTA_SRC, src))
c78679e8 3520 goto nla_put_failure;
1ab1457c 3521 rtm->rtm_src_len = 128;
c78679e8 3522 } else if (rtm->rtm_src_len &&
930345ea 3523 nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
c78679e8 3524 goto nla_put_failure;
1da177e4 3525#endif
7bc570c8
YH
3526 if (iif) {
3527#ifdef CONFIG_IPV6_MROUTE
3528 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
fd61c6ba
DA
3529 int err = ip6mr_get_route(net, skb, rtm, portid);
3530
3531 if (err == 0)
3532 return 0;
3533 if (err < 0)
3534 goto nla_put_failure;
7bc570c8
YH
3535 } else
3536#endif
c78679e8
DM
3537 if (nla_put_u32(skb, RTA_IIF, iif))
3538 goto nla_put_failure;
7bc570c8 3539 } else if (dst) {
1da177e4 3540 struct in6_addr saddr_buf;
c78679e8 3541 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
930345ea 3542 nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3543 goto nla_put_failure;
1da177e4 3544 }
2d7202bf 3545
c3968a85
DW
3546 if (rt->rt6i_prefsrc.plen) {
3547 struct in6_addr saddr_buf;
4e3fd7a0 3548 saddr_buf = rt->rt6i_prefsrc.addr;
930345ea 3549 if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
c78679e8 3550 goto nla_put_failure;
c3968a85
DW
3551 }
3552
4b32b5ad
MKL
3553 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
3554 if (rt->rt6i_pmtu)
3555 metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
3556 if (rtnetlink_put_metrics(skb, metrics) < 0)
2d7202bf
TG
3557 goto nla_put_failure;
3558
c78679e8
DM
3559 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
3560 goto nla_put_failure;
8253947e 3561
beb1afac
DA
3562 /* For multipath routes, walk the siblings list and add
3563 * each as a nexthop within RTA_MULTIPATH.
3564 */
3565 if (rt->rt6i_nsiblings) {
3566 struct rt6_info *sibling, *next_sibling;
3567 struct nlattr *mp;
3568
3569 mp = nla_nest_start(skb, RTA_MULTIPATH);
3570 if (!mp)
3571 goto nla_put_failure;
3572
3573 if (rt6_add_nexthop(skb, rt) < 0)
3574 goto nla_put_failure;
3575
3576 list_for_each_entry_safe(sibling, next_sibling,
3577 &rt->rt6i_siblings, rt6i_siblings) {
3578 if (rt6_add_nexthop(skb, sibling) < 0)
3579 goto nla_put_failure;
3580 }
3581
3582 nla_nest_end(skb, mp);
3583 } else {
5be083ce 3584 if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
beb1afac
DA
3585 goto nla_put_failure;
3586 }
3587
8253947e 3588 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
69cdf8f9 3589
87a50699 3590 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
e3703b3d 3591 goto nla_put_failure;
2d7202bf 3592
c78ba6d6
LR
3593 if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
3594 goto nla_put_failure;
3595
19e42e45 3596
053c095a
JB
3597 nlmsg_end(skb, nlh);
3598 return 0;
2d7202bf
TG
3599
3600nla_put_failure:
26932566
PM
3601 nlmsg_cancel(skb, nlh);
3602 return -EMSGSIZE;
1da177e4
LT
3603}
3604
1b43af54 3605int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1da177e4
LT
3606{
3607 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1f17e2f2
DA
3608 struct net *net = arg->net;
3609
3610 if (rt == net->ipv6.ip6_null_entry)
3611 return 0;
1da177e4 3612
2d7202bf
TG
3613 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3614 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
f8cfe2ce
DA
3615
3616 /* user wants prefix routes only */
3617 if (rtm->rtm_flags & RTM_F_PREFIX &&
3618 !(rt->rt6i_flags & RTF_PREFIX_RT)) {
3619 /* success since this is not a prefix route */
3620 return 1;
3621 }
3622 }
1da177e4 3623
1f17e2f2 3624 return rt6_fill_node(net,
191cd582 3625 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
15e47304 3626 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
f8cfe2ce 3627 NLM_F_MULTI);
1da177e4
LT
3628}
3629
c21ef3e3
DA
3630static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
3631 struct netlink_ext_ack *extack)
1da177e4 3632{
3b1e0a65 3633 struct net *net = sock_net(in_skb->sk);
ab364a6f 3634 struct nlattr *tb[RTA_MAX+1];
18c3a61c
RP
3635 int err, iif = 0, oif = 0;
3636 struct dst_entry *dst;
ab364a6f 3637 struct rt6_info *rt;
1da177e4 3638 struct sk_buff *skb;
ab364a6f 3639 struct rtmsg *rtm;
4c9483b2 3640 struct flowi6 fl6;
18c3a61c 3641 bool fibmatch;
1da177e4 3642
fceb6435 3643 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
c21ef3e3 3644 extack);
ab364a6f
TG
3645 if (err < 0)
3646 goto errout;
1da177e4 3647
ab364a6f 3648 err = -EINVAL;
4c9483b2 3649 memset(&fl6, 0, sizeof(fl6));
38b7097b
HFS
3650 rtm = nlmsg_data(nlh);
3651 fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);
18c3a61c 3652 fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH);
1da177e4 3653
ab364a6f
TG
3654 if (tb[RTA_SRC]) {
3655 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
3656 goto errout;
3657
4e3fd7a0 3658 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
ab364a6f
TG
3659 }
3660
3661 if (tb[RTA_DST]) {
3662 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
3663 goto errout;
3664
4e3fd7a0 3665 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
ab364a6f
TG
3666 }
3667
3668 if (tb[RTA_IIF])
3669 iif = nla_get_u32(tb[RTA_IIF]);
3670
3671 if (tb[RTA_OIF])
72331bc0 3672 oif = nla_get_u32(tb[RTA_OIF]);
1da177e4 3673
2e47b291
LC
3674 if (tb[RTA_MARK])
3675 fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);
3676
622ec2c9
LC
3677 if (tb[RTA_UID])
3678 fl6.flowi6_uid = make_kuid(current_user_ns(),
3679 nla_get_u32(tb[RTA_UID]));
3680 else
3681 fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
3682
1da177e4
LT
3683 if (iif) {
3684 struct net_device *dev;
72331bc0
SL
3685 int flags = 0;
3686
5578689a 3687 dev = __dev_get_by_index(net, iif);
1da177e4
LT
3688 if (!dev) {
3689 err = -ENODEV;
ab364a6f 3690 goto errout;
1da177e4 3691 }
72331bc0
SL
3692
3693 fl6.flowi6_iif = iif;
3694
3695 if (!ipv6_addr_any(&fl6.saddr))
3696 flags |= RT6_LOOKUP_F_HAS_SADDR;
3697
18c3a61c
RP
3698 if (!fibmatch)
3699 dst = ip6_route_input_lookup(net, dev, &fl6, flags);
72331bc0
SL
3700 } else {
3701 fl6.flowi6_oif = oif;
3702
18c3a61c
RP
3703 if (!fibmatch)
3704 dst = ip6_route_output(net, NULL, &fl6);
3705 }
3706
3707 if (fibmatch)
3708 dst = ip6_route_lookup(net, &fl6, 0);
3709
3710 rt = container_of(dst, struct rt6_info, dst);
3711 if (rt->dst.error) {
3712 err = rt->dst.error;
3713 ip6_rt_put(rt);
3714 goto errout;
1da177e4
LT
3715 }
3716
9d6acb3b
WC
3717 if (rt == net->ipv6.ip6_null_entry) {
3718 err = rt->dst.error;
3719 ip6_rt_put(rt);
3720 goto errout;
3721 }
3722
ab364a6f 3723 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
38308473 3724 if (!skb) {
94e187c0 3725 ip6_rt_put(rt);
ab364a6f
TG
3726 err = -ENOBUFS;
3727 goto errout;
3728 }
1da177e4 3729
d8d1f30b 3730 skb_dst_set(skb, &rt->dst);
18c3a61c
RP
3731 if (fibmatch)
3732 err = rt6_fill_node(net, skb, rt, NULL, NULL, iif,
3733 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3734 nlh->nlmsg_seq, 0);
3735 else
3736 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
3737 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
3738 nlh->nlmsg_seq, 0);
1da177e4 3739 if (err < 0) {
ab364a6f
TG
3740 kfree_skb(skb);
3741 goto errout;
1da177e4
LT
3742 }
3743
15e47304 3744 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
ab364a6f 3745errout:
1da177e4 3746 return err;
1da177e4
LT
3747}
3748
37a1d361
RP
3749void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
3750 unsigned int nlm_flags)
1da177e4
LT
3751{
3752 struct sk_buff *skb;
5578689a 3753 struct net *net = info->nl_net;
528c4ceb
DL
3754 u32 seq;
3755 int err;
3756
3757 err = -ENOBUFS;
38308473 3758 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
86872cb5 3759
19e42e45 3760 skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
38308473 3761 if (!skb)
21713ebc
TG
3762 goto errout;
3763
191cd582 3764 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
f8cfe2ce 3765 event, info->portid, seq, nlm_flags);
26932566
PM
3766 if (err < 0) {
3767 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
3768 WARN_ON(err == -EMSGSIZE);
3769 kfree_skb(skb);
3770 goto errout;
3771 }
15e47304 3772 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
1ce85fe4
PNA
3773 info->nlh, gfp_any());
3774 return;
21713ebc
TG
3775errout:
3776 if (err < 0)
5578689a 3777 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
1da177e4
LT
3778}
3779
8ed67789 3780static int ip6_route_dev_notify(struct notifier_block *this,
351638e7 3781 unsigned long event, void *ptr)
8ed67789 3782{
351638e7 3783 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
c346dca1 3784 struct net *net = dev_net(dev);
8ed67789 3785
242d3a49
WC
3786 if (!(dev->flags & IFF_LOOPBACK))
3787 return NOTIFY_OK;
3788
3789 if (event == NETDEV_REGISTER) {
d8d1f30b 3790 net->ipv6.ip6_null_entry->dst.dev = dev;
8ed67789
DL
3791 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
3792#ifdef CONFIG_IPV6_MULTIPLE_TABLES
d8d1f30b 3793 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
8ed67789 3794 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
d8d1f30b 3795 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
8ed67789 3796 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
242d3a49
WC
3797#endif
3798 } else if (event == NETDEV_UNREGISTER) {
3799 in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
3800#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3801 in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
3802 in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
8ed67789
DL
3803#endif
3804 }
3805
3806 return NOTIFY_OK;
3807}
3808
1da177e4
LT
3809/*
3810 * /proc
3811 */
3812
3813#ifdef CONFIG_PROC_FS
3814
33120b30
AD
3815static const struct file_operations ipv6_route_proc_fops = {
3816 .owner = THIS_MODULE,
3817 .open = ipv6_route_open,
3818 .read = seq_read,
3819 .llseek = seq_lseek,
8d2ca1d7 3820 .release = seq_release_net,
33120b30
AD
3821};
3822
1da177e4
LT
3823static int rt6_stats_seq_show(struct seq_file *seq, void *v)
3824{
69ddb805 3825 struct net *net = (struct net *)seq->private;
1da177e4 3826 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
69ddb805
DL
3827 net->ipv6.rt6_stats->fib_nodes,
3828 net->ipv6.rt6_stats->fib_route_nodes,
3829 net->ipv6.rt6_stats->fib_rt_alloc,
3830 net->ipv6.rt6_stats->fib_rt_entries,
3831 net->ipv6.rt6_stats->fib_rt_cache,
fc66f95c 3832 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
69ddb805 3833 net->ipv6.rt6_stats->fib_discarded_routes);
1da177e4
LT
3834
3835 return 0;
3836}
3837
3838static int rt6_stats_seq_open(struct inode *inode, struct file *file)
3839{
de05c557 3840 return single_open_net(inode, file, rt6_stats_seq_show);
69ddb805
DL
3841}
3842
9a32144e 3843static const struct file_operations rt6_stats_seq_fops = {
1da177e4
LT
3844 .owner = THIS_MODULE,
3845 .open = rt6_stats_seq_open,
3846 .read = seq_read,
3847 .llseek = seq_lseek,
b6fcbdb4 3848 .release = single_release_net,
1da177e4
LT
3849};
3850#endif /* CONFIG_PROC_FS */
3851
3852#ifdef CONFIG_SYSCTL
3853
1da177e4 3854static
fe2c6338 3855int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
1da177e4
LT
3856 void __user *buffer, size_t *lenp, loff_t *ppos)
3857{
c486da34
LAG
3858 struct net *net;
3859 int delay;
3860 if (!write)
1da177e4 3861 return -EINVAL;
c486da34
LAG
3862
3863 net = (struct net *)ctl->extra1;
3864 delay = net->ipv6.sysctl.flush_delay;
3865 proc_dointvec(ctl, write, buffer, lenp, ppos);
2ac3ac8f 3866 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
c486da34 3867 return 0;
1da177e4
LT
3868}
3869
fe2c6338 3870struct ctl_table ipv6_route_table_template[] = {
1ab1457c 3871 {
1da177e4 3872 .procname = "flush",
4990509f 3873 .data = &init_net.ipv6.sysctl.flush_delay,
1da177e4 3874 .maxlen = sizeof(int),
89c8b3a1 3875 .mode = 0200,
6d9f239a 3876 .proc_handler = ipv6_sysctl_rtcache_flush
1da177e4
LT
3877 },
3878 {
1da177e4 3879 .procname = "gc_thresh",
9a7ec3a9 3880 .data = &ip6_dst_ops_template.gc_thresh,
1da177e4
LT
3881 .maxlen = sizeof(int),
3882 .mode = 0644,
6d9f239a 3883 .proc_handler = proc_dointvec,
1da177e4
LT
3884 },
3885 {
1da177e4 3886 .procname = "max_size",
4990509f 3887 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
1da177e4
LT
3888 .maxlen = sizeof(int),
3889 .mode = 0644,
6d9f239a 3890 .proc_handler = proc_dointvec,
1da177e4
LT
3891 },
3892 {
1da177e4 3893 .procname = "gc_min_interval",
4990509f 3894 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3895 .maxlen = sizeof(int),
3896 .mode = 0644,
6d9f239a 3897 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3898 },
3899 {
1da177e4 3900 .procname = "gc_timeout",
4990509f 3901 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
1da177e4
LT
3902 .maxlen = sizeof(int),
3903 .mode = 0644,
6d9f239a 3904 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3905 },
3906 {
1da177e4 3907 .procname = "gc_interval",
4990509f 3908 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
1da177e4
LT
3909 .maxlen = sizeof(int),
3910 .mode = 0644,
6d9f239a 3911 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3912 },
3913 {
1da177e4 3914 .procname = "gc_elasticity",
4990509f 3915 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
1da177e4
LT
3916 .maxlen = sizeof(int),
3917 .mode = 0644,
f3d3f616 3918 .proc_handler = proc_dointvec,
1da177e4
LT
3919 },
3920 {
1da177e4 3921 .procname = "mtu_expires",
4990509f 3922 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
1da177e4
LT
3923 .maxlen = sizeof(int),
3924 .mode = 0644,
6d9f239a 3925 .proc_handler = proc_dointvec_jiffies,
1da177e4
LT
3926 },
3927 {
1da177e4 3928 .procname = "min_adv_mss",
4990509f 3929 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
1da177e4
LT
3930 .maxlen = sizeof(int),
3931 .mode = 0644,
f3d3f616 3932 .proc_handler = proc_dointvec,
1da177e4
LT
3933 },
3934 {
1da177e4 3935 .procname = "gc_min_interval_ms",
4990509f 3936 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
1da177e4
LT
3937 .maxlen = sizeof(int),
3938 .mode = 0644,
6d9f239a 3939 .proc_handler = proc_dointvec_ms_jiffies,
1da177e4 3940 },
f8572d8f 3941 { }
1da177e4
LT
3942};
3943
2c8c1e72 3944struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
760f2d01
DL
3945{
3946 struct ctl_table *table;
3947
3948 table = kmemdup(ipv6_route_table_template,
3949 sizeof(ipv6_route_table_template),
3950 GFP_KERNEL);
5ee09105
YH
3951
3952 if (table) {
3953 table[0].data = &net->ipv6.sysctl.flush_delay;
c486da34 3954 table[0].extra1 = net;
86393e52 3955 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
5ee09105
YH
3956 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
3957 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
3958 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
3959 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
3960 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
3961 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
3962 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
9c69fabe 3963 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
464dc801
EB
3964
3965 /* Don't export sysctls to unprivileged users */
3966 if (net->user_ns != &init_user_ns)
3967 table[0].procname = NULL;
5ee09105
YH
3968 }
3969
760f2d01
DL
3970 return table;
3971}
1da177e4
LT
3972#endif
3973
2c8c1e72 3974static int __net_init ip6_route_net_init(struct net *net)
cdb18761 3975{
633d424b 3976 int ret = -ENOMEM;
8ed67789 3977
86393e52
AD
3978 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3979 sizeof(net->ipv6.ip6_dst_ops));
f2fc6a54 3980
fc66f95c
ED
3981 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3982 goto out_ip6_dst_ops;
3983
8ed67789
DL
3984 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3985 sizeof(*net->ipv6.ip6_null_entry),
3986 GFP_KERNEL);
3987 if (!net->ipv6.ip6_null_entry)
fc66f95c 3988 goto out_ip6_dst_entries;
d8d1f30b 3989 net->ipv6.ip6_null_entry->dst.path =
8ed67789 3990 (struct dst_entry *)net->ipv6.ip6_null_entry;
d8d1f30b 3991 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
3992 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3993 ip6_template_metrics, true);
8ed67789
DL
3994
3995#ifdef CONFIG_IPV6_MULTIPLE_TABLES
3996 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3997 sizeof(*net->ipv6.ip6_prohibit_entry),
3998 GFP_KERNEL);
68fffc67
PZ
3999 if (!net->ipv6.ip6_prohibit_entry)
4000 goto out_ip6_null_entry;
d8d1f30b 4001 net->ipv6.ip6_prohibit_entry->dst.path =
8ed67789 4002 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
d8d1f30b 4003 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4004 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
4005 ip6_template_metrics, true);
8ed67789
DL
4006
4007 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
4008 sizeof(*net->ipv6.ip6_blk_hole_entry),
4009 GFP_KERNEL);
68fffc67
PZ
4010 if (!net->ipv6.ip6_blk_hole_entry)
4011 goto out_ip6_prohibit_entry;
d8d1f30b 4012 net->ipv6.ip6_blk_hole_entry->dst.path =
8ed67789 4013 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
d8d1f30b 4014 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
62fa8a84
DM
4015 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
4016 ip6_template_metrics, true);
8ed67789
DL
4017#endif
4018
b339a47c
PZ
4019 net->ipv6.sysctl.flush_delay = 0;
4020 net->ipv6.sysctl.ip6_rt_max_size = 4096;
4021 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
4022 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
4023 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
4024 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
4025 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
4026 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
4027
6891a346
BT
4028 net->ipv6.ip6_rt_gc_expire = 30*HZ;
4029
8ed67789
DL
4030 ret = 0;
4031out:
4032 return ret;
f2fc6a54 4033
68fffc67
PZ
4034#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4035out_ip6_prohibit_entry:
4036 kfree(net->ipv6.ip6_prohibit_entry);
4037out_ip6_null_entry:
4038 kfree(net->ipv6.ip6_null_entry);
4039#endif
fc66f95c
ED
4040out_ip6_dst_entries:
4041 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
f2fc6a54 4042out_ip6_dst_ops:
f2fc6a54 4043 goto out;
cdb18761
DL
4044}
4045
2c8c1e72 4046static void __net_exit ip6_route_net_exit(struct net *net)
cdb18761 4047{
8ed67789
DL
4048 kfree(net->ipv6.ip6_null_entry);
4049#ifdef CONFIG_IPV6_MULTIPLE_TABLES
4050 kfree(net->ipv6.ip6_prohibit_entry);
4051 kfree(net->ipv6.ip6_blk_hole_entry);
4052#endif
41bb78b4 4053 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
cdb18761
DL
4054}
4055
d189634e
TG
4056static int __net_init ip6_route_net_init_late(struct net *net)
4057{
4058#ifdef CONFIG_PROC_FS
d4beaa66
G
4059 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
4060 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
d189634e
TG
4061#endif
4062 return 0;
4063}
4064
4065static void __net_exit ip6_route_net_exit_late(struct net *net)
4066{
4067#ifdef CONFIG_PROC_FS
ece31ffd
G
4068 remove_proc_entry("ipv6_route", net->proc_net);
4069 remove_proc_entry("rt6_stats", net->proc_net);
d189634e
TG
4070#endif
4071}
4072
cdb18761
DL
4073static struct pernet_operations ip6_route_net_ops = {
4074 .init = ip6_route_net_init,
4075 .exit = ip6_route_net_exit,
4076};
4077
c3426b47
DM
4078static int __net_init ipv6_inetpeer_init(struct net *net)
4079{
4080 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
4081
4082 if (!bp)
4083 return -ENOMEM;
4084 inet_peer_base_init(bp);
4085 net->ipv6.peers = bp;
4086 return 0;
4087}
4088
4089static void __net_exit ipv6_inetpeer_exit(struct net *net)
4090{
4091 struct inet_peer_base *bp = net->ipv6.peers;
4092
4093 net->ipv6.peers = NULL;
56a6b248 4094 inetpeer_invalidate_tree(bp);
c3426b47
DM
4095 kfree(bp);
4096}
4097
2b823f72 4098static struct pernet_operations ipv6_inetpeer_ops = {
c3426b47
DM
4099 .init = ipv6_inetpeer_init,
4100 .exit = ipv6_inetpeer_exit,
4101};
4102
d189634e
TG
4103static struct pernet_operations ip6_route_net_late_ops = {
4104 .init = ip6_route_net_init_late,
4105 .exit = ip6_route_net_exit_late,
4106};
4107
8ed67789
DL
4108static struct notifier_block ip6_route_dev_notifier = {
4109 .notifier_call = ip6_route_dev_notify,
242d3a49 4110 .priority = ADDRCONF_NOTIFY_PRIORITY - 10,
8ed67789
DL
4111};
4112
2f460933
WC
4113void __init ip6_route_init_special_entries(void)
4114{
4115 /* Registering of the loopback is done before this portion of code,
4116 * the loopback reference in rt6_info will not be taken, do it
4117 * manually for init_net */
4118 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
4119 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4120 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
4121 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
4122 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4123 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
4124 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
4125 #endif
4126}
4127
433d49c3 4128int __init ip6_route_init(void)
1da177e4 4129{
433d49c3 4130 int ret;
8d0b94af 4131 int cpu;
433d49c3 4132
9a7ec3a9
DL
4133 ret = -ENOMEM;
4134 ip6_dst_ops_template.kmem_cachep =
e5d679f3 4135 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
f845ab6b 4136 SLAB_HWCACHE_ALIGN, NULL);
9a7ec3a9 4137 if (!ip6_dst_ops_template.kmem_cachep)
c19a28e1 4138 goto out;
14e50e57 4139
fc66f95c 4140 ret = dst_entries_init(&ip6_dst_blackhole_ops);
8ed67789 4141 if (ret)
bdb3289f 4142 goto out_kmem_cache;
bdb3289f 4143
c3426b47
DM
4144 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
4145 if (ret)
e8803b6c 4146 goto out_dst_entries;
2a0c451a 4147
7e52b33b
DM
4148 ret = register_pernet_subsys(&ip6_route_net_ops);
4149 if (ret)
4150 goto out_register_inetpeer;
c3426b47 4151
5dc121e9
AE
4152 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
4153
e8803b6c 4154 ret = fib6_init();
433d49c3 4155 if (ret)
8ed67789 4156 goto out_register_subsys;
433d49c3 4157
433d49c3
DL
4158 ret = xfrm6_init();
4159 if (ret)
e8803b6c 4160 goto out_fib6_init;
c35b7e72 4161
433d49c3
DL
4162 ret = fib6_rules_init();
4163 if (ret)
4164 goto xfrm6_init;
7e5449c2 4165
d189634e
TG
4166 ret = register_pernet_subsys(&ip6_route_net_late_ops);
4167 if (ret)
4168 goto fib6_rules_init;
4169
433d49c3 4170 ret = -ENOBUFS;
c7ac8679
GR
4171 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
4172 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
4173 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
d189634e 4174 goto out_register_late_subsys;
c127ea2c 4175
8ed67789 4176 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
cdb18761 4177 if (ret)
d189634e 4178 goto out_register_late_subsys;
8ed67789 4179
8d0b94af
MKL
4180 for_each_possible_cpu(cpu) {
4181 struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
4182
4183 INIT_LIST_HEAD(&ul->head);
4184 spin_lock_init(&ul->lock);
4185 }
4186
433d49c3
DL
4187out:
4188 return ret;
4189
d189634e
TG
4190out_register_late_subsys:
4191 unregister_pernet_subsys(&ip6_route_net_late_ops);
433d49c3 4192fib6_rules_init:
433d49c3
DL
4193 fib6_rules_cleanup();
4194xfrm6_init:
433d49c3 4195 xfrm6_fini();
2a0c451a
TG
4196out_fib6_init:
4197 fib6_gc_cleanup();
8ed67789
DL
4198out_register_subsys:
4199 unregister_pernet_subsys(&ip6_route_net_ops);
7e52b33b
DM
4200out_register_inetpeer:
4201 unregister_pernet_subsys(&ipv6_inetpeer_ops);
fc66f95c
ED
4202out_dst_entries:
4203 dst_entries_destroy(&ip6_dst_blackhole_ops);
433d49c3 4204out_kmem_cache:
f2fc6a54 4205 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
433d49c3 4206 goto out;
1da177e4
LT
4207}
4208
4209void ip6_route_cleanup(void)
4210{
8ed67789 4211 unregister_netdevice_notifier(&ip6_route_dev_notifier);
d189634e 4212 unregister_pernet_subsys(&ip6_route_net_late_ops);
101367c2 4213 fib6_rules_cleanup();
1da177e4 4214 xfrm6_fini();
1da177e4 4215 fib6_gc_cleanup();
c3426b47 4216 unregister_pernet_subsys(&ipv6_inetpeer_ops);
8ed67789 4217 unregister_pernet_subsys(&ip6_route_net_ops);
41bb78b4 4218 dst_entries_destroy(&ip6_dst_blackhole_ops);
f2fc6a54 4219 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
1da177e4 4220}