]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - net/ipv6/route.c
net: use dst_confirm_neigh for UDP, RAW, ICMP, L2TP
[mirror_ubuntu-zesty-kernel.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/dst_metadata.h>
58 #include <net/xfrm.h>
59 #include <net/netevent.h>
60 #include <net/netlink.h>
61 #include <net/nexthop.h>
62 #include <net/lwtunnel.h>
63 #include <net/ip_tunnels.h>
64 #include <net/l3mdev.h>
65 #include <trace/events/fib6.h>
66
67 #include <linux/uaccess.h>
68
69 #ifdef CONFIG_SYSCTL
70 #include <linux/sysctl.h>
71 #endif
72
/* Neighbour-reachability verdicts used when scoring candidate routes.
 * Negative values are failures of decreasing severity:
 *  RT6_NUD_FAIL_HARD  - route unusable for this lookup, skip it
 *  RT6_NUD_FAIL_PROBE - neighbour is in NUD_FAILED (see rt6_check_neigh())
 *  RT6_NUD_FAIL_DO_RR - no neighbour entry; caller falls back to
 *                       round-robin selection (see find_match())
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD = -3,
	RT6_NUD_FAIL_PROBE = -2,
	RT6_NUD_FAIL_DO_RR = -1,
	RT6_NUD_SUCCEED = 1
};
79
80 static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort);
81 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
82 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
83 static unsigned int ip6_mtu(const struct dst_entry *dst);
84 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
85 static void ip6_dst_destroy(struct dst_entry *);
86 static void ip6_dst_ifdown(struct dst_entry *,
87 struct net_device *dev, int how);
88 static int ip6_dst_gc(struct dst_ops *ops);
89
90 static int ip6_pkt_discard(struct sk_buff *skb);
91 static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
92 static int ip6_pkt_prohibit(struct sk_buff *skb);
93 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
94 static void ip6_link_failure(struct sk_buff *skb);
95 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
96 struct sk_buff *skb, u32 mtu);
97 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
98 struct sk_buff *skb);
99 static void rt6_dst_from_metrics_check(struct rt6_info *rt);
100 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
101
102 #ifdef CONFIG_IPV6_ROUTE_INFO
103 static struct rt6_info *rt6_add_route_info(struct net *net,
104 const struct in6_addr *prefix, int prefixlen,
105 const struct in6_addr *gwaddr,
106 struct net_device *dev,
107 unsigned int pref);
108 static struct rt6_info *rt6_get_route_info(struct net *net,
109 const struct in6_addr *prefix, int prefixlen,
110 const struct in6_addr *gwaddr,
111 struct net_device *dev);
112 #endif
113
/* Per-cpu list of uncached (DST_NOCACHE) rt6_info entries, so they can
 * be re-pointed at the loopback device when their device goes away
 * (see rt6_uncached_list_flush_dev()).
 */
struct uncached_list {
	spinlock_t lock;	/* protects @head */
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
120
/* Mark @rt as uncached and link it onto this CPU's uncached list so
 * device-teardown can later find and fix it up.
 */
static void rt6_uncached_list_add(struct rt6_info *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list);

	rt->dst.flags |= DST_NOCACHE;
	/* remember which CPU's list we joined, for removal later */
	rt->rt6i_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt6i_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
132
/* Unlink @rt from the uncached list it was added to, if any.
 * Safe to call on routes that were never added (list_empty check).
 */
static void rt6_uncached_list_del(struct rt6_info *rt)
{
	if (!list_empty(&rt->rt6i_uncached)) {
		struct uncached_list *ul = rt->rt6i_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt6i_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
143
/* @dev is going away: walk every CPU's uncached list and re-point any
 * route still referencing @dev (via its idev or its dst.dev) at the
 * namespace loopback device, transferring the reference counts.
 */
static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
{
	struct net_device *loopback_dev = net->loopback_dev;
	int cpu;

	/* nothing to migrate if loopback itself is being torn down */
	if (dev == loopback_dev)
		return;

	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
		struct rt6_info *rt;

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
			struct inet6_dev *rt_idev = rt->rt6i_idev;
			struct net_device *rt_dev = rt->dst.dev;

			/* swap the inet6_dev reference to loopback */
			if (rt_idev->dev == dev) {
				rt->rt6i_idev = in6_dev_get(loopback_dev);
				in6_dev_put(rt_idev);
			}

			/* swap the net_device reference to loopback */
			if (rt_dev == dev) {
				rt->dst.dev = loopback_dev;
				dev_hold(rt->dst.dev);
				dev_put(rt_dev);
			}
		}
		spin_unlock_bh(&ul->lock);
	}
}
175
/* A per-cpu clone shares metrics with its parent route (dst.from);
 * return a writable pointer into the parent's metrics.
 */
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
	return dst_metrics_write_ptr(rt->dst.from);
}
180
181 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
182 {
183 struct rt6_info *rt = (struct rt6_info *)dst;
184
185 if (rt->rt6i_flags & RTF_PCPU)
186 return rt6_pcpu_cow_metrics(rt);
187 else if (rt->rt6i_flags & RTF_CACHE)
188 return NULL;
189 else
190 return dst_cow_metrics_generic(dst, old);
191 }
192
193 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
194 struct sk_buff *skb,
195 const void *daddr)
196 {
197 struct in6_addr *p = &rt->rt6i_gateway;
198
199 if (!ipv6_addr_any(p))
200 return (const void *) p;
201 else if (skb)
202 return &ipv6_hdr(skb)->daddr;
203 return daddr;
204 }
205
206 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
207 struct sk_buff *skb,
208 const void *daddr)
209 {
210 struct rt6_info *rt = (struct rt6_info *) dst;
211 struct neighbour *n;
212
213 daddr = choose_neigh_daddr(rt, skb, daddr);
214 n = __ipv6_neigh_lookup(dst->dev, daddr);
215 if (n)
216 return n;
217 return neigh_create(&nd_tbl, daddr, dst->dev);
218 }
219
/* dst_ops->confirm_neigh hook: mark the next-hop neighbour entry as
 * recently confirmed.  Skips devices that do no neighbour resolution
 * and multicast destinations, which have no unicast neighbour entry.
 */
static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr)
{
	struct net_device *dev = dst->dev;
	struct rt6_info *rt = (struct rt6_info *)dst;

	daddr = choose_neigh_daddr(rt, NULL, daddr);
	if (!daddr)
		return;
	if (dev->flags & (IFF_NOARP | IFF_LOOPBACK))
		return;
	if (ipv6_addr_is_multicast((const struct in6_addr *)daddr))
		return;
	__ipv6_confirm_neigh(dev, daddr);
}
234
/* dst_ops for regular IPv6 routes; copied per-namespace into
 * net->ipv6.ip6_dst_ops (see __ip6_dst_alloc()).
 */
static struct dst_ops ip6_dst_ops_template = {
	.family			=	AF_INET6,
	.gc			=	ip6_dst_gc,
	.gc_thresh		=	1024,
	.check			=	ip6_dst_check,
	.default_advmss		=	ip6_default_advmss,
	.mtu			=	ip6_mtu,
	.cow_metrics		=	ipv6_cow_metrics,
	.destroy		=	ip6_dst_destroy,
	.ifdown			=	ip6_dst_ifdown,
	.negative_advice	=	ip6_negative_advice,
	.link_failure		=	ip6_link_failure,
	.update_pmtu		=	ip6_rt_update_pmtu,
	.redirect		=	rt6_do_redirect,
	.local_out		=	__ip6_local_out,
	.neigh_lookup		=	ip6_neigh_lookup,
	.confirm_neigh		=	ip6_confirm_neigh,
};
253
254 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
255 {
256 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
257
258 return mtu ? : dst->dev->mtu;
259 }
260
/* Blackhole routes deliberately ignore PMTU updates. */
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					 struct sk_buff *skb, u32 mtu)
{
}
265
/* Blackhole routes deliberately ignore redirects. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
}
270
/* dst_ops for blackhole copies created by ip6_blackhole_route():
 * PMTU and redirect events are no-ops, lookup/metrics stay generic.
 */
static struct dst_ops ip6_dst_blackhole_ops = {
	.family			=	AF_INET6,
	.destroy		=	ip6_dst_destroy,
	.check			=	ip6_dst_check,
	.mtu			=	ip6_blackhole_mtu,
	.default_advmss		=	ip6_default_advmss,
	.update_pmtu		=	ip6_rt_blackhole_update_pmtu,
	.redirect		=	ip6_rt_blackhole_redirect,
	.cow_metrics		=	dst_cow_metrics_generic,
	.neigh_lookup		=	ip6_neigh_lookup,
};
282
/* Metrics template for the special route entries below; hop limit 0
 * means "use the namespace default".
 */
static const u32 ip6_template_metrics[RTAX_MAX] = {
	[RTAX_HOPLIMIT - 1] = 0,
};
286
/* Template for the per-namespace "null" route: matches when nothing
 * else does and discards traffic with -ENETUNREACH.
 */
static const struct rt6_info ip6_null_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -ENETUNREACH,
		.input		= ip6_pkt_discard,
		.output		= ip6_pkt_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};
301
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Template for "prohibit" rule targets: reject with -EACCES. */
static const struct rt6_info ip6_prohibit_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EACCES,
		.input		= ip6_pkt_prohibit,
		.output		= ip6_pkt_prohibit_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};

/* Template for "blackhole" rule targets: silently discard (-EINVAL). */
static const struct rt6_info ip6_blk_hole_entry_template = {
	.dst = {
		.__refcnt	= ATOMIC_INIT(1),
		.__use		= 1,
		.obsolete	= DST_OBSOLETE_FORCE_CHK,
		.error		= -EINVAL,
		.input		= dst_discard,
		.output		= dst_discard_out,
	},
	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
	.rt6i_protocol  = RTPROT_KERNEL,
	.rt6i_metric	= ~(u32) 0,	/* worst possible metric */
	.rt6i_ref	= ATOMIC_INIT(1),
};

#endif
335
/* Zero the rt6_info-specific tail of a freshly allocated entry (the
 * embedded dst_entry was already set up by dst_alloc()) and initialize
 * its list heads.
 */
static void rt6_info_init(struct rt6_info *rt)
{
	struct dst_entry *dst = &rt->dst;

	/* clear everything after the dst_entry member */
	memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
	INIT_LIST_HEAD(&rt->rt6i_siblings);
	INIT_LIST_HEAD(&rt->rt6i_uncached);
}
344
/* Allocate and initialize an rt6_info using this namespace's
 * ip6_dst_ops.  Returns NULL on allocation failure.
 */
static struct rt6_info *__ip6_dst_alloc(struct net *net,
					struct net_device *dev,
					int flags)
{
	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
					0, DST_OBSOLETE_FORCE_CHK, flags);

	if (rt)
		rt6_info_init(rt);

	return rt;
}
358
/* Allocate an rt6_info together with its per-cpu clone-pointer array.
 * On percpu allocation failure the partially built dst is destroyed
 * and NULL is returned.
 */
struct rt6_info *ip6_dst_alloc(struct net *net,
			       struct net_device *dev,
			       int flags)
{
	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);

	if (rt) {
		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
		if (rt->rt6i_pcpu) {
			int cpu;

			for_each_possible_cpu(cpu) {
				struct rt6_info **p;

				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
				/* no one shares rt */
				*p =  NULL;
			}
		} else {
			dst_destroy((struct dst_entry *)rt);
			return NULL;
		}
	}

	return rt;
}
EXPORT_SYMBOL(ip6_dst_alloc);
386
/* dst_ops->destroy hook: release everything an rt6_info owns - metrics,
 * the per-cpu clone array, its uncached-list membership, the inet6_dev
 * reference, and finally the parent route referenced via dst->from.
 */
static void ip6_dst_destroy(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct dst_entry *from = dst->from;
	struct inet6_dev *idev;

	dst_destroy_metrics_generic(dst);
	free_percpu(rt->rt6i_pcpu);
	rt6_uncached_list_del(rt);

	idev = rt->rt6i_idev;
	if (idev) {
		rt->rt6i_idev = NULL;
		in6_dev_put(idev);
	}

	/* drop the reference on the parent route last */
	dst->from = NULL;
	dst_release(from);
}
406
/* dst_ops->ifdown hook: @dev is going down, so migrate this route's
 * inet6_dev reference to the namespace loopback device.
 */
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			   int how)
{
	struct rt6_info *rt = (struct rt6_info *)dst;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *loopback_dev =
		dev_net(dev)->loopback_dev;

	if (dev != loopback_dev) {
		if (idev && idev->dev == dev) {
			struct inet6_dev *loopback_idev =
				in6_dev_get(loopback_dev);
			if (loopback_idev) {
				rt->rt6i_idev = loopback_idev;
				in6_dev_put(idev);
			}
		}
	}
}
426
427 static bool __rt6_check_expired(const struct rt6_info *rt)
428 {
429 if (rt->rt6i_flags & RTF_EXPIRES)
430 return time_after(jiffies, rt->dst.expires);
431 else
432 return false;
433 }
434
435 static bool rt6_check_expired(const struct rt6_info *rt)
436 {
437 if (rt->rt6i_flags & RTF_EXPIRES) {
438 if (time_after(jiffies, rt->dst.expires))
439 return true;
440 } else if (rt->dst.from) {
441 return rt6_check_expired((struct rt6_info *) rt->dst.from);
442 }
443 return false;
444 }
445
/* Multipath route selection:
 *   Hash based function using packet header and flowlabel.
 * Adapted from fib_info_hashfn()
 *
 * Returns an index in [0, candidate_count) derived from the flow hash,
 * so a given flow consistently maps to the same next hop.
 */
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
			       const struct flowi6 *fl6)
{
	return get_hash_from_flowi6(fl6) % candidate_count;
}
455
456 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
457 struct flowi6 *fl6, int oif,
458 int strict)
459 {
460 struct rt6_info *sibling, *next_sibling;
461 int route_choosen;
462
463 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
464 /* Don't change the route, if route_choosen == 0
465 * (siblings does not include ourself)
466 */
467 if (route_choosen)
468 list_for_each_entry_safe(sibling, next_sibling,
469 &match->rt6i_siblings, rt6i_siblings) {
470 route_choosen--;
471 if (route_choosen == 0) {
472 if (rt6_score_route(sibling, oif, strict) < 0)
473 break;
474 match = sibling;
475 break;
476 }
477 }
478 return match;
479 }
480
/*
 *	Route lookup. Any table->tb6_lock is implied.
 */

/* Filter the leaf chain starting at @rt by device constraints.
 *
 * With an @oif: return the first route whose device matches exactly;
 * otherwise remember a loopback route whose idev matches @oif as a
 * fallback (@local).  With only a source address: return the first
 * route whose device owns @saddr.  Falls back to @rt itself (or the
 * null entry under RT6_LOOKUP_F_IFACE) when nothing matches.
 */
static inline struct rt6_info *rt6_device_match(struct net *net,
						    struct rt6_info *rt,
						    const struct in6_addr *saddr,
						    int oif,
						    int flags)
{
	struct rt6_info *local = NULL;
	struct rt6_info *sprt;

	/* no constraints at all - any route will do */
	if (!oif && ipv6_addr_any(saddr))
		goto out;

	for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
		struct net_device *dev = sprt->dst.dev;

		if (oif) {
			if (dev->ifindex == oif)
				return sprt;
			if (dev->flags & IFF_LOOPBACK) {
				if (!sprt->rt6i_idev ||
				    sprt->rt6i_idev->dev->ifindex != oif) {
					if (flags & RT6_LOOKUP_F_IFACE)
						continue;
					if (local &&
					    local->rt6i_idev->dev->ifindex == oif)
						continue;
				}
				local = sprt;
			}
		} else {
			if (ipv6_chk_addr(net, saddr, dev,
					  flags & RT6_LOOKUP_F_IFACE))
				return sprt;
		}
	}

	if (oif) {
		if (local)
			return local;

		/* strict interface match required but none found */
		if (flags & RT6_LOOKUP_F_IFACE)
			return net->ipv6.ip6_null_entry;
	}
out:
	return rt;
}
531
#ifdef CONFIG_IPV6_ROUTER_PREF
/* Deferred router-probe request queued by rt6_probe(). */
struct __rt6_probe_work {
	struct work_struct work;
	struct in6_addr target;		/* router address to probe */
	struct net_device *dev;		/* held while the work is queued */
};

/* Workqueue handler: send a unicast-solicited Neighbor Solicitation to
 * the target router, then drop the device reference and free the work.
 */
static void rt6_probe_deferred(struct work_struct *w)
{
	struct in6_addr mcaddr;
	struct __rt6_probe_work *work =
		container_of(w, struct __rt6_probe_work, work);

	addrconf_addr_solict_mult(&work->target, &mcaddr);
	ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, 0);
	dev_put(work->dev);
	kfree(work);
}
550
/* Schedule a reachability probe for the gateway of @rt, rate-limited to
 * once per rtr_probe_interval while the neighbour is not NUD_VALID.
 * The NS itself is sent from a workqueue (rt6_probe_deferred()) since
 * we may be in a context where sending directly is not possible.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct __rt6_probe_work *work;
	struct neighbour *neigh;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
		return;
	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		if (neigh->nud_state & NUD_VALID)
			goto out;

		work = NULL;
		write_lock(&neigh->lock);
		/* recheck under the neigh lock; rate-limit via ->updated */
		if (!(neigh->nud_state & NUD_VALID) &&
		    time_after(jiffies,
			       neigh->updated +
			       rt->rt6i_idev->cnf.rtr_probe_interval)) {
			work = kmalloc(sizeof(*work), GFP_ATOMIC);
			if (work)
				__neigh_set_probe_once(neigh);
		}
		write_unlock(&neigh->lock);
	} else {
		/* no neighbour entry yet - probe unconditionally */
		work = kmalloc(sizeof(*work), GFP_ATOMIC);
	}

	if (work) {
		INIT_WORK(&work->work, rt6_probe_deferred);
		work->target = rt->rt6i_gateway;
		dev_hold(rt->dst.dev);	/* released by rt6_probe_deferred() */
		work->dev = rt->dst.dev;
		schedule_work(&work->work);
	}

out:
	rcu_read_unlock_bh();
}
#else
/* Router reachability probing is a CONFIG_IPV6_ROUTER_PREF feature. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
602
603 /*
604 * Default Router Selection (RFC 2461 6.3.6)
605 */
606 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
607 {
608 struct net_device *dev = rt->dst.dev;
609 if (!oif || dev->ifindex == oif)
610 return 2;
611 if ((dev->flags & IFF_LOOPBACK) &&
612 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
613 return 1;
614 return 0;
615 }
616
/* Classify the reachability of @rt's next hop (see enum rt6_nud_state).
 * Routes without a gateway always succeed.  With router preference
 * support, only NUD_FAILED neighbours count against a route; without
 * it, any state below NUD_VALID does.
 */
static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
	struct neighbour *neigh;
	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;

	if (rt->rt6i_flags & RTF_NONEXTHOP ||
	    !(rt->rt6i_flags & RTF_GATEWAY))
		return RT6_NUD_SUCCEED;

	rcu_read_lock_bh();
	neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
	if (neigh) {
		read_lock(&neigh->lock);
		if (neigh->nud_state & NUD_VALID)
			ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
		else if (!(neigh->nud_state & NUD_FAILED))
			ret = RT6_NUD_SUCCEED;
		else
			ret = RT6_NUD_FAIL_PROBE;
#endif
		read_unlock(&neigh->lock);
	} else {
		/* no entry: probe later, or ask for round-robin fallback */
		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
	}
	rcu_read_unlock_bh();

	return ret;
}
647
/* Score @rt for router selection.  Combines the device match (low bits)
 * with the decoded router preference (bits 2+).  Returns a negative
 * rt6_nud_state value when the route must be rejected or deprioritized.
 */
static int rt6_score_route(struct rt6_info *rt, int oif,
			   int strict)
{
	int m;

	m = rt6_check_dev(rt, oif);
	if (!m && (strict & RT6_LOOKUP_F_IFACE))
		return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
	if (strict & RT6_LOOKUP_F_REACHABLE) {
		int n = rt6_check_neigh(rt);
		if (n < 0)
			return n;
	}
	return m;
}
666
/* Compare @rt against the current best candidate @match and return the
 * better of the two, updating *mpri (best score so far) and *do_rr
 * (whether round-robin fallback should rotate rr_ptr).  Link-down and
 * expired routes are skipped.
 */
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
				   int *mpri, struct rt6_info *match,
				   bool *do_rr)
{
	int m;
	bool match_do_rr = false;
	struct inet6_dev *idev = rt->rt6i_idev;
	struct net_device *dev = rt->dst.dev;

	if (dev && !netif_carrier_ok(dev) &&
	    idev->cnf.ignore_routes_with_linkdown &&
	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
		goto out;

	if (rt6_check_expired(rt))
		goto out;

	m = rt6_score_route(rt, oif, strict);
	if (m == RT6_NUD_FAIL_DO_RR) {
		match_do_rr = true;
		m = 0; /* lowest valid score */
	} else if (m == RT6_NUD_FAIL_HARD) {
		goto out;
	}

	if (strict & RT6_LOOKUP_F_REACHABLE)
		rt6_probe(rt);

	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
	if (m > *mpri) {
		*do_rr = match_do_rr;
		*mpri = m;
		match = rt;
	}
out:
	return match;
}
704
/* Find the best route among the leaf chain of @fn, starting the scan at
 * @rr_head (the round-robin pointer) and wrapping around.  Only routes
 * at @metric participate in the first two passes; routes beyond the
 * first metric change (@cont) are considered only if nothing matched.
 */
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
				     struct rt6_info *rr_head,
				     u32 metric, int oif, int strict,
				     bool *do_rr)
{
	struct rt6_info *rt, *match, *cont;
	int mpri = -1;

	match = NULL;
	cont = NULL;
	/* pass 1: from the round-robin head to the end of the chain */
	for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	/* pass 2: wrap around from the leaf head up to the rr head */
	for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
		if (rt->rt6i_metric != metric) {
			cont = rt;
			break;
		}

		match = find_match(rt, oif, strict, &mpri, match, do_rr);
	}

	if (match || !cont)
		return match;

	/* pass 3: higher-metric routes, only as a last resort */
	for (rt = cont; rt; rt = rt->dst.rt6_next)
		match = find_match(rt, oif, strict, &mpri, match, do_rr);

	return match;
}
741
/* Select the route to use from fib6 node @fn, maintaining the node's
 * round-robin pointer (rr_ptr) so equal routes share load over time.
 * Falls back to the namespace null entry when nothing is usable.
 */
static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
	struct rt6_info *match, *rt0;
	struct net *net;
	bool do_rr = false;

	rt0 = fn->rr_ptr;
	if (!rt0)
		fn->rr_ptr = rt0 = fn->leaf;

	match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
			     &do_rr);

	if (do_rr) {
		struct rt6_info *next = rt0->dst.rt6_next;

		/* no entries matched; do round-robin */
		if (!next || next->rt6i_metric != rt0->rt6i_metric)
			next = fn->leaf;

		if (next != rt0)
			fn->rr_ptr = next;
	}

	net = dev_net(rt0->dst.dev);
	return match ? match : net->ipv6.ip6_null_entry;
}
769
770 static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt)
771 {
772 return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
773 }
774
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Process a Route Information option (RFC 4191) received in a Router
 * Advertisement on @dev from router @gwaddr: validate it, then add,
 * update, or (on zero lifetime) delete the corresponding route.
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  const struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info)) {
		return -EINVAL;
	}

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 128) {
		return -EINVAL;
	} else if (rinfo->prefix_len > 64) {
		/* prefixes longer than 64 bits need at least 16 prefix octets */
		if (rinfo->length < 2) {
			return -EINVAL;
		}
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1) {
			return -EINVAL;
		}
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3)
		prefix = (struct in6_addr *)rinfo->prefix;
	else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	/* a zero-length prefix advertises a default router */
	if (rinfo->prefix_len == 0)
		rt = rt6_get_dflt_router(gwaddr, dev);
	else
		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
					gwaddr, dev);

	/* zero lifetime withdraws the route */
	if (rt && !lifetime) {
		ip6_del_rt(rt);
		rt = NULL;
	}

	if (!rt && lifetime)
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev, pref);
	else if (rt)
		rt->rt6i_flags = RTF_ROUTEINFO |
				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);

	if (rt) {
		if (!addrconf_finite_timeout(lifetime))
			rt6_clean_expires(rt);
		else
			rt6_set_expires(rt, jiffies + HZ * lifetime);

		ip6_rt_put(rt);
	}
	return 0;
}
#endif
850
/* Walk back up the fib6 tree from @fn looking for the next node that
 * carries route info (RTN_RTINFO), descending into source-address
 * subtrees where present.  Returns NULL at the tree root.
 */
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
					struct in6_addr *saddr)
{
	struct fib6_node *pn;
	while (1) {
		if (fn->fn_flags & RTN_TL_ROOT)
			return NULL;
		pn = fn->parent;
		if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
			fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
		else
			fn = pn;
		if (fn->fn_flags & RTN_RTINFO)
			return fn;
	}
}
867
/* Simple (non-caching) policy-routing lookup in @table: match the fib6
 * node, filter by device, optionally spread across ECMP siblings, and
 * backtrack up the tree while only the null entry matches.  Returns a
 * held route (dst_use() takes the reference under tb6_lock).
 */
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn;
	struct rt6_info *rt;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	rt = fn->leaf;
	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
		rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}
	dst_use(&rt->dst, jiffies);
	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);

	return rt;

}
895
/* Public entry point for a policy-rule-aware route lookup using the
 * simple (non-caching) ip6_pol_route_lookup backend.
 */
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
				    int flags)
{
	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
902
903 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
904 const struct in6_addr *saddr, int oif, int strict)
905 {
906 struct flowi6 fl6 = {
907 .flowi6_oif = oif,
908 .daddr = *daddr,
909 };
910 struct dst_entry *dst;
911 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
912
913 if (saddr) {
914 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
915 flags |= RT6_LOOKUP_F_HAS_SADDR;
916 }
917
918 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
919 if (dst->error == 0)
920 return (struct rt6_info *) dst;
921
922 dst_release(dst);
923
924 return NULL;
925 }
926 EXPORT_SYMBOL(rt6_lookup);
927
/* ip6_ins_rt is called with FREE table->tb6_lock.
 * It takes a new route entry; if the addition fails for any reason the
 * route is freed.  In any case, if the caller does not hold its own
 * reference, the route may be destroyed.
 */

static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
			struct mx6_config *mxc)
{
	int err;
	struct fib6_table *table;

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_add(&table->tb6_root, rt, info, mxc);
	write_unlock_bh(&table->tb6_lock);

	return err;
}
947
/* Insert @rt into its fib6 table with default netlink info and no
 * extra metrics.  Returns 0 or a negative errno from fib6_add().
 */
int ip6_ins_rt(struct rt6_info *rt)
{
	struct nl_info info = {	.nl_net = dev_net(rt->dst.dev), };
	struct mx6_config mxc = { .mx = NULL, };

	return __ip6_ins_rt(rt, &info, &mxc);
}
955
/* Create an RTF_CACHE clone of @ort as a /128 host route for @daddr
 * (and, with subtrees, a /128 for @saddr).  If @ort is itself a clone,
 * clone from its parent instead.  Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
					   const struct in6_addr *daddr,
					   const struct in6_addr *saddr)
{
	struct rt6_info *rt;

	/*
	 *	Clone the route.
	 */

	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
		ort = (struct rt6_info *)ort->dst.from;

	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);

	if (!rt)
		return NULL;

	ip6_rt_copy_init(rt, ort);
	rt->rt6i_flags |= RTF_CACHE;
	rt->rt6i_metric = 0;
	rt->dst.flags |= DST_HOST;
	rt->rt6i_dst.addr = *daddr;
	rt->rt6i_dst.plen = 128;

	if (!rt6_is_gw_or_nonexthop(ort)) {
		/* a non-/128 on-link route matching its own prefix
		 * address is treated as anycast
		 */
		if (ort->rt6i_dst.plen != 128 &&
		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
			rt->rt6i_flags |= RTF_ANYCAST;
#ifdef CONFIG_IPV6_SUBTREES
		if (rt->rt6i_src.plen && saddr) {
			rt->rt6i_src.addr = *saddr;
			rt->rt6i_src.plen = 128;
		}
#endif
	}

	return rt;
}
995
/* Allocate an RTF_PCPU clone of @rt for use as a per-cpu cached copy.
 * Returns NULL on allocation failure.
 */
static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt;

	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
				  rt->dst.dev, rt->dst.flags);

	if (!pcpu_rt)
		return NULL;
	ip6_rt_copy_init(pcpu_rt, rt);
	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
	pcpu_rt->rt6i_flags |= RTF_PCPU;
	return pcpu_rt;
}
1010
/* It should be called with read_lock_bh(&tb6_lock) acquired.
 * Return this CPU's cached clone of @rt with a reference held, or NULL
 * if none has been created yet (see rt6_make_pcpu_route()).
 */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
	struct rt6_info *pcpu_rt, **p;

	p = this_cpu_ptr(rt->rt6i_pcpu);
	pcpu_rt = *p;

	if (pcpu_rt) {
		dst_hold(&pcpu_rt->dst);
		rt6_dst_from_metrics_check(pcpu_rt);
	}
	return pcpu_rt;
}
1025
/* Create (or adopt) the per-cpu cached clone of @rt.  Must be called
 * WITHOUT tb6_lock held: the allocation may trigger ip6_dst_gc(), which
 * takes the write lock.  Returns a held route: the new clone, a clone
 * that raced in before us, @rt itself if it was unlinked from the tree,
 * or the null entry on allocation failure.
 */
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
	struct fib6_table *table = rt->rt6i_table;
	struct rt6_info *pcpu_rt, *prev, **p;

	pcpu_rt = ip6_rt_pcpu_alloc(rt);
	if (!pcpu_rt) {
		struct net *net = dev_net(rt->dst.dev);

		dst_hold(&net->ipv6.ip6_null_entry->dst);
		return net->ipv6.ip6_null_entry;
	}

	read_lock_bh(&table->tb6_lock);
	if (rt->rt6i_pcpu) {
		p = this_cpu_ptr(rt->rt6i_pcpu);
		/* publish atomically; another CPU's softirq may race us */
		prev = cmpxchg(p, NULL, pcpu_rt);
		if (prev) {
			/* If someone did it before us, return prev instead */
			dst_destroy(&pcpu_rt->dst);
			pcpu_rt = prev;
		}
	} else {
		/* rt has been removed from the fib6 tree
		 * before we have a chance to acquire the read_lock.
		 * In this case, don't brother to create a pcpu rt
		 * since rt is going away anyway. The next
		 * dst_check() will trigger a re-lookup.
		 */
		dst_destroy(&pcpu_rt->dst);
		pcpu_rt = rt;
	}
	dst_hold(&pcpu_rt->dst);
	rt6_dst_from_metrics_check(pcpu_rt);
	read_unlock_bh(&table->tb6_lock);
	return pcpu_rt;
}
1063
/* Core policy-routing lookup for @table.  Selects the best route (with
 * reachability checks when forwarding is off, and a second pass without
 * them if the first finds nothing), then returns one of:
 *  - the route itself (null entry or RTF_CACHE clone),
 *  - a fresh uncached RTF_CACHE clone (FLOWI_FLAG_KNOWN_NH without a
 *    gateway, where the neighbour daddr may differ from fl6->daddr),
 *  - this CPU's per-cpu clone (the common case).
 * The returned route always carries a reference for the caller.
 */
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
			       int oif, struct flowi6 *fl6, int flags)
{
	struct fib6_node *fn, *saved_fn;
	struct rt6_info *rt;
	int strict = 0;

	strict |= flags & RT6_LOOKUP_F_IFACE;
	strict |= flags & RT6_LOOKUP_F_IGNORE_LINKSTATE;
	if (net->ipv6.devconf_all->forwarding == 0)
		strict |= RT6_LOOKUP_F_REACHABLE;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
	saved_fn = fn;

	if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
		oif = 0;

redo_rt6_select:
	rt = rt6_select(fn, oif, strict);
	if (rt->rt6i_nsiblings)
		rt = rt6_multipath_select(rt, fl6, oif, strict);
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto redo_rt6_select;
		else if (strict & RT6_LOOKUP_F_REACHABLE) {
			/* also consider unreachable route */
			strict &= ~RT6_LOOKUP_F_REACHABLE;
			fn = saved_fn;
			goto redo_rt6_select;
		}
	}


	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		rt6_dst_from_metrics_check(rt);

		trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
		return rt;
	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
			    !(rt->rt6i_flags & RTF_GATEWAY))) {
		/* Create a RTF_CACHE clone which will not be
		 * owned by the fib6 tree.  It is for the special case where
		 * the daddr in the skb during the neighbor look-up is different
		 * from the fl6->daddr used to look-up route here.
		 */

		struct rt6_info *uncached_rt;

		dst_use(&rt->dst, jiffies);
		read_unlock_bh(&table->tb6_lock);

		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
		dst_release(&rt->dst);

		if (uncached_rt)
			rt6_uncached_list_add(uncached_rt);
		else
			uncached_rt = net->ipv6.ip6_null_entry;

		dst_hold(&uncached_rt->dst);

		trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
		return uncached_rt;

	} else {
		/* Get a percpu copy */

		struct rt6_info *pcpu_rt;

		rt->dst.lastuse = jiffies;
		rt->dst.__use++;
		pcpu_rt = rt6_get_pcpu_route(rt);

		if (pcpu_rt) {
			read_unlock_bh(&table->tb6_lock);
		} else {
			/* We have to do the read_unlock first
			 * because rt6_make_pcpu_route() may trigger
			 * ip6_dst_gc() which will take the write_lock.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);
			pcpu_rt = rt6_make_pcpu_route(rt);
			dst_release(&rt->dst);
		}

		trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
		return pcpu_rt;

	}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
1163
/* fib6_rule_lookup() backend for the input path: use the incoming
 * interface as the oif constraint.
 */
static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
					    struct flowi6 *fl6, int flags)
{
	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
}
1169
/* Input-path route lookup for a packet received on @dev.  Link-local
 * and multicast destinations require a strict interface match (except
 * on PIM register devices).
 */
struct dst_entry *ip6_route_input_lookup(struct net *net,
					 struct net_device *dev,
					 struct flowi6 *fl6, int flags)
{
	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
		flags |= RT6_LOOKUP_F_IFACE;

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
1180
/* Route an incoming skb: build a flow key from its IPv6 header (plus
 * any collected tunnel metadata) and attach the resulting dst.
 */
void ip6_route_input(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct net *net = dev_net(skb->dev);
	int flags = RT6_LOOKUP_F_HAS_SADDR;
	struct ip_tunnel_info *tun_info;
	struct flowi6 fl6 = {
		.flowi6_iif = skb->dev->ifindex,
		.daddr = iph->daddr,
		.saddr = iph->saddr,
		.flowlabel = ip6_flowinfo(iph),
		.flowi6_mark = skb->mark,
		.flowi6_proto = iph->nexthdr,
	};

	/* carry the tunnel id into the lookup for RX metadata tunnels */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
	skb_dst_drop(skb);
	skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
1202
1203 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1204 struct flowi6 *fl6, int flags)
1205 {
1206 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1207 }
1208
/* Output-path route lookup for a locally generated flow.
 *
 * @sk may be NULL; when given, its device binding and source-address
 * preferences influence the lookup flags. Returns a held dst (possibly
 * an error-carrying null entry); caller releases it.
 */
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
					 struct flowi6 *fl6, int flags)
{
	bool any_src;

	/* Link-scoped destinations on an l3mdev (VRF) slave are resolved by
	 * the l3mdev layer first; fall through to the normal FIB otherwise.
	 */
	if (rt6_need_strict(&fl6->daddr)) {
		struct dst_entry *dst;

		dst = l3mdev_link_scope_lookup(net, fl6);
		if (dst)
			return dst;
	}

	/* Locally originated traffic: mark iif as loopback. */
	fl6->flowi6_iif = LOOPBACK_IFINDEX;

	any_src = ipv6_addr_any(&fl6->saddr);
	/* Force device-strict matching when the socket is bound to a device,
	 * the destination is link-scoped, or an oif was given without a
	 * source address.
	 */
	if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
	    (fl6->flowi6_oif && any_src))
		flags |= RT6_LOOKUP_F_IFACE;

	if (!any_src)
		flags |= RT6_LOOKUP_F_HAS_SADDR;
	else if (sk)
		/* No source yet: honor the socket's IPV6_ADDR_PREFERENCES. */
		flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);

	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
1237
/* Clone @dst_orig into a "blackhole" route that silently discards all
 * traffic (used e.g. while xfrm resolution is pending). Consumes the
 * caller's reference on @dst_orig. Returns the new dst or ERR_PTR(-ENOMEM).
 */
struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
	struct dst_entry *new = NULL;

	rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		rt6_info_init(rt);

		new = &rt->dst;
		new->__use = 1;
		/* Blackhole: both directions discard. */
		new->input = dst_discard;
		new->output = dst_discard_out;

		dst_copy_metrics(new, &ort->dst);
		rt->rt6i_idev = ort->rt6i_idev;
		if (rt->rt6i_idev)
			in6_dev_hold(rt->rt6i_idev);

		rt->rt6i_gateway = ort->rt6i_gateway;
		/* RTF_PCPU is per-cpu bookkeeping of the original; the clone
		 * must not inherit it.
		 */
		rt->rt6i_flags = ort->rt6i_flags & ~RTF_PCPU;
		rt->rt6i_metric = 0;

		memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
#ifdef CONFIG_IPV6_SUBTREES
		memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
#endif

		/* NOTE(review): dst_free() here appears to hand the entry to
		 * the dst garbage collector for eventual release once the
		 * caller's reference (taken by dst_alloc above) drops —
		 * confirm against dst_alloc/dst_free semantics of this tree.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);
	return new ? new : ERR_PTR(-ENOMEM);
}
1272
1273 /*
1274 * Destination cache support functions
1275 */
1276
1277 static void rt6_dst_from_metrics_check(struct rt6_info *rt)
1278 {
1279 if (rt->dst.from &&
1280 dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
1281 dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
1282 }
1283
1284 static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
1285 {
1286 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1287 return NULL;
1288
1289 if (rt6_check_expired(rt))
1290 return NULL;
1291
1292 return &rt->dst;
1293 }
1294
1295 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
1296 {
1297 if (!__rt6_check_expired(rt) &&
1298 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1299 rt6_check((struct rt6_info *)(rt->dst.from), cookie))
1300 return &rt->dst;
1301 else
1302 return NULL;
1303 }
1304
/* dst_ops->check implementation: decide whether a cached dst is still
 * usable for @cookie, returning the dst or NULL.
 */
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rt6_info *rt;

	rt = (struct rt6_info *) dst;

	/* All IPV6 dsts are created with ->obsolete set to the value
	 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
	 * into this function always.
	 */

	rt6_dst_from_metrics_check(rt);

	/* Per-cpu copies and uncached clones are validated through their
	 * parent route; plain fib entries are validated directly.
	 */
	if (rt->rt6i_flags & RTF_PCPU ||
	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
		return rt6_dst_from_check(rt, cookie);
	else
		return rt6_check(rt, cookie);
}
1324
/* dst_ops->negative_advice: the caller signals this dst is misbehaving.
 * Expired cache entries are deleted from the FIB; non-cache entries are
 * simply dropped (reference released) so the caller re-resolves.
 * Returns the dst if still usable, NULL if the caller must look up again.
 */
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
{
	struct rt6_info *rt = (struct rt6_info *) dst;

	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			if (rt6_check_expired(rt)) {
				/* ip6_del_rt() consumes the reference. */
				ip6_del_rt(rt);
				dst = NULL;
			}
		} else {
			dst_release(dst);
			dst = NULL;
		}
	}
	return dst;
}
1342
/* dst_ops->link_failure: the nexthop for @skb's dst is unreachable.
 * Report address-unreachable to the sender, then invalidate the route:
 * cache clones are deleted outright; for default routes the fib node's
 * serial number is poisoned so cached dsts fail their next rt6_check().
 */
static void ip6_link_failure(struct sk_buff *skb)
{
	struct rt6_info *rt;

	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);

	rt = (struct rt6_info *) skb_dst(skb);
	if (rt) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* Extra hold because ip6_del_rt() drops one ref. */
			dst_hold(&rt->dst);
			ip6_del_rt(rt);
		} else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
			rt->rt6i_node->fn_sernum = -1;
		}
	}
}
1359
/* Record a learned path MTU on @rt and arm its expiry timer
 * (ip6_rt_mtu_expires sysctl) so the clamp is eventually forgotten.
 */
static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
{
	struct net *net = dev_net(rt->dst.dev);

	rt->rt6i_flags |= RTF_MODIFIED;
	rt->rt6i_pmtu = mtu;
	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
}
1368
1369 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
1370 {
1371 return !(rt->rt6i_flags & RTF_CACHE) &&
1372 (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
1373 }
1374
/* Core PMTU-update handler. Addresses are taken from @iph if present,
 * else from @sk; with neither, no per-destination cache clone can be made
 * and only a direct update is possible. The update is applied in place
 * when allowed, otherwise a dedicated RTF_CACHE clone is inserted.
 */
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
				 const struct ipv6hdr *iph, u32 mtu)
{
	const struct in6_addr *daddr, *saddr;
	struct rt6_info *rt6 = (struct rt6_info *)dst;

	/* Local routes have no path MTU to maintain. */
	if (rt6->rt6i_flags & RTF_LOCAL)
		return;

	/* Administratively locked MTU must not be changed. */
	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (iph) {
		daddr = &iph->daddr;
		saddr = &iph->saddr;
	} else if (sk) {
		daddr = &sk->sk_v6_daddr;
		saddr = &inet6_sk(sk)->saddr;
	} else {
		daddr = NULL;
		saddr = NULL;
	}
	/* A PMTU message implies forward progress via this nexthop. */
	dst_confirm_neigh(dst, daddr);
	mtu = max_t(u32, mtu, IPV6_MIN_MTU);
	/* Only ever shrink the MTU here. */
	if (mtu >= dst_mtu(dst))
		return;

	if (!rt6_cache_allowed_for_pmtu(rt6)) {
		rt6_do_update_pmtu(rt6, mtu);
	} else if (daddr) {
		struct rt6_info *nrt6;

		nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
		if (nrt6) {
			rt6_do_update_pmtu(nrt6, mtu);

			/* ip6_ins_rt(nrt6) will bump the
			 * rt6->rt6i_node->fn_sernum
			 * which will fail the next rt6_check() and
			 * invalidate the sk->sk_dst_cache.
			 */
			ip6_ins_rt(nrt6);
		}
	}
}
1420
1421 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1422 struct sk_buff *skb, u32 mtu)
1423 {
1424 __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
1425 }
1426
1427 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1428 int oif, u32 mark, kuid_t uid)
1429 {
1430 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1431 struct dst_entry *dst;
1432 struct flowi6 fl6;
1433
1434 memset(&fl6, 0, sizeof(fl6));
1435 fl6.flowi6_oif = oif;
1436 fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);
1437 fl6.daddr = iph->daddr;
1438 fl6.saddr = iph->saddr;
1439 fl6.flowlabel = ip6_flowinfo(iph);
1440 fl6.flowi6_uid = uid;
1441
1442 dst = ip6_route_output(net, NULL, &fl6);
1443 if (!dst->error)
1444 __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
1445 dst_release(dst);
1446 }
1447 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1448
/* Socket-scoped PMTU update: apply the report using the socket's binding
 * and mark, then, if the socket's cached dst has become invalid, refresh
 * it (datagram sockets only, and only when we can take the socket lock
 * without sleeping).
 */
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
	struct dst_entry *dst;

	ip6_update_pmtu(skb, sock_net(sk), mtu,
			sk->sk_bound_dev_if, sk->sk_mark, sk->sk_uid);

	dst = __sk_dst_get(sk);
	if (!dst || !dst->obsolete ||
	    dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
		return;

	bh_lock_sock(sk);
	/* Skip v4-mapped peers: their dst is managed by the IPv4 path. */
	if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
		ip6_datagram_dst_update(sk, false);
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1467
/* Handle redirects */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* flow being redirected */
	struct in6_addr gateway;	/* router that sent the redirect */
};
1473
/* Find the route currently used towards fl6->daddr whose nexthop is the
 * router that sent us a redirect (rdfl->gateway), backtracking up the
 * fib6 tree if needed. Returns a held rt (ip6_null_entry when nothing
 * matches).
 */
static struct rt6_info *__ip6_route_redirect(struct net *net,
					     struct fib6_table *table,
					     struct flowi6 *fl6,
					     int flags)
{
	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
	struct rt6_info *rt;
	struct fib6_node *fn;

	/* Get the "current" route for this destination and
	 * check if the redirect has come from appropriate router.
	 *
	 * RFC 4861 specifies that redirects should only be
	 * accepted if they come from the nexthop to the target.
	 * Due to the way the routes are chosen, this notion
	 * is a bit fuzzy and one might need to check all possible
	 * routes.
	 */

	read_lock_bh(&table->tb6_lock);
	fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt6_check_expired(rt))
			continue;
		if (rt->dst.error)
			break;
		if (!(rt->rt6i_flags & RTF_GATEWAY))
			continue;
		if (fl6->flowi6_oif != rt->dst.dev->ifindex)
			continue;
		if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
			continue;
		break;
	}

	if (!rt)
		rt = net->ipv6.ip6_null_entry;
	else if (rt->dst.error) {
		rt = net->ipv6.ip6_null_entry;
		goto out;
	}

	/* No match at this node: retry from a less-specific prefix. */
	if (rt == net->ipv6.ip6_null_entry) {
		fn = fib6_backtrack(fn, &fl6->saddr);
		if (fn)
			goto restart;
	}

out:
	dst_hold(&rt->dst);

	read_unlock_bh(&table->tb6_lock);

	trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
	return rt;
};
1531
1532 static struct dst_entry *ip6_route_redirect(struct net *net,
1533 const struct flowi6 *fl6,
1534 const struct in6_addr *gateway)
1535 {
1536 int flags = RT6_LOOKUP_F_HAS_SADDR;
1537 struct ip6rd_flowi rdfl;
1538
1539 rdfl.fl6 = *fl6;
1540 rdfl.gateway = *gateway;
1541
1542 return fib6_rule_lookup(net, &rdfl.fl6,
1543 flags, __ip6_route_redirect);
1544 }
1545
1546 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
1547 kuid_t uid)
1548 {
1549 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1550 struct dst_entry *dst;
1551 struct flowi6 fl6;
1552
1553 memset(&fl6, 0, sizeof(fl6));
1554 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1555 fl6.flowi6_oif = oif;
1556 fl6.flowi6_mark = mark;
1557 fl6.daddr = iph->daddr;
1558 fl6.saddr = iph->saddr;
1559 fl6.flowlabel = ip6_flowinfo(iph);
1560 fl6.flowi6_uid = uid;
1561
1562 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1563 rt6_do_redirect(dst, NULL, skb);
1564 dst_release(dst);
1565 }
1566 EXPORT_SYMBOL_GPL(ip6_redirect);
1567
1568 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1569 u32 mark)
1570 {
1571 const struct ipv6hdr *iph = ipv6_hdr(skb);
1572 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1573 struct dst_entry *dst;
1574 struct flowi6 fl6;
1575
1576 memset(&fl6, 0, sizeof(fl6));
1577 fl6.flowi6_iif = LOOPBACK_IFINDEX;
1578 fl6.flowi6_oif = oif;
1579 fl6.flowi6_mark = mark;
1580 fl6.daddr = msg->dest;
1581 fl6.saddr = iph->daddr;
1582 fl6.flowi6_uid = sock_net_uid(net, NULL);
1583
1584 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1585 rt6_do_redirect(dst, NULL, skb);
1586 dst_release(dst);
1587 }
1588
/* Socket convenience wrapper: process a redirect using the socket's
 * namespace, device binding, mark and uid.
 */
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
		     sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1595
/* dst_ops->default_advmss: derive the advertised TCP MSS from the route's
 * MTU, clamped below by the ip6_rt_min_advmss sysctl and above per the
 * jumbogram convention explained inline.
 */
static unsigned int ip6_default_advmss(const struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	unsigned int mtu = dst_mtu(dst);
	struct net *net = dev_net(dev);

	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);

	if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
		mtu = net->ipv6.sysctl.ip6_rt_min_advmss;

	/*
	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
	 * IPV6_MAXPLEN is also valid and means: "any MSS,
	 * rely only on pmtu discovery"
	 */
	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
		mtu = IPV6_MAXPLEN;
	return mtu;
}
1617
/* dst_ops->mtu: effective MTU for this route. Preference order: learned
 * path MTU, explicit RTAX_MTU metric, then the device's IPv6 MTU; the
 * result is capped at IP6_MAX_MTU and reduced by any lwtunnel headroom.
 */
static unsigned int ip6_mtu(const struct dst_entry *dst)
{
	const struct rt6_info *rt = (const struct rt6_info *)dst;
	unsigned int mtu = rt->rt6i_pmtu;
	struct inet6_dev *idev;

	if (mtu)
		goto out;

	mtu = dst_metric_raw(dst, RTAX_MTU);
	if (mtu)
		goto out;

	mtu = IPV6_MIN_MTU;

	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

out:
	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);

	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
1644
/* Singly-linked list (threaded through dst.next) of dsts handed out by
 * icmp6_dst_alloc(); reclaimed by icmp6_dst_gc()/icmp6_clean_all().
 * Protected by icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1647
/* Allocate a standalone host route for sending an ICMPv6 packet towards
 * fl6->daddr via @dev, outside the FIB. The entry is chained onto
 * icmp6_dst_gc_list for later reclamation. Returns the (xfrm-resolved)
 * dst with one reference held, or an ERR_PTR.
 */
struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
				  struct flowi6 *fl6)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	struct inet6_dev *idev = in6_dev_get(dev);
	struct net *net = dev_net(dev);

	if (unlikely(!idev))
		return ERR_PTR(-ENODEV);

	rt = ip6_dst_alloc(net, dev, 0);
	if (unlikely(!rt)) {
		in6_dev_put(idev);
		dst = ERR_PTR(-ENOMEM);
		goto out;
	}

	rt->dst.flags |= DST_HOST;
	rt->dst.output  = ip6_output;
	/* Caller's reference; dropped via icmp6_dst_gc() once released. */
	atomic_set(&rt->dst.__refcnt, 1);
	rt->rt6i_gateway  = fl6->daddr;
	rt->rt6i_dst.addr = fl6->daddr;
	rt->rt6i_dst.plen = 128;
	rt->rt6i_idev     = idev;
	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);

	/* Chain onto the GC list so the entry can be reclaimed later. */
	spin_lock_bh(&icmp6_dst_lock);
	rt->dst.next = icmp6_dst_gc_list;
	icmp6_dst_gc_list = &rt->dst;
	spin_unlock_bh(&icmp6_dst_lock);

	fib6_force_start_gc(net);

	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);

out:
	return dst;
}
1687
/* Walk the icmp6 dst list and free every entry whose refcount has dropped
 * to zero. Returns the number of entries still in use (non-zero means the
 * GC should run again later).
 */
int icmp6_dst_gc(void)
{
	struct dst_entry *dst, **pprev;
	int more = 0;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;

	/* pprev always points at the link to the current entry, so unlinking
	 * is a single store.
	 */
	while ((dst = *pprev) != NULL) {
		if (!atomic_read(&dst->__refcnt)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
			++more;
		}
	}

	spin_unlock_bh(&icmp6_dst_lock);

	return more;
}
1710
/* Remove from the icmp6 dst list (and free) every entry for which
 * @func(rt, arg) returns non-zero; used for device/namespace teardown.
 */
static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
			    void *arg)
{
	struct dst_entry *dst, **pprev;

	spin_lock_bh(&icmp6_dst_lock);
	pprev = &icmp6_dst_gc_list;
	while ((dst = *pprev) != NULL) {
		struct rt6_info *rt = (struct rt6_info *) dst;
		if (func(rt, arg)) {
			*pprev = dst->next;
			dst_free(dst);
		} else {
			pprev = &dst->next;
		}
	}
	spin_unlock_bh(&icmp6_dst_lock);
}
1729
/* dst_ops->gc: decide whether the route cache is over budget and trigger
 * fib6 garbage collection. The expire interval adapts: it grows each run
 * and decays (right-shift by the elasticity sysctl) when pressure eases.
 * Returns non-zero when still above rt_max_size (allocation should fail).
 */
static int ip6_dst_gc(struct dst_ops *ops)
{
	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
	int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
	int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
	unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
	int entries;

	entries = dst_entries_get_fast(ops);
	/* Rate-limit GC unless we are over the size budget. */
	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
	    entries <= rt_max_size)
		goto out;

	net->ipv6.ip6_rt_gc_expire++;
	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
	entries = dst_entries_get_slow(ops);
	if (entries < ops->gc_thresh)
		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
out:
	net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
	return entries > rt_max_size;
}
1754
/* Parse the netlink RTAX_* metrics attribute (cfg->fc_mx) into a freshly
 * allocated u32[RTAX_MAX] array stored in @mxc (ownership passes to the
 * caller on success). Returns 0, -ENOMEM, or -EINVAL on malformed input.
 */
static int ip6_convert_metrics(struct mx6_config *mxc,
			       const struct fib6_config *cfg)
{
	bool ecn_ca = false;
	struct nlattr *nla;
	int remaining;
	u32 *mp;

	/* No metrics supplied: nothing to do, mxc->mx stays NULL. */
	if (!cfg->fc_mx)
		return 0;

	mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
	if (unlikely(!mp))
		return -ENOMEM;

	nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
		int type = nla_type(nla);
		u32 val;

		if (!type)
			continue;
		if (unlikely(type > RTAX_MAX))
			goto err;

		if (type == RTAX_CC_ALGO) {
			/* Congestion-control metric comes as a name string;
			 * translate to the algorithm key.
			 */
			char tmp[TCP_CA_NAME_MAX];

			nla_strlcpy(tmp, nla, sizeof(tmp));
			val = tcp_ca_get_key_by_name(tmp, &ecn_ca);
			if (val == TCP_CA_UNSPEC)
				goto err;
		} else {
			val = nla_get_u32(nla);
		}
		if (type == RTAX_HOPLIMIT && val > 255)
			val = 255;
		if (type == RTAX_FEATURES && (val & ~RTAX_FEATURE_MASK))
			goto err;

		/* Metric ids are 1-based; the array is 0-based. */
		mp[type - 1] = val;
		__set_bit(type - 1, mxc->mx_valid);
	}

	if (ecn_ca) {
		__set_bit(RTAX_FEATURES - 1, mxc->mx_valid);
		mp[RTAX_FEATURES - 1] |= DST_FEATURE_ECN_CA;
	}

	mxc->mx = mp;
	return 0;
 err:
	kfree(mp);
	return -EINVAL;
}
1809
/* Resolve a route's gateway @gw_addr within the table named by the route
 * config (used to validate a nexthop at route-add time). Returns a held
 * rt6_info, or NULL if the table does not exist or the lookup only hit
 * the null entry.
 */
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
					    struct fib6_config *cfg,
					    const struct in6_addr *gw_addr)
{
	struct flowi6 fl6 = {
		.flowi6_oif = cfg->fc_ifindex,
		.daddr = *gw_addr,
		.saddr = cfg->fc_prefsrc,
	};
	struct fib6_table *table;
	struct rt6_info *rt;
	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;

	table = fib6_get_table(net, cfg->fc_table);
	if (!table)
		return NULL;

	if (!ipv6_addr_any(&cfg->fc_prefsrc))
		flags |= RT6_LOOKUP_F_HAS_SADDR;

	rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);

	/* if table lookup failed, fall back to full lookup */
	if (rt == net->ipv6.ip6_null_entry) {
		ip6_rt_put(rt);
		rt = NULL;
	}

	return rt;
}
1840
/* Build (but do not insert) a rt6_info from a netlink/ioctl route config.
 * Validates the config, resolves the output device and nexthop, demotes
 * loopback routes to reject routes, and sets up lwtunnel state. On
 * success the new route (holding dev/idev references) is returned; on
 * failure all acquired references are dropped and an ERR_PTR is returned.
 */
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
	struct net *net = cfg->fc_nlinfo.nl_net;
	struct rt6_info *rt = NULL;
	struct net_device *dev = NULL;
	struct inet6_dev *idev = NULL;
	struct fib6_table *table;
	int addr_type;
	int err = -EINVAL;

	/* RTF_PCPU is an internal flag; can not be set by userspace */
	if (cfg->fc_flags & RTF_PCPU)
		goto out;

	if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
		goto out;
#ifndef CONFIG_IPV6_SUBTREES
	if (cfg->fc_src_len)
		goto out;
#endif
	if (cfg->fc_ifindex) {
		err = -ENODEV;
		dev = dev_get_by_index(net, cfg->fc_ifindex);
		if (!dev)
			goto out;
		idev = in6_dev_get(dev);
		if (!idev)
			goto out;
	}

	if (cfg->fc_metric == 0)
		cfg->fc_metric = IP6_RT_PRIO_USER;

	err = -ENOBUFS;
	if (cfg->fc_nlinfo.nlh &&
	    !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
		table = fib6_get_table(net, cfg->fc_table);
		if (!table) {
			pr_warn("NLM_F_CREATE should be specified when creating new route\n");
			table = fib6_new_table(net, cfg->fc_table);
		}
	} else {
		table = fib6_new_table(net, cfg->fc_table);
	}

	if (!table)
		goto out;

	rt = ip6_dst_alloc(net, NULL,
			   (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);

	if (!rt) {
		err = -ENOMEM;
		goto out;
	}

	if (cfg->fc_flags & RTF_EXPIRES)
		rt6_set_expires(rt, jiffies +
				clock_t_to_jiffies(cfg->fc_expires));
	else
		rt6_clean_expires(rt);

	if (cfg->fc_protocol == RTPROT_UNSPEC)
		cfg->fc_protocol = RTPROT_BOOT;
	rt->rt6i_protocol = cfg->fc_protocol;

	addr_type = ipv6_addr_type(&cfg->fc_dst);

	/* Choose the input handler from the destination's address class. */
	if (addr_type & IPV6_ADDR_MULTICAST)
		rt->dst.input = ip6_mc_input;
	else if (cfg->fc_flags & RTF_LOCAL)
		rt->dst.input = ip6_input;
	else
		rt->dst.input = ip6_forward;

	rt->dst.output = ip6_output;

	if (cfg->fc_encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(dev, cfg->fc_encap_type,
					   cfg->fc_encap, AF_INET6, cfg,
					   &lwtstate);
		if (err)
			goto out;
		rt->dst.lwtstate = lwtstate_get(lwtstate);
		if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_output = rt->dst.output;
			rt->dst.output = lwtunnel_output;
		}
		if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
			rt->dst.lwtstate->orig_input = rt->dst.input;
			rt->dst.input = lwtunnel_input;
		}
	}

	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
	rt->rt6i_dst.plen = cfg->fc_dst_len;
	if (rt->rt6i_dst.plen == 128)
		rt->dst.flags |= DST_HOST;

#ifdef CONFIG_IPV6_SUBTREES
	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
	rt->rt6i_src.plen = cfg->fc_src_len;
#endif

	rt->rt6i_metric = cfg->fc_metric;

	/* We cannot add true routes via loopback here,
	   they would result in kernel looping; promote them to reject routes
	 */
	if ((cfg->fc_flags & RTF_REJECT) ||
	    (dev && (dev->flags & IFF_LOOPBACK) &&
	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
	     !(cfg->fc_flags & RTF_LOCAL))) {
		/* hold loopback dev/idev if we haven't done so. */
		if (dev != net->loopback_dev) {
			if (dev) {
				dev_put(dev);
				in6_dev_put(idev);
			}
			dev = net->loopback_dev;
			dev_hold(dev);
			idev = in6_dev_get(dev);
			if (!idev) {
				err = -ENODEV;
				goto out;
			}
		}
		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
		switch (cfg->fc_type) {
		case RTN_BLACKHOLE:
			rt->dst.error = -EINVAL;
			rt->dst.output = dst_discard_out;
			rt->dst.input = dst_discard;
			break;
		case RTN_PROHIBIT:
			rt->dst.error = -EACCES;
			rt->dst.output = ip6_pkt_prohibit_out;
			rt->dst.input = ip6_pkt_prohibit;
			break;
		case RTN_THROW:
		case RTN_UNREACHABLE:
		default:
			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
					: (cfg->fc_type == RTN_UNREACHABLE)
					? -EHOSTUNREACH : -ENETUNREACH;
			rt->dst.output = ip6_pkt_discard_out;
			rt->dst.input = ip6_pkt_discard;
			break;
		}
		goto install_route;
	}

	if (cfg->fc_flags & RTF_GATEWAY) {
		const struct in6_addr *gw_addr;
		int gwa_type;

		gw_addr = &cfg->fc_gateway;
		gwa_type = ipv6_addr_type(gw_addr);

		/* if gw_addr is local we will fail to detect this in case
		 * address is still TENTATIVE (DAD in progress). rt6_lookup()
		 * will return already-added prefix route via interface that
		 * prefix route was assigned to, which might be non-loopback.
		 */
		err = -EINVAL;
		if (ipv6_chk_addr_and_flags(net, gw_addr,
					    gwa_type & IPV6_ADDR_LINKLOCAL ?
					    dev : NULL, 0, 0))
			goto out;

		rt->rt6i_gateway = *gw_addr;

		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
			struct rt6_info *grt = NULL;

			/* IPv6 strictly inhibits using not link-local
			   addresses as nexthop address.
			   Otherwise, router will not able to send redirects.
			   It is very good, but in some (rare!) circumstances
			   (SIT, PtP, NBMA NOARP links) it is handy to allow
			   some exceptions. --ANK
			   We allow IPv4-mapped nexthops to support RFC4798-type
			   addressing
			 */
			if (!(gwa_type & (IPV6_ADDR_UNICAST |
					  IPV6_ADDR_MAPPED)))
				goto out;

			if (cfg->fc_table) {
				grt = ip6_nh_lookup_table(net, cfg, gw_addr);

				if (grt) {
					if (grt->rt6i_flags & RTF_GATEWAY ||
					    (dev && dev != grt->dst.dev)) {
						ip6_rt_put(grt);
						grt = NULL;
					}
				}
			}

			if (!grt)
				grt = rt6_lookup(net, gw_addr, NULL,
						 cfg->fc_ifindex, 1);

			err = -EHOSTUNREACH;
			if (!grt)
				goto out;
			if (dev) {
				if (dev != grt->dst.dev) {
					ip6_rt_put(grt);
					goto out;
				}
			} else {
				/* Adopt device and idev from the nexthop's
				 * route, taking our own references.
				 */
				dev = grt->dst.dev;
				idev = grt->rt6i_idev;
				dev_hold(dev);
				in6_dev_hold(grt->rt6i_idev);
			}
			if (!(grt->rt6i_flags & RTF_GATEWAY))
				err = 0;
			ip6_rt_put(grt);

			if (err)
				goto out;
		}
		err = -EINVAL;
		if (!dev || (dev->flags & IFF_LOOPBACK))
			goto out;
	}

	err = -ENODEV;
	if (!dev)
		goto out;

	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
		/* Preferred source must be an address on the device. */
		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
			err = -EINVAL;
			goto out;
		}
		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
		rt->rt6i_prefsrc.plen = 128;
	} else
		rt->rt6i_prefsrc.plen = 0;

	rt->rt6i_flags = cfg->fc_flags;

install_route:
	rt->dst.dev = dev;
	rt->rt6i_idev = idev;
	rt->rt6i_table = table;

	cfg->fc_nlinfo.nl_net = dev_net(dev);

	return rt;
out:
	if (dev)
		dev_put(dev);
	if (idev)
		in6_dev_put(idev);
	if (rt)
		dst_free(&rt->dst);

	return ERR_PTR(err);
}
2107
/* Create a route from @cfg and insert it into the FIB, together with any
 * RTAX_* metrics carried in the config. Returns 0 or a negative errno.
 */
int ip6_route_add(struct fib6_config *cfg)
{
	struct mx6_config mxc = { .mx = NULL, };
	struct rt6_info *rt;
	int err;

	rt = ip6_route_info_create(cfg);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		rt = NULL;
		goto out;
	}

	err = ip6_convert_metrics(&mxc, cfg);
	if (err)
		goto out;

	/* On success the FIB owns rt; on failure __ip6_ins_rt releases it. */
	err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc);

	kfree(mxc.mx);

	return err;
out:
	if (rt)
		dst_free(&rt->dst);

	return err;
}
2136
/* Remove @rt from its FIB table under the table write lock, notifying via
 * @info. Consumes the caller's reference on @rt in all cases. The null
 * entry and uncached (DST_NOCACHE) clones are not in any table and yield
 * -ENOENT.
 */
static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
{
	int err;
	struct fib6_table *table;
	struct net *net = dev_net(rt->dst.dev);

	if (rt == net->ipv6.ip6_null_entry ||
	    rt->dst.flags & DST_NOCACHE) {
		err = -ENOENT;
		goto out;
	}

	table = rt->rt6i_table;
	write_lock_bh(&table->tb6_lock);
	err = fib6_del(rt, info);
	write_unlock_bh(&table->tb6_lock);

out:
	ip6_rt_put(rt);
	return err;
}
2158
/* Delete @rt with default netlink notification info (route's namespace,
 * no portid/seq). Consumes the caller's reference on @rt.
 */
int ip6_del_rt(struct rt6_info *rt)
{
	struct nl_info info = {
		.nl_net = dev_net(rt->dst.dev),
	};
	return __ip6_del_rt(rt, &info);
}
2166
/* Delete the route matching @cfg (destination/source prefix, and any of
 * ifindex, gateway, metric, protocol that the config specifies). Returns
 * 0 on successful deletion or -ESRCH when nothing matches.
 */
static int ip6_route_del(struct fib6_config *cfg)
{
	struct fib6_table *table;
	struct fib6_node *fn;
	struct rt6_info *rt;
	int err = -ESRCH;

	table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
	if (!table)
		return err;

	read_lock_bh(&table->tb6_lock);

	fn = fib6_locate(&table->tb6_root,
			 &cfg->fc_dst, cfg->fc_dst_len,
			 &cfg->fc_src, cfg->fc_src_len);

	if (fn) {
		for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
			/* Cache clones are only deleted when explicitly
			 * requested via RTF_CACHE in the config.
			 */
			if ((rt->rt6i_flags & RTF_CACHE) &&
			    !(cfg->fc_flags & RTF_CACHE))
				continue;
			if (cfg->fc_ifindex &&
			    (!rt->dst.dev ||
			     rt->dst.dev->ifindex != cfg->fc_ifindex))
				continue;
			if (cfg->fc_flags & RTF_GATEWAY &&
			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
				continue;
			if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
				continue;
			if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
				continue;
			/* Hold rt across the unlock; __ip6_del_rt retakes the
			 * table lock for writing and consumes the reference.
			 */
			dst_hold(&rt->dst);
			read_unlock_bh(&table->tb6_lock);

			return __ip6_del_rt(rt, &cfg->fc_nlinfo);
		}
	}
	read_unlock_bh(&table->tb6_lock);

	return err;
}
2210
/* Process a validated-route ICMPv6 Redirect (RFC 4861, section 8): verify
 * the message, update the neighbour cache for the new first hop, insert an
 * RTF_CACHE clone pointing at it, and retire any old cache entry.
 * @dst is the route currently used towards the redirected destination.
 */
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct netevent_redirect netevent;
	struct rt6_info *rt, *nrt = NULL;
	struct ndisc_options ndopts;
	struct inet6_dev *in6_dev;
	struct neighbour *neigh;
	struct rd_msg *msg;
	int optlen, on_link;
	u8 *lladdr;

	optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
	optlen -= sizeof(*msg);

	if (optlen < 0) {
		net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
		return;
	}

	msg = (struct rd_msg *)icmp6_hdr(skb);

	if (ipv6_addr_is_multicast(&msg->dest)) {
		net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
		return;
	}

	/* target == dest means the destination itself is on-link. */
	on_link = 0;
	if (ipv6_addr_equal(&msg->dest, &msg->target)) {
		on_link = 1;
	} else if (ipv6_addr_type(&msg->target) !=
		   (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
		net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
		return;
	}

	in6_dev = __in6_dev_get(skb->dev);
	if (!in6_dev)
		return;
	/* Routers don't accept redirects; admin may disable them too. */
	if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
		return;

	/* RFC2461 8.1:
	 *	The IP source address of the Redirect MUST be the same as the current
	 *	first-hop router for the specified ICMP Destination Address.
	 */

	if (!ndisc_parse_options(skb->dev, msg->opt, optlen, &ndopts)) {
		net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
		return;
	}

	lladdr = NULL;
	if (ndopts.nd_opts_tgt_lladdr) {
		lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
					     skb->dev);
		if (!lladdr) {
			net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
			return;
		}
	}

	rt = (struct rt6_info *) dst;
	if (rt->rt6i_flags & RTF_REJECT) {
		net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
		return;
	}

	/* Redirect received -> path was valid.
	 * Look, redirects are sent only in response to data packets,
	 * so that this nexthop apparently is reachable. --ANK
	 */
	dst_confirm_neigh(&rt->dst, &ipv6_hdr(skb)->saddr);

	neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
	if (!neigh)
		return;

	/*
	 *	We have finally decided to accept it.
	 */

	ndisc_update(skb->dev, neigh, lladdr, NUD_STALE,
		     NEIGH_UPDATE_F_WEAK_OVERRIDE|
		     NEIGH_UPDATE_F_OVERRIDE|
		     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
				     NEIGH_UPDATE_F_ISROUTER)),
		     NDISC_REDIRECT, &ndopts);

	nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL);
	if (!nrt)
		goto out;

	nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
	if (on_link)
		nrt->rt6i_flags &= ~RTF_GATEWAY;

	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;

	if (ip6_ins_rt(nrt))
		goto out;

	netevent.old = &rt->dst;
	netevent.new = &nrt->dst;
	netevent.daddr = &msg->dest;
	netevent.neigh = neigh;
	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);

	/* The superseded cache entry is deleted; dst_clone balances the
	 * reference that ip6_del_rt() consumes.
	 */
	if (rt->rt6i_flags & RTF_CACHE) {
		rt = (struct rt6_info *) dst_clone(&rt->dst);
		ip6_del_rt(rt);
	}

out:
	neigh_release(neigh);
}
2326
2327 /*
2328 * Misc support functions
2329 */
2330
/* Link clone @rt to its parent @from: hold a reference on the parent,
 * record it in dst.from and share its metrics. @from must itself be an
 * original route (not a clone), hence the BUG_ON.
 */
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
	BUG_ON(from->dst.from);

	/* Expiry is tracked on the parent, not on the clone. */
	rt->rt6i_flags &= ~RTF_EXPIRES;
	dst_hold(&from->dst);
	rt->dst.from = &from->dst;
	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
2340
/* Initialize clone @rt from original route @ort: copy handlers, keys,
 * flags and table membership, take the needed idev/lwtstate references,
 * and bind the clone to its parent via rt6_set_from().
 */
static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
{
	rt->dst.input = ort->dst.input;
	rt->dst.output = ort->dst.output;
	rt->rt6i_dst = ort->rt6i_dst;
	rt->dst.error = ort->dst.error;
	rt->rt6i_idev = ort->rt6i_idev;
	if (rt->rt6i_idev)
		in6_dev_hold(rt->rt6i_idev);
	rt->dst.lastuse = jiffies;
	rt->rt6i_gateway = ort->rt6i_gateway;
	rt->rt6i_flags = ort->rt6i_flags;
	rt6_set_from(rt, ort);
	rt->rt6i_metric = ort->rt6i_metric;
#ifdef CONFIG_IPV6_SUBTREES
	rt->rt6i_src = ort->rt6i_src;
#endif
	rt->rt6i_prefsrc = ort->rt6i_prefsrc;
	rt->rt6i_table = ort->rt6i_table;
	rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
2362
2363 #ifdef CONFIG_IPV6_ROUTE_INFO
/* Find an existing RA route-information route for @prefix/@prefixlen via
 * gateway @gwaddr on @dev, in the device's l3mdev table (or the default
 * RT6_TABLE_INFO). Returns a held rt6_info or NULL.
 */
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix, int prefixlen,
					   const struct in6_addr *gwaddr,
					   struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO;
	int ifindex = dev->ifindex;
	struct fib6_node *fn;
	struct rt6_info *rt = NULL;
	struct fib6_table *table;

	table = fib6_get_table(net, tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
	if (!fn)
		goto out;

	for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->dst.dev->ifindex != ifindex)
			continue;
		if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
			continue;
		if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
			continue;
		dst_hold(&rt->dst);
		break;
	}
out:
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2398
2399 static struct rt6_info *rt6_add_route_info(struct net *net,
2400 const struct in6_addr *prefix, int prefixlen,
2401 const struct in6_addr *gwaddr,
2402 struct net_device *dev,
2403 unsigned int pref)
2404 {
2405 struct fib6_config cfg = {
2406 .fc_metric = IP6_RT_PRIO_USER,
2407 .fc_ifindex = dev->ifindex,
2408 .fc_dst_len = prefixlen,
2409 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
2410 RTF_UP | RTF_PREF(pref),
2411 .fc_nlinfo.portid = 0,
2412 .fc_nlinfo.nlh = NULL,
2413 .fc_nlinfo.nl_net = net,
2414 };
2415
2416 cfg.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_INFO,
2417 cfg.fc_dst = *prefix;
2418 cfg.fc_gateway = *gwaddr;
2419
2420 /* We should treat it as a default route if prefix length is 0. */
2421 if (!prefixlen)
2422 cfg.fc_flags |= RTF_DEFAULT;
2423
2424 ip6_route_add(&cfg);
2425
2426 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, dev);
2427 }
2428 #endif
2429
/* Find the RA-learnt default-router entry (RTF_ADDRCONF|RTF_DEFAULT)
 * whose gateway is @addr on device @dev.
 *
 * Returns the entry with a dst reference held, or NULL.
 */
struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
{
	u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT;
	struct rt6_info *rt;
	struct fib6_table *table;

	table = fib6_get_table(dev_net(dev), tb_id);
	if (!table)
		return NULL;

	read_lock_bh(&table->tb6_lock);
	/* Default routers live in the root node of the table. */
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (dev == rt->dst.dev &&
		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
		    ipv6_addr_equal(&rt->rt6i_gateway, addr))
			break;
	}
	if (rt)
		dst_hold(&rt->dst);	/* reference handed to the caller */
	read_unlock_bh(&table->tb6_lock);
	return rt;
}
2452
2453 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2454 struct net_device *dev,
2455 unsigned int pref)
2456 {
2457 struct fib6_config cfg = {
2458 .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
2459 .fc_metric = IP6_RT_PRIO_USER,
2460 .fc_ifindex = dev->ifindex,
2461 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2462 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2463 .fc_nlinfo.portid = 0,
2464 .fc_nlinfo.nlh = NULL,
2465 .fc_nlinfo.nl_net = dev_net(dev),
2466 };
2467
2468 cfg.fc_gateway = *gwaddr;
2469
2470 if (!ip6_route_add(&cfg)) {
2471 struct fib6_table *table;
2472
2473 table = fib6_get_table(dev_net(dev), cfg.fc_table);
2474 if (table)
2475 table->flags |= RT6_TABLE_HAS_DFLT_ROUTER;
2476 }
2477
2478 return rt6_get_dflt_router(gwaddr, dev);
2479 }
2480
/* Delete every RA-learnt default/addrconf route in @table, except on
 * interfaces configured with accept_ra == 2 (accept RA even when
 * forwarding).  ip6_del_rt() cannot run under tb6_lock, so each hit
 * drops the lock and restarts the scan from the beginning.
 */
static void __rt6_purge_dflt_routers(struct fib6_table *table)
{
	struct rt6_info *rt;

restart:
	read_lock_bh(&table->tb6_lock);
	for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
		    (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
			dst_hold(&rt->dst);	/* keep rt alive across unlock */
			read_unlock_bh(&table->tb6_lock);
			ip6_del_rt(rt);
			goto restart;	/* list may have changed; rescan */
		}
	}
	read_unlock_bh(&table->tb6_lock);

	table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}
2500
/* Purge RA-learnt default routers from every FIB table in @net that has
 * been marked RT6_TABLE_HAS_DFLT_ROUTER.  The table hash is walked under
 * RCU; per-table deletion locking is done by __rt6_purge_dflt_routers().
 */
void rt6_purge_dflt_routers(struct net *net)
{
	struct fib6_table *table;
	struct hlist_head *head;
	unsigned int h;

	rcu_read_lock();

	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
		head = &net->ipv6.fib_table_hash[h];
		hlist_for_each_entry_rcu(table, head, tb6_hlist) {
			if (table->flags & RT6_TABLE_HAS_DFLT_ROUTER)
				__rt6_purge_dflt_routers(table);
		}
	}

	rcu_read_unlock();
}
2519
2520 static void rtmsg_to_fib6_config(struct net *net,
2521 struct in6_rtmsg *rtmsg,
2522 struct fib6_config *cfg)
2523 {
2524 memset(cfg, 0, sizeof(*cfg));
2525
2526 cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
2527 : RT6_TABLE_MAIN;
2528 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2529 cfg->fc_metric = rtmsg->rtmsg_metric;
2530 cfg->fc_expires = rtmsg->rtmsg_info;
2531 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2532 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2533 cfg->fc_flags = rtmsg->rtmsg_flags;
2534
2535 cfg->fc_nlinfo.nl_net = net;
2536
2537 cfg->fc_dst = rtmsg->rtmsg_dst;
2538 cfg->fc_src = rtmsg->rtmsg_src;
2539 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2540 }
2541
/* Legacy SIOCADDRT/SIOCDELRT route ioctl entry point.
 *
 * Requires CAP_NET_ADMIN in the netns user namespace.  Returns 0 on
 * success, -EPERM, -EFAULT on a bad user buffer, or -EINVAL for any
 * other command.
 */
int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct fib6_config cfg;
	struct in6_rtmsg rtmsg;
	int err;

	switch (cmd) {
	case SIOCADDRT:		/* Add a route */
	case SIOCDELRT:		/* Delete a route */
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		err = copy_from_user(&rtmsg, arg,
				     sizeof(struct in6_rtmsg));
		if (err)
			return -EFAULT;

		rtmsg_to_fib6_config(net, &rtmsg, &cfg);

		/* FIB modification requires the RTNL lock. */
		rtnl_lock();
		switch (cmd) {
		case SIOCADDRT:
			err = ip6_route_add(&cfg);
			break;
		case SIOCDELRT:
			err = ip6_route_del(&cfg);
			break;
		default:
			err = -EINVAL;
		}
		rtnl_unlock();

		return err;
	}

	return -EINVAL;
}
2578
2579 /*
2580 * Drop the packet on the floor
2581 */
2582
/*
 *	Drop the packet on the floor
 */

/* Common helper for the discard/prohibit dst handlers: bump the right
 * SNMP counter, send an ICMPv6 destination-unreachable with @code, and
 * free the skb.  @ipstats_mib_noroutes selects the input or output
 * no-route counter; an unspecified destination on input is counted as
 * an address error instead.
 */
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
{
	int type;
	struct dst_entry *dst = skb_dst(skb);
	switch (ipstats_mib_noroutes) {
	case IPSTATS_MIB_INNOROUTES:
		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
		if (type == IPV6_ADDR_ANY) {
			IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
				      IPSTATS_MIB_INADDRERRORS);
			break;
		}
		/* FALLTHROUGH */
	case IPSTATS_MIB_OUTNOROUTES:
		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
			      ipstats_mib_noroutes);
		break;
	}
	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
	kfree_skb(skb);
	return 0;
}
2605
/* dst.input handler for blackhole/no-route entries on the input path. */
static int ip6_pkt_discard(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
2610
/* dst.output handler for blackhole/no-route entries; the skb device is
 * set from the dst so the ICMP error is emitted on the right interface.
 */
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
2616
/* dst.input handler for prohibit entries (administratively denied). */
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
2621
/* dst.output handler for prohibit entries (administratively denied). */
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	skb->dev = skb_dst(skb)->dev;
	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
2627
2628 /*
2629 * Allocate a dst for local (unicast / anycast) address.
2630 */
2631
/*
 *	Allocate a dst for local (unicast / anycast) address.
 */

/* Build the host route (/128) for a local address owned by @idev.
 * @anycast selects RTF_ANYCAST vs RTF_LOCAL.  The route is bound to the
 * loopback device (or the L3 master device for enslaved, non-strict
 * addresses) and returned with refcount 1, or ERR_PTR(-ENOMEM).
 */
struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
				    const struct in6_addr *addr,
				    bool anycast)
{
	u32 tb_id;
	struct net *net = dev_net(idev->dev);
	struct net_device *dev = net->loopback_dev;
	struct rt6_info *rt;

	/* use L3 Master device as loopback for host routes if device
	 * is enslaved and address is not link local or multicast
	 */
	if (!rt6_need_strict(addr))
		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;

	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
	if (!rt)
		return ERR_PTR(-ENOMEM);

	in6_dev_hold(idev);	/* route keeps a reference on the idev */

	rt->dst.flags |= DST_HOST;
	rt->dst.input = ip6_input;
	rt->dst.output = ip6_output;
	rt->rt6i_idev = idev;

	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
	if (anycast)
		rt->rt6i_flags |= RTF_ANYCAST;
	else
		rt->rt6i_flags |= RTF_LOCAL;

	rt->rt6i_gateway = *addr;
	rt->rt6i_dst.addr = *addr;
	rt->rt6i_dst.plen = 128;	/* always a host route */
	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
	rt->rt6i_table = fib6_get_table(net, tb_id);
	rt->dst.flags |= DST_NOCACHE;

	atomic_set(&rt->dst.__refcnt, 1);	/* reference for the caller */

	return rt;
}
2675
/* remove deleted ip from prefsrc entries */
struct arg_dev_net_ip {
	struct net_device *dev;	/* restrict to this device; NULL matches any */
	struct net *net;	/* owning network namespace */
	struct in6_addr *addr;	/* the preferred-source address going away */
};
2682
2683 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2684 {
2685 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2686 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2687 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2688
2689 if (((void *)rt->dst.dev == dev || !dev) &&
2690 rt != net->ipv6.ip6_null_entry &&
2691 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2692 /* remove prefsrc entry */
2693 rt->rt6i_prefsrc.plen = 0;
2694 }
2695 return 0;
2696 }
2697
/* Strip @ifp's address from the prefsrc field of every route in its
 * netns (see fib6_remove_prefsrc()); called when the address is deleted.
 */
void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
{
	struct net *net = dev_net(ifp->idev->dev);
	struct arg_dev_net_ip adni = {
		.dev = ifp->idev->dev,
		.net = net,
		.addr = &ifp->addr,
	};
	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
}
2708
2709 #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
2710 #define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
2711
2712 /* Remove routers and update dst entries when gateway turn into host. */
2713 static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
2714 {
2715 struct in6_addr *gateway = (struct in6_addr *)arg;
2716
2717 if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
2718 ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
2719 ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
2720 return -1;
2721 }
2722 return 0;
2723 }
2724
/* Drop router/cached-gateway routes via @gateway across the whole netns;
 * used when a former router address turns out to be a plain host.
 */
void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
{
	fib6_clean_all(net, fib6_clean_tohost, gateway);
}
2729
/* Context passed to fib6_ifdown() through the fib6 cleanup walkers. */
struct arg_dev_net {
	struct net_device *dev;	/* device going down; NULL matches any */
	struct net *net;	/* owning network namespace */
};
2734
2735 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2736 {
2737 const struct arg_dev_net *adn = arg;
2738 const struct net_device *dev = adn->dev;
2739
2740 if ((rt->dst.dev == dev || !dev) &&
2741 rt != adn->net->ipv6.ip6_null_entry)
2742 return -1;
2743
2744 return 0;
2745 }
2746
/* Remove every route bound to @dev (or all devices when @dev is NULL)
 * from the FIB and the ICMP rate-limit cache, then flush uncached
 * entries for the device.
 */
void rt6_ifdown(struct net *net, struct net_device *dev)
{
	struct arg_dev_net adn = {
		.dev = dev,
		.net = net,
	};

	fib6_clean_all(net, fib6_ifdown, &adn);
	icmp6_clean_all(fib6_ifdown, &adn);
	if (dev)
		rt6_uncached_list_flush_dev(net, dev);
}
2759
/* Context passed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device whose MTU changed */
	unsigned int mtu;	/* the new device MTU */
};
2764
/* Per-route callback for a device MTU change: updates the route's PMTU
 * metric (or rt6i_pmtu for cache entries) according to the rules in the
 * comments below.  Always returns 0 — the walk never deletes routes.
 */
static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
{
	struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
	struct inet6_dev *idev;

	/* In IPv6 pmtu discovery is not optional,
	   so that RTAX_MTU lock cannot disable it.
	   We still use this lock to block changes
	   caused by addrconf/ndisc.
	*/

	idev = __in6_dev_get(arg->dev);
	if (!idev)
		return 0;

	/* For administrative MTU increase, there is no way to discover
	   IPv6 PMTU increase, so PMTU increase should be updated here.
	   Since RFC 1981 doesn't include administrative MTU increase
	   update PMTU increase is a MUST. (i.e. jumbo frame)
	 */
	/*
	   If new MTU is less than route PMTU, this new MTU will be the
	   lowest MTU in the path, update the route PMTU to reflect PMTU
	   decreases; if new MTU is greater than route PMTU, and the
	   old MTU is the lowest MTU in the path, update the route PMTU
	   to reflect the increase. In this case if the other nodes' MTU
	   also have the lowest MTU, TOO BIG MESSAGE will be lead to
	   PMTU discovery.
	 */
	if (rt->dst.dev == arg->dev &&
	    dst_metric_raw(&rt->dst, RTAX_MTU) &&
	    !dst_metric_locked(&rt->dst, RTAX_MTU)) {
		if (rt->rt6i_flags & RTF_CACHE) {
			/* For RTF_CACHE with rt6i_pmtu == 0
			 * (i.e. a redirected route),
			 * the metrics of its rt->dst.from has already
			 * been updated.
			 */
			if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
				rt->rt6i_pmtu = arg->mtu;
		} else if (dst_mtu(&rt->dst) >= arg->mtu ||
			   (dst_mtu(&rt->dst) < arg->mtu &&
			    dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
			dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
		}
	}
	return 0;
}
2813
/* Propagate a device MTU change to every route in the device's netns;
 * see rt6_mtu_change_route() for the per-route update policy.
 */
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
{
	struct rt6_mtu_change_arg arg = {
		.dev = dev,
		.mtu = mtu,
	};

	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
}
2823
/* Netlink attribute validation policy for IPv6 RTM_{NEW,DEL,GET}ROUTE
 * requests, consumed by nlmsg_parse() in rtm_to_fib6_config() and
 * inet6_rtm_getroute().
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]               = { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
	[RTA_PREF]              = { .type = NLA_U8 },
	[RTA_ENCAP_TYPE]	= { .type = NLA_U16 },
	[RTA_ENCAP]		= { .type = NLA_NESTED },
	[RTA_EXPIRES]		= { .type = NLA_U32 },
	[RTA_UID]		= { .type = NLA_U32 },
};
2837
/* Translate an RTM_NEWROUTE/RTM_DELROUTE netlink request into a
 * struct fib6_config.
 *
 * Returns 0 on success, or a negative errno on attribute parse failure,
 * a truncated RTA_DST/RTA_SRC payload, or an invalid lwtunnel encap.
 */
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
			      struct fib6_config *cfg)
{
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	unsigned int pref;
	int err;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	rtm = nlmsg_data(nlh);
	memset(cfg, 0, sizeof(*cfg));

	cfg->fc_table = rtm->rtm_table;	/* may be overridden by RTA_TABLE below */
	cfg->fc_dst_len = rtm->rtm_dst_len;
	cfg->fc_src_len = rtm->rtm_src_len;
	cfg->fc_flags = RTF_UP;
	cfg->fc_protocol = rtm->rtm_protocol;
	cfg->fc_type = rtm->rtm_type;

	/* All reject-style route types map onto RTF_REJECT; fc_type keeps
	 * the precise variant for error-code selection at lookup time.
	 */
	if (rtm->rtm_type == RTN_UNREACHABLE ||
	    rtm->rtm_type == RTN_BLACKHOLE ||
	    rtm->rtm_type == RTN_PROHIBIT ||
	    rtm->rtm_type == RTN_THROW)
		cfg->fc_flags |= RTF_REJECT;

	if (rtm->rtm_type == RTN_LOCAL)
		cfg->fc_flags |= RTF_LOCAL;

	if (rtm->rtm_flags & RTM_F_CLONED)
		cfg->fc_flags |= RTF_CACHE;

	cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
	cfg->fc_nlinfo.nlh = nlh;
	cfg->fc_nlinfo.nl_net = sock_net(skb->sk);

	if (tb[RTA_GATEWAY]) {
		cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
		cfg->fc_flags |= RTF_GATEWAY;
	}

	if (tb[RTA_DST]) {
		int plen = (rtm->rtm_dst_len + 7) >> 3;	/* prefix bits -> bytes */

		if (nla_len(tb[RTA_DST]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
	}

	if (tb[RTA_SRC]) {
		int plen = (rtm->rtm_src_len + 7) >> 3;

		if (nla_len(tb[RTA_SRC]) < plen)
			goto errout;

		nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
	}

	if (tb[RTA_PREFSRC])
		cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]);

	if (tb[RTA_OIF])
		cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_PRIORITY])
		cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);

	if (tb[RTA_METRICS]) {
		cfg->fc_mx = nla_data(tb[RTA_METRICS]);
		cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
	}

	if (tb[RTA_TABLE])
		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);

	if (tb[RTA_MULTIPATH]) {
		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);

		err = lwtunnel_valid_encap_type_attr(cfg->fc_mp,
						     cfg->fc_mp_len);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_PREF]) {
		pref = nla_get_u8(tb[RTA_PREF]);
		/* Unknown preference values fall back to medium. */
		if (pref != ICMPV6_ROUTER_PREF_LOW &&
		    pref != ICMPV6_ROUTER_PREF_HIGH)
			pref = ICMPV6_ROUTER_PREF_MEDIUM;
		cfg->fc_flags |= RTF_PREF(pref);
	}

	if (tb[RTA_ENCAP])
		cfg->fc_encap = tb[RTA_ENCAP];

	if (tb[RTA_ENCAP_TYPE]) {
		cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);

		err = lwtunnel_valid_encap_type(cfg->fc_encap_type);
		if (err < 0)
			goto errout;
	}

	if (tb[RTA_EXPIRES]) {
		unsigned long timeout = addrconf_timeout_fixup(nla_get_u32(tb[RTA_EXPIRES]), HZ);

		if (addrconf_finite_timeout(timeout)) {
			cfg->fc_expires = jiffies_to_clock_t(timeout * HZ);
			cfg->fc_flags |= RTF_EXPIRES;
		}
	}

	err = 0;
errout:
	return err;
}
2959
/* Per-nexthop bookkeeping used while installing an RTA_MULTIPATH route;
 * lives on the local rt6_nh_list in ip6_route_multipath_add().
 */
struct rt6_nh {
	struct rt6_info *rt6_info;	/* route built for this nexthop */
	struct fib6_config r_cfg;	/* config used to create (and delete) it */
	struct mx6_config mxc;		/* converted metrics for insertion */
	struct list_head next;
};
2966
/* Warn about every nexthop of a partially failed multipath replace so
 * the admin can audit what was actually installed.
 */
static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
{
	struct rt6_nh *nh;

	list_for_each_entry(nh, rt6_nh_list, next) {
		pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6 nexthop %pI6 ifi %d\n",
			&nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
			nh->r_cfg.fc_ifindex);
	}
}
2977
/* Append nexthop route @rt (with its config @r_cfg) to @rt6_nh_list.
 *
 * Returns -EEXIST for a duplicate nexthop (same device, idev and
 * gateway), -ENOMEM or a metrics-conversion error, or 0 on success.
 * On success the list entry takes over bookkeeping of @rt.
 */
static int ip6_route_info_append(struct list_head *rt6_nh_list,
				 struct rt6_info *rt, struct fib6_config *r_cfg)
{
	struct rt6_nh *nh;
	struct rt6_info *rtnh;
	int err = -EEXIST;

	list_for_each_entry(nh, rt6_nh_list, next) {
		/* check if rt6_info already exists */
		rtnh = nh->rt6_info;

		if (rtnh->dst.dev == rt->dst.dev &&
		    rtnh->rt6i_idev == rt->rt6i_idev &&
		    ipv6_addr_equal(&rtnh->rt6i_gateway,
				    &rt->rt6i_gateway))
			return err;
	}

	nh = kzalloc(sizeof(*nh), GFP_KERNEL);
	if (!nh)
		return -ENOMEM;
	nh->rt6_info = rt;
	err = ip6_convert_metrics(&nh->mxc, r_cfg);
	if (err) {
		kfree(nh);
		return err;
	}
	memcpy(&nh->r_cfg, r_cfg, sizeof(*r_cfg));
	list_add_tail(&nh->next, rt6_nh_list);

	return 0;
}
3010
/* Install one route per nexthop of an RTA_MULTIPATH request.
 *
 * Phase 1 parses every rtnexthop into its own rt6_info on rt6_nh_list;
 * phase 2 inserts them one by one.  If an insert fails midway, all
 * nexthops installed so far are deleted again, so the operation is
 * all-or-nothing.  Returns 0 on success or a negative errno.
 */
static int ip6_route_multipath_add(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	struct rt6_info *rt;
	struct rt6_nh *err_nh;
	struct rt6_nh *nh, *nh_safe;
	int remaining;
	int attrlen;
	int err = 1;
	int nhn = 0;
	int replace = (cfg->fc_nlinfo.nlh &&
		       (cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_REPLACE));
	LIST_HEAD(rt6_nh_list);

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry and build a list (rt6_nh_list) of
	 * rt6_info structs per nexthop
	 */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				r_cfg.fc_gateway = nla_get_in6_addr(nla);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
			r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				r_cfg.fc_encap_type = nla_get_u16(nla);
		}

		rt = ip6_route_info_create(&r_cfg);
		if (IS_ERR(rt)) {
			err = PTR_ERR(rt);
			rt = NULL;	/* nothing for cleanup to free */
			goto cleanup;
		}

		err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
		if (err) {
			dst_free(&rt->dst);	/* list did not take ownership */
			goto cleanup;
		}

		rtnh = rtnh_next(rtnh, &remaining);
	}

	err_nh = NULL;
	list_for_each_entry(nh, &rt6_nh_list, next) {
		err = __ip6_ins_rt(nh->rt6_info, &cfg->fc_nlinfo, &nh->mxc);
		/* nh->rt6_info is used or freed at this point, reset to NULL*/
		nh->rt6_info = NULL;
		if (err) {
			if (replace && nhn)
				ip6_print_replace_route_err(&rt6_nh_list);
			err_nh = nh;	/* rollback stops before this entry */
			goto add_errout;
		}

		/* Because each route is added like a single route we remove
		 * these flags after the first nexthop: if there is a collision,
		 * we have already failed to add the first nexthop:
		 * fib6_add_rt2node() has rejected it; when replacing, old
		 * nexthops have been replaced by first new, the rest should
		 * be added to it.
		 */
		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
						     NLM_F_REPLACE);
		nhn++;
	}

	goto cleanup;

add_errout:
	/* Delete routes that were already added */
	list_for_each_entry(nh, &rt6_nh_list, next) {
		if (err_nh == nh)
			break;
		ip6_route_del(&nh->r_cfg);
	}

cleanup:
	list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
		if (nh->rt6_info)
			dst_free(&nh->rt6_info->dst);	/* never inserted */
		kfree(nh->mxc.mx);
		list_del(&nh->next);
		kfree(nh);
	}

	return err;
}
3113
/* Delete one route per nexthop of an RTA_MULTIPATH request.  Every
 * nexthop is attempted even if an earlier one fails; the last error
 * (if any) is returned, 0 when all deletes succeeded.
 */
static int ip6_route_multipath_del(struct fib6_config *cfg)
{
	struct fib6_config r_cfg;
	struct rtnexthop *rtnh;
	int remaining;
	int attrlen;
	int err = 1, last_err = 0;

	remaining = cfg->fc_mp_len;
	rtnh = (struct rtnexthop *)cfg->fc_mp;

	/* Parse a Multipath Entry */
	while (rtnh_ok(rtnh, remaining)) {
		memcpy(&r_cfg, cfg, sizeof(*cfg));
		if (rtnh->rtnh_ifindex)
			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			if (nla) {
				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
				r_cfg.fc_flags |= RTF_GATEWAY;
			}
		}
		err = ip6_route_del(&r_cfg);
		if (err)
			last_err = err;

		rtnh = rtnh_next(rtnh, &remaining);
	}

	return last_err;
}
3150
3151 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3152 {
3153 struct fib6_config cfg;
3154 int err;
3155
3156 err = rtm_to_fib6_config(skb, nlh, &cfg);
3157 if (err < 0)
3158 return err;
3159
3160 if (cfg.fc_mp)
3161 return ip6_route_multipath_del(&cfg);
3162 else
3163 return ip6_route_del(&cfg);
3164 }
3165
3166 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
3167 {
3168 struct fib6_config cfg;
3169 int err;
3170
3171 err = rtm_to_fib6_config(skb, nlh, &cfg);
3172 if (err < 0)
3173 return err;
3174
3175 if (cfg.fc_mp)
3176 return ip6_route_multipath_add(&cfg);
3177 else
3178 return ip6_route_add(&cfg);
3179 }
3180
3181 static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
3182 {
3183 return NLMSG_ALIGN(sizeof(struct rtmsg))
3184 + nla_total_size(16) /* RTA_SRC */
3185 + nla_total_size(16) /* RTA_DST */
3186 + nla_total_size(16) /* RTA_GATEWAY */
3187 + nla_total_size(16) /* RTA_PREFSRC */
3188 + nla_total_size(4) /* RTA_TABLE */
3189 + nla_total_size(4) /* RTA_IIF */
3190 + nla_total_size(4) /* RTA_OIF */
3191 + nla_total_size(4) /* RTA_PRIORITY */
3192 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
3193 + nla_total_size(sizeof(struct rta_cacheinfo))
3194 + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
3195 + nla_total_size(1) /* RTA_PREF */
3196 + lwtunnel_get_encap_size(rt->dst.lwtstate);
3197 }
3198
/* Fill one RTM_NEWROUTE/RTM_DELROUTE message for @rt into @skb.
 *
 * @dst/@src: concrete lookup addresses (RTM_GETROUTE replies) or NULL
 *            for dumps, in which case the route's own prefixes are used.
 * @iif:      input interface of the lookup, 0 if none.
 * @prefix:   non-zero restricts the dump to RTF_PREFIX_RT routes; a
 *            non-matching route returns 1 ("skipped, not an error").
 * @nowait:   forwarded to ip6mr_get_route() for multicast resolution.
 *
 * Returns 0 on success, 1 when skipped, -EMSGSIZE when @skb is full.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 portid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	u32 metrics[RTAX_MAX];
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	rtm->rtm_table = table;
	if (nla_put_u32(skb, RTA_TABLE, table))
		goto nla_put_failure;
	if (rt->rt6i_flags & RTF_REJECT) {
		/* Map the stored error back onto the route type it was
		 * created from (inverse of rtm_to_fib6_config()).
		 */
		switch (rt->dst.error) {
		case -EINVAL:
			rtm->rtm_type = RTN_BLACKHOLE;
			break;
		case -EACCES:
			rtm->rtm_type = RTN_PROHIBIT;
			break;
		case -EAGAIN:
			rtm->rtm_type = RTN_THROW;
			break;
		default:
			rtm->rtm_type = RTN_UNREACHABLE;
			break;
		}
	}
	else if (rt->rt6i_flags & RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	if (!netif_carrier_ok(rt->dst.dev)) {
		rtm->rtm_flags |= RTNH_F_LINKDOWN;
		if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
			rtm->rtm_flags |= RTNH_F_DEAD;
	}
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags & RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF) {
		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
			rtm->rtm_protocol = RTPROT_RA;
		else
			rtm->rtm_protocol = RTPROT_KERNEL;
	}

	if (rt->rt6i_flags & RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	if (dst) {
		if (nla_put_in6_addr(skb, RTA_DST, dst))
			goto nla_put_failure;
		rtm->rtm_dst_len = 128;	/* a concrete address is a /128 */
	} else if (rtm->rtm_dst_len)
		if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr))
			goto nla_put_failure;
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		if (nla_put_in6_addr(skb, RTA_SRC, src))
			goto nla_put_failure;
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len &&
		   nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr))
		goto nla_put_failure;
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait,
						  portid);

			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, iif))
				goto nla_put_failure;
	} else if (dst) {
		struct in6_addr saddr_buf;
		if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
		    nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	if (rt->rt6i_prefsrc.plen) {
		struct in6_addr saddr_buf;
		saddr_buf = rt->rt6i_prefsrc.addr;
		if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf))
			goto nla_put_failure;
	}

	/* rt6i_pmtu (cache entries) overrides the stored MTU metric. */
	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	if (rt->rt6i_pmtu)
		metrics[RTAX_MTU - 1] = rt->rt6i_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (rt->rt6i_flags & RTF_GATEWAY) {
		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0)
			goto nla_put_failure;
	}

	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
	if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
		goto nla_put_failure;

	expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
		goto nla_put_failure;

	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
		goto nla_put_failure;

	if (lwtunnel_fill_encap(skb, rt->dst.lwtstate) < 0)
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
3361
3362 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
3363 {
3364 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
3365 int prefix;
3366
3367 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
3368 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
3369 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
3370 } else
3371 prefix = 0;
3372
3373 return rt6_fill_node(arg->net,
3374 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
3375 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
3376 prefix, 0, NLM_F_MULTI);
3377 }
3378
/* RTM_GETROUTE handler: perform a route lookup described by the request
 * attributes and unicast the resulting route back to the requester.
 *
 * An RTA_IIF selects an input-path lookup on that device; otherwise an
 * output lookup (optionally constrained by RTA_OIF) is done.
 */
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct nlattr *tb[RTA_MAX+1];
	struct rt6_info *rt;
	struct sk_buff *skb;
	struct rtmsg *rtm;
	struct flowi6 fl6;
	int err, iif = 0, oif = 0;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
	if (err < 0)
		goto errout;

	err = -EINVAL;
	memset(&fl6, 0, sizeof(fl6));
	rtm = nlmsg_data(nlh);
	fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0);

	if (tb[RTA_SRC]) {
		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
			goto errout;

		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
	}

	if (tb[RTA_DST]) {
		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
			goto errout;

		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
	}

	if (tb[RTA_IIF])
		iif = nla_get_u32(tb[RTA_IIF]);

	if (tb[RTA_OIF])
		oif = nla_get_u32(tb[RTA_OIF]);

	if (tb[RTA_MARK])
		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]);

	if (tb[RTA_UID])
		fl6.flowi6_uid = make_kuid(current_user_ns(),
					   nla_get_u32(tb[RTA_UID]));
	else
		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();

	if (iif) {
		/* Simulate reception on the given interface. */
		struct net_device *dev;
		int flags = 0;

		dev = __dev_get_by_index(net, iif);
		if (!dev) {
			err = -ENODEV;
			goto errout;
		}

		fl6.flowi6_iif = iif;

		if (!ipv6_addr_any(&fl6.saddr))
			flags |= RT6_LOOKUP_F_HAS_SADDR;

		rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
							       flags);
	} else {
		fl6.flowi6_oif = oif;

		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
	}

	/* NOTE(review): rt->dst.error is not inspected here; reject routes
	 * are reported back to userspace through rt6_fill_node() instead.
	 */
	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb) {
		ip6_rt_put(rt);
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));

	skb_dst_set(skb, &rt->dst);	/* skb now owns the rt reference */

	err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
			    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
			    nlh->nlmsg_seq, 0, 0, 0);
	if (err < 0) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;
}
3477
/* Broadcast an RTM_NEWROUTE/RTM_DELROUTE notification for @rt to the
 * RTNLGRP_IPV6_ROUTE multicast group.  On allocation or fill failure
 * the error is recorded on the group via rtnl_set_sk_err().
 */
void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info,
		     unsigned int nlm_flags)
{
	struct sk_buff *skb;
	struct net *net = info->nl_net;
	u32 seq;
	int err;

	err = -ENOBUFS;
	seq = info->nlh ? info->nlh->nlmsg_seq : 0;

	skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
	if (!skb)
		goto errout;

	err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
			    event, info->portid, seq, 0, 0, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
		    info->nlh, gfp_any());
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
3508
/* Netdevice notifier: attach the netns special routes (null, and with
 * multiple tables also prohibit/blackhole) to the loopback device when
 * it registers, and release their idev references on unregister.
 * Non-loopback devices are ignored.
 */
static int ip6_route_dev_notify(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (!(dev->flags & IFF_LOOPBACK))
		return NOTIFY_OK;

	if (event == NETDEV_REGISTER) {
		net->ipv6.ip6_null_entry->dst.dev = dev;
		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		net->ipv6.ip6_prohibit_entry->dst.dev = dev;
		net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
		net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
		net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
#endif
	} else if (event == NETDEV_UNREGISTER) {
		in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
		in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
		in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
	}

	return NOTIFY_OK;
}
3537
3538 /*
3539 * /proc
3540 */
3541
3542 #ifdef CONFIG_PROC_FS
3543
/* File operations for /proc/net/ipv6_route (seq_file based; the open
 * handler ipv6_route_open is defined earlier in this file).
 */
static const struct file_operations ipv6_route_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= ipv6_route_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
3551
/* Emit the single line of /proc/net/rt6_stats: seven hex fields holding
 * the per-netns FIB counters plus the live dst entry count.
 */
static int rt6_stats_seq_show(struct seq_file *seq, void *v)
{
	struct net *net = (struct net *)seq->private;
	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
		   net->ipv6.rt6_stats->fib_nodes,
		   net->ipv6.rt6_stats->fib_route_nodes,
		   net->ipv6.rt6_stats->fib_rt_alloc,
		   net->ipv6.rt6_stats->fib_rt_entries,
		   net->ipv6.rt6_stats->fib_rt_cache,
		   dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
		   net->ipv6.rt6_stats->fib_discarded_routes);

	return 0;
}
3566
/* open() handler for /proc/net/rt6_stats: single-shot, netns aware. */
static int rt6_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open_net(inode, file, rt6_stats_seq_show);
}
3571
/* File operations for /proc/net/rt6_stats. */
static const struct file_operations rt6_stats_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt6_stats_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release_net,
};
3579 #endif /* CONFIG_PROC_FS */
3580
3581 #ifdef CONFIG_SYSCTL
3582
3583 static
3584 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
3585 void __user *buffer, size_t *lenp, loff_t *ppos)
3586 {
3587 struct net *net;
3588 int delay;
3589 if (!write)
3590 return -EINVAL;
3591
3592 net = (struct net *)ctl->extra1;
3593 delay = net->ipv6.sysctl.flush_delay;
3594 proc_dointvec(ctl, write, buffer, lenp, ppos);
3595 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
3596 return 0;
3597 }
3598
/* Template for the per-netns net.ipv6.route sysctl table.  NOTE: entry
 * order matters — ipv6_route_sysctl_init() rewrites .data by positional
 * index, so keep the two in sync when adding/removing entries.
 */
struct ctl_table ipv6_route_table_template[] = {
	{
		/* write-only trigger, handled by ipv6_sysctl_rtcache_flush */
		.procname	=	"flush",
		.data		=	&init_net.ipv6.sysctl.flush_delay,
		.maxlen		=	sizeof(int),
		.mode		=	0200,
		.proc_handler	=	ipv6_sysctl_rtcache_flush
	},
	{
		.procname	=	"gc_thresh",
		.data		=	&ip6_dst_ops_template.gc_thresh,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"max_size",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_max_size,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"gc_min_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_timeout",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_timeout,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_interval",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"gc_elasticity",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		.procname	=	"mtu_expires",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_mtu_expires,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_jiffies,
	},
	{
		.procname	=	"min_adv_mss",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_min_advmss,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec,
	},
	{
		/* same backing value as gc_min_interval, exposed in ms */
		.procname	=	"gc_min_interval_ms",
		.data		=	&init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
		.maxlen		=	sizeof(int),
		.mode		=	0644,
		.proc_handler	=	proc_dointvec_ms_jiffies,
	},
	{ }
};
3672
/* Clone the route sysctl template for netns @net, repointing each .data
 * field at the per-netns value.  Indices must match the entry order in
 * ipv6_route_table_template[].  Returns NULL on allocation failure
 * (callers treat that as "no sysctls for this netns").
 */
struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
{
	struct ctl_table *table;

	table = kmemdup(ipv6_route_table_template,
			sizeof(ipv6_route_table_template),
			GFP_KERNEL);

	if (table) {
		table[0].data = &net->ipv6.sysctl.flush_delay;
		table[0].extra1 = net;	/* handler recovers the netns here */
		table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
		table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
		table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
		table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
		table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
		table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
		table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;

		/* Don't export sysctls to unprivileged users.  Clearing
		 * entry 0's procname terminates the table early, so the
		 * whole table is hidden — presumably intentional; the
		 * registration loop stops at the first NULL procname.
		 */
		if (net->user_ns != &init_user_ns)
			table[0].procname = NULL;
	}

	return table;
}
3701 #endif
3702
/* Per-netns route subsystem setup: dst ops, dst entry accounting, the
 * special route entries (null and, with multiple tables, prohibit and
 * blackhole), and the sysctl/GC tunables' defaults.  Unwinds via the
 * goto ladder on any allocation failure; returns 0 or -ENOMEM.
 */
static int __net_init ip6_route_net_init(struct net *net)
{
	int ret = -ENOMEM;

	memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
	       sizeof(net->ipv6.ip6_dst_ops));

	if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
		goto out_ip6_dst_ops;

	/* Each special entry is a private copy of the static template;
	 * dst.path points back at the entry itself and dst.ops at the
	 * per-netns ops just installed above.
	 */
	net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
					   sizeof(*net->ipv6.ip6_null_entry),
					   GFP_KERNEL);
	if (!net->ipv6.ip6_null_entry)
		goto out_ip6_dst_entries;
	net->ipv6.ip6_null_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_null_entry;
	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
			 ip6_template_metrics, true);

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
					       sizeof(*net->ipv6.ip6_prohibit_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_prohibit_entry)
		goto out_ip6_null_entry;
	net->ipv6.ip6_prohibit_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
			 ip6_template_metrics, true);

	net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
					       sizeof(*net->ipv6.ip6_blk_hole_entry),
					       GFP_KERNEL);
	if (!net->ipv6.ip6_blk_hole_entry)
		goto out_ip6_prohibit_entry;
	net->ipv6.ip6_blk_hole_entry->dst.path =
		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
			 ip6_template_metrics, true);
#endif

	/* Default tunables, adjustable via the sysctl table above. */
	net->ipv6.sysctl.flush_delay = 0;
	net->ipv6.sysctl.ip6_rt_max_size = 4096;
	net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
	net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
	net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
	net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
	net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
	net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;

	net->ipv6.ip6_rt_gc_expire = 30*HZ;

	ret = 0;
out:
	return ret;

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
out_ip6_prohibit_entry:
	kfree(net->ipv6.ip6_prohibit_entry);
out_ip6_null_entry:
	kfree(net->ipv6.ip6_null_entry);
#endif
out_ip6_dst_entries:
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
out_ip6_dst_ops:
	goto out;	/* ret is still -ENOMEM on every error path */
}
3774
/* Per-netns route subsystem teardown: mirror of ip6_route_net_init(). */
static void __net_exit ip6_route_net_exit(struct net *net)
{
	kfree(net->ipv6.ip6_null_entry);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
	kfree(net->ipv6.ip6_prohibit_entry);
	kfree(net->ipv6.ip6_blk_hole_entry);
#endif
	dst_entries_destroy(&net->ipv6.ip6_dst_ops);
}
3784
3785 static int __net_init ip6_route_net_init_late(struct net *net)
3786 {
3787 #ifdef CONFIG_PROC_FS
3788 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3789 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3790 #endif
3791 return 0;
3792 }
3793
/* Late per-netns teardown: remove the /proc/net entries created by
 * ip6_route_net_init_late().
 */
static void __net_exit ip6_route_net_exit_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
	remove_proc_entry("ipv6_route", net->proc_net);
	remove_proc_entry("rt6_stats", net->proc_net);
#endif
}
3801
/* Main per-netns route init/exit hooks. */
static struct pernet_operations ip6_route_net_ops = {
	.init = ip6_route_net_init,
	.exit = ip6_route_net_exit,
};
3806
3807 static int __net_init ipv6_inetpeer_init(struct net *net)
3808 {
3809 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3810
3811 if (!bp)
3812 return -ENOMEM;
3813 inet_peer_base_init(bp);
3814 net->ipv6.peers = bp;
3815 return 0;
3816 }
3817
/* Tear down the per-netns IPv6 inetpeer base: detach the pointer first,
 * then invalidate the tree and free the base.
 */
static void __net_exit ipv6_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv6.peers;

	net->ipv6.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}
3826
/* Per-netns inetpeer init/exit hooks. */
static struct pernet_operations ipv6_inetpeer_ops = {
	.init	=	ipv6_inetpeer_init,
	.exit	=	ipv6_inetpeer_exit,
};
3831
/* Late per-netns hooks (procfs entries), registered after fib6 rules. */
static struct pernet_operations ip6_route_net_late_ops = {
	.init = ip6_route_net_init_late,
	.exit = ip6_route_net_exit_late,
};
3836
/* Netdevice notifier; priority chosen relative to addrconf's notifier so
 * it runs after addrconf for the same event.
 */
static struct notifier_block ip6_route_dev_notifier = {
	.notifier_call = ip6_route_dev_notify,
	.priority = ADDRCONF_NOTIFY_PRIORITY - 10,
};
3841
/* Attach init_net's special route entries to its loopback device.
 * For init_net the loopback device is registered before the route
 * notifier exists, so the references NETDEV_REGISTER would normally
 * take must be taken by hand here.
 */
void __init ip6_route_init_special_entries(void)
{
	/* Registering of the loopback is done before this portion of code,
	 * the loopback reference in rt6_info will not be taken, do it
	 * manually for init_net */
	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
	init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
	init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
	init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #endif
}
3856
/* Boot-time initialization of the IPv6 routing subsystem.  Registration
 * order matters; each failure jumps to the label that unwinds everything
 * registered so far, in reverse order.  Returns 0 or a negative errno.
 */
int __init ip6_route_init(void)
{
	int ret;
	int cpu;

	ret = -ENOMEM;
	ip6_dst_ops_template.kmem_cachep =
		kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!ip6_dst_ops_template.kmem_cachep)
		goto out;

	ret = dst_entries_init(&ip6_dst_blackhole_ops);
	if (ret)
		goto out_kmem_cache;

	ret = register_pernet_subsys(&ipv6_inetpeer_ops);
	if (ret)
		goto out_dst_entries;

	ret = register_pernet_subsys(&ip6_route_net_ops);
	if (ret)
		goto out_register_inetpeer;

	/* blackhole dsts share the same slab cache as regular ones */
	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;

	ret = fib6_init();
	if (ret)
		goto out_register_subsys;

	ret = xfrm6_init();
	if (ret)
		goto out_fib6_init;

	ret = fib6_rules_init();
	if (ret)
		goto xfrm6_init;

	ret = register_pernet_subsys(&ip6_route_net_late_ops);
	if (ret)
		goto fib6_rules_init;

	ret = -ENOBUFS;
	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
		goto out_register_late_subsys;

	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
	if (ret)
		goto out_register_late_subsys;

	/* Set up the per-cpu lists of uncached routes. */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}

out:
	return ret;

out_register_late_subsys:
	unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
	fib6_rules_cleanup();
xfrm6_init:
	xfrm6_fini();
out_fib6_init:
	fib6_gc_cleanup();
out_register_subsys:
	unregister_pernet_subsys(&ip6_route_net_ops);
out_register_inetpeer:
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
out_dst_entries:
	dst_entries_destroy(&ip6_dst_blackhole_ops);
out_kmem_cache:
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
	goto out;
}
3937
/* Module-exit teardown: undo ip6_route_init() in reverse order. */
void ip6_route_cleanup(void)
{
	unregister_netdevice_notifier(&ip6_route_dev_notifier);
	unregister_pernet_subsys(&ip6_route_net_late_ops);
	fib6_rules_cleanup();
	xfrm6_fini();
	fib6_gc_cleanup();
	unregister_pernet_subsys(&ipv6_inetpeer_ops);
	unregister_pernet_subsys(&ip6_route_net_ops);
	dst_entries_destroy(&ip6_dst_blackhole_ops);
	kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
}