]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - net/ipv6/route.c
Merge remote-tracking branch 'wireless-next/master' into mac80211-next
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60 #include <net/nexthop.h>
61
62 #include <asm/uaccess.h>
63
64 #ifdef CONFIG_SYSCTL
65 #include <linux/sysctl.h>
66 #endif
67
/*
 * Outcome of checking the neighbour entry behind a route's gateway.
 * Every failure code is negative, so rt6_score_route() can return one
 * in place of a (non-negative) score.
 */
enum rt6_nud_state {
	RT6_NUD_FAIL_HARD	= -3,	/* find_match() drops the route */
	RT6_NUD_FAIL_PROBE	= -2,
	RT6_NUD_FAIL_DO_RR	= -1,	/* find_match() scores it 0 and asks for round-robin */
	RT6_NUD_SUCCEED		= 1
};
74
75 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
76 const struct in6_addr *dest);
77 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
78 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
79 static unsigned int ip6_mtu(const struct dst_entry *dst);
80 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
81 static void ip6_dst_destroy(struct dst_entry *);
82 static void ip6_dst_ifdown(struct dst_entry *,
83 struct net_device *dev, int how);
84 static int ip6_dst_gc(struct dst_ops *ops);
85
86 static int ip6_pkt_discard(struct sk_buff *skb);
87 static int ip6_pkt_discard_out(struct sk_buff *skb);
88 static int ip6_pkt_prohibit(struct sk_buff *skb);
89 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
90 static void ip6_link_failure(struct sk_buff *skb);
91 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
92 struct sk_buff *skb, u32 mtu);
93 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
94 struct sk_buff *skb);
95 static int rt6_score_route(struct rt6_info *rt, int oif, int strict);
96
#ifdef CONFIG_IPV6_ROUTE_INFO
/* Helpers used by rt6_route_rcv(); declared here ahead of their definitions. */
static struct rt6_info *rt6_add_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex,
					   unsigned int pref);
static struct rt6_info *rt6_get_route_info(struct net *net,
					   const struct in6_addr *prefix,
					   int prefixlen,
					   const struct in6_addr *gwaddr,
					   int ifindex);
#endif
106
107 static void rt6_bind_peer(struct rt6_info *rt, int create)
108 {
109 struct inet_peer_base *base;
110 struct inet_peer *peer;
111
112 base = inetpeer_base_ptr(rt->_rt6i_peer);
113 if (!base)
114 return;
115
116 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
117 if (peer) {
118 if (!rt6_set_peer(rt, peer))
119 inet_putpeer(peer);
120 }
121 }
122
123 static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create)
124 {
125 if (rt6_has_peer(rt))
126 return rt6_peer_ptr(rt);
127
128 rt6_bind_peer(rt, create);
129 return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL);
130 }
131
/* Convenience wrapper: __rt6_get_peer() with the create flag set. */
static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt)
{
	const int create = 1;

	return __rt6_get_peer(rt, create);
}
136
137 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
138 {
139 struct rt6_info *rt = (struct rt6_info *) dst;
140 struct inet_peer *peer;
141 u32 *p = NULL;
142
143 if (!(rt->dst.flags & DST_HOST))
144 return NULL;
145
146 peer = rt6_get_peer_create(rt);
147 if (peer) {
148 u32 *old_p = __DST_METRICS_PTR(old);
149 unsigned long prev, new;
150
151 p = peer->metrics;
152 if (inet_metrics_new(peer))
153 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
154
155 new = (unsigned long) p;
156 prev = cmpxchg(&dst->_metrics, old, new);
157
158 if (prev != old) {
159 p = __DST_METRICS_PTR(prev);
160 if (prev & DST_METRICS_READ_ONLY)
161 p = NULL;
162 }
163 }
164 return p;
165 }
166
167 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
168 struct sk_buff *skb,
169 const void *daddr)
170 {
171 struct in6_addr *p = &rt->rt6i_gateway;
172
173 if (!ipv6_addr_any(p))
174 return (const void *) p;
175 else if (skb)
176 return &ipv6_hdr(skb)->daddr;
177 return daddr;
178 }
179
180 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
181 struct sk_buff *skb,
182 const void *daddr)
183 {
184 struct rt6_info *rt = (struct rt6_info *) dst;
185 struct neighbour *n;
186
187 daddr = choose_neigh_daddr(rt, skb, daddr);
188 n = __ipv6_neigh_lookup(dst->dev, daddr);
189 if (n)
190 return n;
191 return neigh_create(&nd_tbl, daddr, dst->dev);
192 }
193
194 static struct dst_ops ip6_dst_ops_template = {
195 .family = AF_INET6,
196 .protocol = cpu_to_be16(ETH_P_IPV6),
197 .gc = ip6_dst_gc,
198 .gc_thresh = 1024,
199 .check = ip6_dst_check,
200 .default_advmss = ip6_default_advmss,
201 .mtu = ip6_mtu,
202 .cow_metrics = ipv6_cow_metrics,
203 .destroy = ip6_dst_destroy,
204 .ifdown = ip6_dst_ifdown,
205 .negative_advice = ip6_negative_advice,
206 .link_failure = ip6_link_failure,
207 .update_pmtu = ip6_rt_update_pmtu,
208 .redirect = rt6_do_redirect,
209 .local_out = __ip6_local_out,
210 .neigh_lookup = ip6_neigh_lookup,
211 };
212
213 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
214 {
215 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
216
217 return mtu ? : dst->dev->mtu;
218 }
219
220 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
221 struct sk_buff *skb, u32 mtu)
222 {
223 }
224
/* dst_ops->redirect for blackhole routes: deliberately a no-op. */
static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				      struct sk_buff *skb)
{
	/* nothing to do */
}
229
230 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
231 unsigned long old)
232 {
233 return NULL;
234 }
235
236 static struct dst_ops ip6_dst_blackhole_ops = {
237 .family = AF_INET6,
238 .protocol = cpu_to_be16(ETH_P_IPV6),
239 .destroy = ip6_dst_destroy,
240 .check = ip6_dst_check,
241 .mtu = ip6_blackhole_mtu,
242 .default_advmss = ip6_default_advmss,
243 .update_pmtu = ip6_rt_blackhole_update_pmtu,
244 .redirect = ip6_rt_blackhole_redirect,
245 .cow_metrics = ip6_rt_blackhole_cow_metrics,
246 .neigh_lookup = ip6_neigh_lookup,
247 };
248
249 static const u32 ip6_template_metrics[RTAX_MAX] = {
250 [RTAX_HOPLIMIT - 1] = 0,
251 };
252
253 static const struct rt6_info ip6_null_entry_template = {
254 .dst = {
255 .__refcnt = ATOMIC_INIT(1),
256 .__use = 1,
257 .obsolete = DST_OBSOLETE_FORCE_CHK,
258 .error = -ENETUNREACH,
259 .input = ip6_pkt_discard,
260 .output = ip6_pkt_discard_out,
261 },
262 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
263 .rt6i_protocol = RTPROT_KERNEL,
264 .rt6i_metric = ~(u32) 0,
265 .rt6i_ref = ATOMIC_INIT(1),
266 };
267
268 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
269
270 static const struct rt6_info ip6_prohibit_entry_template = {
271 .dst = {
272 .__refcnt = ATOMIC_INIT(1),
273 .__use = 1,
274 .obsolete = DST_OBSOLETE_FORCE_CHK,
275 .error = -EACCES,
276 .input = ip6_pkt_prohibit,
277 .output = ip6_pkt_prohibit_out,
278 },
279 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
280 .rt6i_protocol = RTPROT_KERNEL,
281 .rt6i_metric = ~(u32) 0,
282 .rt6i_ref = ATOMIC_INIT(1),
283 };
284
285 static const struct rt6_info ip6_blk_hole_entry_template = {
286 .dst = {
287 .__refcnt = ATOMIC_INIT(1),
288 .__use = 1,
289 .obsolete = DST_OBSOLETE_FORCE_CHK,
290 .error = -EINVAL,
291 .input = dst_discard,
292 .output = dst_discard,
293 },
294 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
295 .rt6i_protocol = RTPROT_KERNEL,
296 .rt6i_metric = ~(u32) 0,
297 .rt6i_ref = ATOMIC_INIT(1),
298 };
299
300 #endif
301
302 /* allocate dst with ip6_dst_ops */
303 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
304 struct net_device *dev,
305 int flags,
306 struct fib6_table *table)
307 {
308 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
309 0, DST_OBSOLETE_FORCE_CHK, flags);
310
311 if (rt) {
312 struct dst_entry *dst = &rt->dst;
313
314 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
315 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
316 rt->rt6i_genid = rt_genid_ipv6(net);
317 INIT_LIST_HEAD(&rt->rt6i_siblings);
318 }
319 return rt;
320 }
321
322 static void ip6_dst_destroy(struct dst_entry *dst)
323 {
324 struct rt6_info *rt = (struct rt6_info *)dst;
325 struct inet6_dev *idev = rt->rt6i_idev;
326 struct dst_entry *from = dst->from;
327
328 if (!(rt->dst.flags & DST_HOST))
329 dst_destroy_metrics_generic(dst);
330
331 if (idev) {
332 rt->rt6i_idev = NULL;
333 in6_dev_put(idev);
334 }
335
336 dst->from = NULL;
337 dst_release(from);
338
339 if (rt6_has_peer(rt)) {
340 struct inet_peer *peer = rt6_peer_ptr(rt);
341 inet_putpeer(peer);
342 }
343 }
344
345 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346 int how)
347 {
348 struct rt6_info *rt = (struct rt6_info *)dst;
349 struct inet6_dev *idev = rt->rt6i_idev;
350 struct net_device *loopback_dev =
351 dev_net(dev)->loopback_dev;
352
353 if (dev != loopback_dev) {
354 if (idev && idev->dev == dev) {
355 struct inet6_dev *loopback_idev =
356 in6_dev_get(loopback_dev);
357 if (loopback_idev) {
358 rt->rt6i_idev = loopback_idev;
359 in6_dev_put(idev);
360 }
361 }
362 }
363 }
364
365 static bool rt6_check_expired(const struct rt6_info *rt)
366 {
367 if (rt->rt6i_flags & RTF_EXPIRES) {
368 if (time_after(jiffies, rt->dst.expires))
369 return true;
370 } else if (rt->dst.from) {
371 return rt6_check_expired((struct rt6_info *) rt->dst.from);
372 }
373 return false;
374 }
375
376 static bool rt6_need_strict(const struct in6_addr *daddr)
377 {
378 return ipv6_addr_type(daddr) &
379 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
380 }
381
382 /* Multipath route selection:
383 * Hash based function using packet header and flowlabel.
384 * Adapted from fib_info_hashfn()
385 */
386 static int rt6_info_hash_nhsfn(unsigned int candidate_count,
387 const struct flowi6 *fl6)
388 {
389 unsigned int val = fl6->flowi6_proto;
390
391 val ^= ipv6_addr_hash(&fl6->daddr);
392 val ^= ipv6_addr_hash(&fl6->saddr);
393
394 /* Work only if this not encapsulated */
395 switch (fl6->flowi6_proto) {
396 case IPPROTO_UDP:
397 case IPPROTO_TCP:
398 case IPPROTO_SCTP:
399 val ^= (__force u16)fl6->fl6_sport;
400 val ^= (__force u16)fl6->fl6_dport;
401 break;
402
403 case IPPROTO_ICMPV6:
404 val ^= (__force u16)fl6->fl6_icmp_type;
405 val ^= (__force u16)fl6->fl6_icmp_code;
406 break;
407 }
408 /* RFC6438 recommands to use flowlabel */
409 val ^= (__force u32)fl6->flowlabel;
410
411 /* Perhaps, we need to tune, this function? */
412 val = val ^ (val >> 7) ^ (val >> 12);
413 return val % candidate_count;
414 }
415
416 static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
417 struct flowi6 *fl6, int oif,
418 int strict)
419 {
420 struct rt6_info *sibling, *next_sibling;
421 int route_choosen;
422
423 route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6);
424 /* Don't change the route, if route_choosen == 0
425 * (siblings does not include ourself)
426 */
427 if (route_choosen)
428 list_for_each_entry_safe(sibling, next_sibling,
429 &match->rt6i_siblings, rt6i_siblings) {
430 route_choosen--;
431 if (route_choosen == 0) {
432 if (rt6_score_route(sibling, oif, strict) < 0)
433 break;
434 match = sibling;
435 break;
436 }
437 }
438 return match;
439 }
440
441 /*
442 * Route lookup. Any table->tb6_lock is implied.
443 */
444
445 static inline struct rt6_info *rt6_device_match(struct net *net,
446 struct rt6_info *rt,
447 const struct in6_addr *saddr,
448 int oif,
449 int flags)
450 {
451 struct rt6_info *local = NULL;
452 struct rt6_info *sprt;
453
454 if (!oif && ipv6_addr_any(saddr))
455 goto out;
456
457 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
458 struct net_device *dev = sprt->dst.dev;
459
460 if (oif) {
461 if (dev->ifindex == oif)
462 return sprt;
463 if (dev->flags & IFF_LOOPBACK) {
464 if (!sprt->rt6i_idev ||
465 sprt->rt6i_idev->dev->ifindex != oif) {
466 if (flags & RT6_LOOKUP_F_IFACE && oif)
467 continue;
468 if (local && (!oif ||
469 local->rt6i_idev->dev->ifindex == oif))
470 continue;
471 }
472 local = sprt;
473 }
474 } else {
475 if (ipv6_chk_addr(net, saddr, dev,
476 flags & RT6_LOOKUP_F_IFACE))
477 return sprt;
478 }
479 }
480
481 if (oif) {
482 if (local)
483 return local;
484
485 if (flags & RT6_LOOKUP_F_IFACE)
486 return net->ipv6.ip6_null_entry;
487 }
488 out:
489 return rt;
490 }
491
492 #ifdef CONFIG_IPV6_ROUTER_PREF
493 struct __rt6_probe_work {
494 struct work_struct work;
495 struct in6_addr target;
496 struct net_device *dev;
497 };
498
499 static void rt6_probe_deferred(struct work_struct *w)
500 {
501 struct in6_addr mcaddr;
502 struct __rt6_probe_work *work =
503 container_of(w, struct __rt6_probe_work, work);
504
505 addrconf_addr_solict_mult(&work->target, &mcaddr);
506 ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
507 dev_put(work->dev);
508 kfree(w);
509 }
510
511 static void rt6_probe(struct rt6_info *rt)
512 {
513 struct neighbour *neigh;
514 /*
515 * Okay, this does not seem to be appropriate
516 * for now, however, we need to check if it
517 * is really so; aka Router Reachability Probing.
518 *
519 * Router Reachability Probe MUST be rate-limited
520 * to no more than one per minute.
521 */
522 if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
523 return;
524 rcu_read_lock_bh();
525 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
526 if (neigh) {
527 write_lock(&neigh->lock);
528 if (neigh->nud_state & NUD_VALID)
529 goto out;
530 }
531
532 if (!neigh ||
533 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
534 struct __rt6_probe_work *work;
535
536 work = kmalloc(sizeof(*work), GFP_ATOMIC);
537
538 if (neigh && work)
539 __neigh_set_probe_once(neigh);
540
541 if (neigh)
542 write_unlock(&neigh->lock);
543
544 if (work) {
545 INIT_WORK(&work->work, rt6_probe_deferred);
546 work->target = rt->rt6i_gateway;
547 dev_hold(rt->dst.dev);
548 work->dev = rt->dst.dev;
549 schedule_work(&work->work);
550 }
551 } else {
552 out:
553 write_unlock(&neigh->lock);
554 }
555 rcu_read_unlock_bh();
556 }
557 #else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
	/* intentionally empty */
}
561 #endif
562
563 /*
564 * Default Router Selection (RFC 2461 6.3.6)
565 */
566 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
567 {
568 struct net_device *dev = rt->dst.dev;
569 if (!oif || dev->ifindex == oif)
570 return 2;
571 if ((dev->flags & IFF_LOOPBACK) &&
572 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
573 return 1;
574 return 0;
575 }
576
577 static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
578 {
579 struct neighbour *neigh;
580 enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
581
582 if (rt->rt6i_flags & RTF_NONEXTHOP ||
583 !(rt->rt6i_flags & RTF_GATEWAY))
584 return RT6_NUD_SUCCEED;
585
586 rcu_read_lock_bh();
587 neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
588 if (neigh) {
589 read_lock(&neigh->lock);
590 if (neigh->nud_state & NUD_VALID)
591 ret = RT6_NUD_SUCCEED;
592 #ifdef CONFIG_IPV6_ROUTER_PREF
593 else if (!(neigh->nud_state & NUD_FAILED))
594 ret = RT6_NUD_SUCCEED;
595 else
596 ret = RT6_NUD_FAIL_PROBE;
597 #endif
598 read_unlock(&neigh->lock);
599 } else {
600 ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
601 RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;
602 }
603 rcu_read_unlock_bh();
604
605 return ret;
606 }
607
608 static int rt6_score_route(struct rt6_info *rt, int oif,
609 int strict)
610 {
611 int m;
612
613 m = rt6_check_dev(rt, oif);
614 if (!m && (strict & RT6_LOOKUP_F_IFACE))
615 return RT6_NUD_FAIL_HARD;
616 #ifdef CONFIG_IPV6_ROUTER_PREF
617 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
618 #endif
619 if (strict & RT6_LOOKUP_F_REACHABLE) {
620 int n = rt6_check_neigh(rt);
621 if (n < 0)
622 return n;
623 }
624 return m;
625 }
626
627 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
628 int *mpri, struct rt6_info *match,
629 bool *do_rr)
630 {
631 int m;
632 bool match_do_rr = false;
633
634 if (rt6_check_expired(rt))
635 goto out;
636
637 m = rt6_score_route(rt, oif, strict);
638 if (m == RT6_NUD_FAIL_DO_RR) {
639 match_do_rr = true;
640 m = 0; /* lowest valid score */
641 } else if (m == RT6_NUD_FAIL_HARD) {
642 goto out;
643 }
644
645 if (strict & RT6_LOOKUP_F_REACHABLE)
646 rt6_probe(rt);
647
648 /* note that m can be RT6_NUD_FAIL_PROBE at this point */
649 if (m > *mpri) {
650 *do_rr = match_do_rr;
651 *mpri = m;
652 match = rt;
653 }
654 out:
655 return match;
656 }
657
658 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
659 struct rt6_info *rr_head,
660 u32 metric, int oif, int strict,
661 bool *do_rr)
662 {
663 struct rt6_info *rt, *match;
664 int mpri = -1;
665
666 match = NULL;
667 for (rt = rr_head; rt && rt->rt6i_metric == metric;
668 rt = rt->dst.rt6_next)
669 match = find_match(rt, oif, strict, &mpri, match, do_rr);
670 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
671 rt = rt->dst.rt6_next)
672 match = find_match(rt, oif, strict, &mpri, match, do_rr);
673
674 return match;
675 }
676
677 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
678 {
679 struct rt6_info *match, *rt0;
680 struct net *net;
681 bool do_rr = false;
682
683 rt0 = fn->rr_ptr;
684 if (!rt0)
685 fn->rr_ptr = rt0 = fn->leaf;
686
687 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
688 &do_rr);
689
690 if (do_rr) {
691 struct rt6_info *next = rt0->dst.rt6_next;
692
693 /* no entries matched; do round-robin */
694 if (!next || next->rt6i_metric != rt0->rt6i_metric)
695 next = fn->leaf;
696
697 if (next != rt0)
698 fn->rr_ptr = next;
699 }
700
701 net = dev_net(rt0->dst.dev);
702 return match ? match : net->ipv6.ip6_null_entry;
703 }
704
705 #ifdef CONFIG_IPV6_ROUTE_INFO
706 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
707 const struct in6_addr *gwaddr)
708 {
709 struct net *net = dev_net(dev);
710 struct route_info *rinfo = (struct route_info *) opt;
711 struct in6_addr prefix_buf, *prefix;
712 unsigned int pref;
713 unsigned long lifetime;
714 struct rt6_info *rt;
715
716 if (len < sizeof(struct route_info)) {
717 return -EINVAL;
718 }
719
720 /* Sanity check for prefix_len and length */
721 if (rinfo->length > 3) {
722 return -EINVAL;
723 } else if (rinfo->prefix_len > 128) {
724 return -EINVAL;
725 } else if (rinfo->prefix_len > 64) {
726 if (rinfo->length < 2) {
727 return -EINVAL;
728 }
729 } else if (rinfo->prefix_len > 0) {
730 if (rinfo->length < 1) {
731 return -EINVAL;
732 }
733 }
734
735 pref = rinfo->route_pref;
736 if (pref == ICMPV6_ROUTER_PREF_INVALID)
737 return -EINVAL;
738
739 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
740
741 if (rinfo->length == 3)
742 prefix = (struct in6_addr *)rinfo->prefix;
743 else {
744 /* this function is safe */
745 ipv6_addr_prefix(&prefix_buf,
746 (struct in6_addr *)rinfo->prefix,
747 rinfo->prefix_len);
748 prefix = &prefix_buf;
749 }
750
751 if (rinfo->prefix_len == 0)
752 rt = rt6_get_dflt_router(gwaddr, dev);
753 else
754 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
755 gwaddr, dev->ifindex);
756
757 if (rt && !lifetime) {
758 ip6_del_rt(rt);
759 rt = NULL;
760 }
761
762 if (!rt && lifetime)
763 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
764 pref);
765 else if (rt)
766 rt->rt6i_flags = RTF_ROUTEINFO |
767 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
768
769 if (rt) {
770 if (!addrconf_finite_timeout(lifetime))
771 rt6_clean_expires(rt);
772 else
773 rt6_set_expires(rt, jiffies + HZ * lifetime);
774
775 ip6_rt_put(rt);
776 }
777 return 0;
778 }
779 #endif
780
/*
 * BACKTRACK - climb the FIB tree when the current node produced no route.
 *
 * Must be expanded inside a function that has:
 *   - a 'struct rt6_info *rt' holding the current lookup result,
 *   - a 'struct fib6_node *fn' holding the current node,
 *   - labels 'restart' (retry the lookup at the new fn) and 'out'.
 *
 * If rt is the namespace's null entry, walk towards the root: jump to 'out'
 * at the top-level root; otherwise move to the parent (or, when the parent
 * has a subtree that is not the current node, to the result of a source
 * lookup in that subtree) and jump to 'restart' as soon as a node with
 * RTN_RTINFO is reached.  Does nothing when rt is a real route.
 *
 * Macro arguments are parenthesized so that arbitrary expressions can be
 * passed safely.
 */
#define BACKTRACK(__net, saddr)						\
do {									\
	if (rt == (__net)->ipv6.ip6_null_entry) {			\
		struct fib6_node *pn;					\
		while (1) {						\
			if (fn->fn_flags & RTN_TL_ROOT)			\
				goto out;				\
			pn = fn->parent;				\
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)	\
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, (saddr)); \
			else						\
				fn = pn;				\
			if (fn->fn_flags & RTN_RTINFO)			\
				goto restart;				\
		}							\
	}								\
} while (0)
798
799 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
800 struct fib6_table *table,
801 struct flowi6 *fl6, int flags)
802 {
803 struct fib6_node *fn;
804 struct rt6_info *rt;
805
806 read_lock_bh(&table->tb6_lock);
807 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
808 restart:
809 rt = fn->leaf;
810 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
811 if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
812 rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
813 BACKTRACK(net, &fl6->saddr);
814 out:
815 dst_use(&rt->dst, jiffies);
816 read_unlock_bh(&table->tb6_lock);
817 return rt;
818
819 }
820
821 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
822 int flags)
823 {
824 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
825 }
826 EXPORT_SYMBOL_GPL(ip6_route_lookup);
827
828 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
829 const struct in6_addr *saddr, int oif, int strict)
830 {
831 struct flowi6 fl6 = {
832 .flowi6_oif = oif,
833 .daddr = *daddr,
834 };
835 struct dst_entry *dst;
836 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
837
838 if (saddr) {
839 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
840 flags |= RT6_LOOKUP_F_HAS_SADDR;
841 }
842
843 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
844 if (dst->error == 0)
845 return (struct rt6_info *) dst;
846
847 dst_release(dst);
848
849 return NULL;
850 }
851
852 EXPORT_SYMBOL(rt6_lookup);
853
/* ip6_ins_rt is called with table->tb6_lock NOT held.
   It takes a new route entry; if the addition fails for any reason,
   the route is freed. In either case, if the caller does not hold a
   reference to the route, it may be destroyed.
 */
859
860 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
861 {
862 int err;
863 struct fib6_table *table;
864
865 table = rt->rt6i_table;
866 write_lock_bh(&table->tb6_lock);
867 err = fib6_add(&table->tb6_root, rt, info);
868 write_unlock_bh(&table->tb6_lock);
869
870 return err;
871 }
872
873 int ip6_ins_rt(struct rt6_info *rt)
874 {
875 struct nl_info info = {
876 .nl_net = dev_net(rt->dst.dev),
877 };
878 return __ip6_ins_rt(rt, &info);
879 }
880
881 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
882 const struct in6_addr *daddr,
883 const struct in6_addr *saddr)
884 {
885 struct rt6_info *rt;
886
887 /*
888 * Clone the route.
889 */
890
891 rt = ip6_rt_copy(ort, daddr);
892
893 if (rt) {
894 if (ort->rt6i_dst.plen != 128 &&
895 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
896 rt->rt6i_flags |= RTF_ANYCAST;
897
898 rt->rt6i_flags |= RTF_CACHE;
899
900 #ifdef CONFIG_IPV6_SUBTREES
901 if (rt->rt6i_src.plen && saddr) {
902 rt->rt6i_src.addr = *saddr;
903 rt->rt6i_src.plen = 128;
904 }
905 #endif
906 }
907
908 return rt;
909 }
910
911 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
912 const struct in6_addr *daddr)
913 {
914 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
915
916 if (rt)
917 rt->rt6i_flags |= RTF_CACHE;
918 return rt;
919 }
920
921 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
922 struct flowi6 *fl6, int flags)
923 {
924 struct fib6_node *fn;
925 struct rt6_info *rt, *nrt;
926 int strict = 0;
927 int attempts = 3;
928 int err;
929 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
930
931 strict |= flags & RT6_LOOKUP_F_IFACE;
932
933 relookup:
934 read_lock_bh(&table->tb6_lock);
935
936 restart_2:
937 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
938
939 restart:
940 rt = rt6_select(fn, oif, strict | reachable);
941 if (rt->rt6i_nsiblings)
942 rt = rt6_multipath_select(rt, fl6, oif, strict | reachable);
943 BACKTRACK(net, &fl6->saddr);
944 if (rt == net->ipv6.ip6_null_entry ||
945 rt->rt6i_flags & RTF_CACHE)
946 goto out;
947
948 dst_hold(&rt->dst);
949 read_unlock_bh(&table->tb6_lock);
950
951 if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)))
952 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
953 else if (!(rt->dst.flags & DST_HOST))
954 nrt = rt6_alloc_clone(rt, &fl6->daddr);
955 else
956 goto out2;
957
958 ip6_rt_put(rt);
959 rt = nrt ? : net->ipv6.ip6_null_entry;
960
961 dst_hold(&rt->dst);
962 if (nrt) {
963 err = ip6_ins_rt(nrt);
964 if (!err)
965 goto out2;
966 }
967
968 if (--attempts <= 0)
969 goto out2;
970
971 /*
972 * Race condition! In the gap, when table->tb6_lock was
973 * released someone could insert this route. Relookup.
974 */
975 ip6_rt_put(rt);
976 goto relookup;
977
978 out:
979 if (reachable) {
980 reachable = 0;
981 goto restart_2;
982 }
983 dst_hold(&rt->dst);
984 read_unlock_bh(&table->tb6_lock);
985 out2:
986 rt->dst.lastuse = jiffies;
987 rt->dst.__use++;
988
989 return rt;
990 }
991
992 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
993 struct flowi6 *fl6, int flags)
994 {
995 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
996 }
997
998 static struct dst_entry *ip6_route_input_lookup(struct net *net,
999 struct net_device *dev,
1000 struct flowi6 *fl6, int flags)
1001 {
1002 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
1003 flags |= RT6_LOOKUP_F_IFACE;
1004
1005 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
1006 }
1007
1008 void ip6_route_input(struct sk_buff *skb)
1009 {
1010 const struct ipv6hdr *iph = ipv6_hdr(skb);
1011 struct net *net = dev_net(skb->dev);
1012 int flags = RT6_LOOKUP_F_HAS_SADDR;
1013 struct flowi6 fl6 = {
1014 .flowi6_iif = skb->dev->ifindex,
1015 .daddr = iph->daddr,
1016 .saddr = iph->saddr,
1017 .flowlabel = ip6_flowinfo(iph),
1018 .flowi6_mark = skb->mark,
1019 .flowi6_proto = iph->nexthdr,
1020 };
1021
1022 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
1023 }
1024
1025 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
1026 struct flowi6 *fl6, int flags)
1027 {
1028 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
1029 }
1030
1031 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
1032 struct flowi6 *fl6)
1033 {
1034 int flags = 0;
1035
1036 fl6->flowi6_iif = LOOPBACK_IFINDEX;
1037
1038 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
1039 flags |= RT6_LOOKUP_F_IFACE;
1040
1041 if (!ipv6_addr_any(&fl6->saddr))
1042 flags |= RT6_LOOKUP_F_HAS_SADDR;
1043 else if (sk)
1044 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
1045
1046 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
1047 }
1048
1049 EXPORT_SYMBOL(ip6_route_output);
1050
1051 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
1052 {
1053 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
1054 struct dst_entry *new = NULL;
1055
1056 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
1057 if (rt) {
1058 new = &rt->dst;
1059
1060 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
1061 rt6_init_peer(rt, net->ipv6.peers);
1062
1063 new->__use = 1;
1064 new->input = dst_discard;
1065 new->output = dst_discard;
1066
1067 if (dst_metrics_read_only(&ort->dst))
1068 new->_metrics = ort->dst._metrics;
1069 else
1070 dst_copy_metrics(new, &ort->dst);
1071 rt->rt6i_idev = ort->rt6i_idev;
1072 if (rt->rt6i_idev)
1073 in6_dev_hold(rt->rt6i_idev);
1074
1075 rt->rt6i_gateway = ort->rt6i_gateway;
1076 rt->rt6i_flags = ort->rt6i_flags;
1077 rt->rt6i_metric = 0;
1078
1079 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1080 #ifdef CONFIG_IPV6_SUBTREES
1081 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1082 #endif
1083
1084 dst_free(new);
1085 }
1086
1087 dst_release(dst_orig);
1088 return new ? new : ERR_PTR(-ENOMEM);
1089 }
1090
1091 /*
1092 * Destination cache support functions
1093 */
1094
1095 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1096 {
1097 struct rt6_info *rt;
1098
1099 rt = (struct rt6_info *) dst;
1100
1101 /* All IPV6 dsts are created with ->obsolete set to the value
1102 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1103 * into this function always.
1104 */
1105 if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))
1106 return NULL;
1107
1108 if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
1109 return NULL;
1110
1111 if (rt6_check_expired(rt))
1112 return NULL;
1113
1114 return dst;
1115 }
1116
1117 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1118 {
1119 struct rt6_info *rt = (struct rt6_info *) dst;
1120
1121 if (rt) {
1122 if (rt->rt6i_flags & RTF_CACHE) {
1123 if (rt6_check_expired(rt)) {
1124 ip6_del_rt(rt);
1125 dst = NULL;
1126 }
1127 } else {
1128 dst_release(dst);
1129 dst = NULL;
1130 }
1131 }
1132 return dst;
1133 }
1134
1135 static void ip6_link_failure(struct sk_buff *skb)
1136 {
1137 struct rt6_info *rt;
1138
1139 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1140
1141 rt = (struct rt6_info *) skb_dst(skb);
1142 if (rt) {
1143 if (rt->rt6i_flags & RTF_CACHE) {
1144 dst_hold(&rt->dst);
1145 if (ip6_del_rt(rt))
1146 dst_free(&rt->dst);
1147 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
1148 rt->rt6i_node->fn_sernum = -1;
1149 }
1150 }
1151 }
1152
1153 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1154 struct sk_buff *skb, u32 mtu)
1155 {
1156 struct rt6_info *rt6 = (struct rt6_info*)dst;
1157
1158 dst_confirm(dst);
1159 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1160 struct net *net = dev_net(dst->dev);
1161
1162 rt6->rt6i_flags |= RTF_MODIFIED;
1163 if (mtu < IPV6_MIN_MTU) {
1164 u32 features = dst_metric(dst, RTAX_FEATURES);
1165 mtu = IPV6_MIN_MTU;
1166 features |= RTAX_FEATURE_ALLFRAG;
1167 dst_metric_set(dst, RTAX_FEATURES, features);
1168 }
1169 dst_metric_set(dst, RTAX_MTU, mtu);
1170 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1171 }
1172 }
1173
1174 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1175 int oif, u32 mark)
1176 {
1177 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1178 struct dst_entry *dst;
1179 struct flowi6 fl6;
1180
1181 memset(&fl6, 0, sizeof(fl6));
1182 fl6.flowi6_oif = oif;
1183 fl6.flowi6_mark = mark;
1184 fl6.daddr = iph->daddr;
1185 fl6.saddr = iph->saddr;
1186 fl6.flowlabel = ip6_flowinfo(iph);
1187
1188 dst = ip6_route_output(net, NULL, &fl6);
1189 if (!dst->error)
1190 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1191 dst_release(dst);
1192 }
1193 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1194
1195 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1196 {
1197 ip6_update_pmtu(skb, sock_net(sk), mtu,
1198 sk->sk_bound_dev_if, sk->sk_mark);
1199 }
1200 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1201
/* Handle redirects */

/*
 * Lookup key used for redirect handling: a flowi6 followed by the address
 * of the gateway that sent the redirect.  fl6 must stay the first member:
 * ip6_route_redirect() passes &rdfl.fl6 to the lookup and
 * __ip6_route_redirect() casts that flowi6 pointer back to this struct.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;
	struct in6_addr gateway;
};
1207
1208 static struct rt6_info *__ip6_route_redirect(struct net *net,
1209 struct fib6_table *table,
1210 struct flowi6 *fl6,
1211 int flags)
1212 {
1213 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1214 struct rt6_info *rt;
1215 struct fib6_node *fn;
1216
1217 /* Get the "current" route for this destination and
1218 * check if the redirect has come from approriate router.
1219 *
1220 * RFC 4861 specifies that redirects should only be
1221 * accepted if they come from the nexthop to the target.
1222 * Due to the way the routes are chosen, this notion
1223 * is a bit fuzzy and one might need to check all possible
1224 * routes.
1225 */
1226
1227 read_lock_bh(&table->tb6_lock);
1228 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1229 restart:
1230 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1231 if (rt6_check_expired(rt))
1232 continue;
1233 if (rt->dst.error)
1234 break;
1235 if (!(rt->rt6i_flags & RTF_GATEWAY))
1236 continue;
1237 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1238 continue;
1239 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1240 continue;
1241 break;
1242 }
1243
1244 if (!rt)
1245 rt = net->ipv6.ip6_null_entry;
1246 else if (rt->dst.error) {
1247 rt = net->ipv6.ip6_null_entry;
1248 goto out;
1249 }
1250 BACKTRACK(net, &fl6->saddr);
1251 out:
1252 dst_hold(&rt->dst);
1253
1254 read_unlock_bh(&table->tb6_lock);
1255
1256 return rt;
1257 };
1258
1259 static struct dst_entry *ip6_route_redirect(struct net *net,
1260 const struct flowi6 *fl6,
1261 const struct in6_addr *gateway)
1262 {
1263 int flags = RT6_LOOKUP_F_HAS_SADDR;
1264 struct ip6rd_flowi rdfl;
1265
1266 rdfl.fl6 = *fl6;
1267 rdfl.gateway = *gateway;
1268
1269 return fib6_rule_lookup(net, &rdfl.fl6,
1270 flags, __ip6_route_redirect);
1271 }
1272
1273 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1274 {
1275 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1276 struct dst_entry *dst;
1277 struct flowi6 fl6;
1278
1279 memset(&fl6, 0, sizeof(fl6));
1280 fl6.flowi6_oif = oif;
1281 fl6.flowi6_mark = mark;
1282 fl6.daddr = iph->daddr;
1283 fl6.saddr = iph->saddr;
1284 fl6.flowlabel = ip6_flowinfo(iph);
1285
1286 dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
1287 rt6_do_redirect(dst, NULL, skb);
1288 dst_release(dst);
1289 }
1290 EXPORT_SYMBOL_GPL(ip6_redirect);
1291
1292 void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
1293 u32 mark)
1294 {
1295 const struct ipv6hdr *iph = ipv6_hdr(skb);
1296 const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
1297 struct dst_entry *dst;
1298 struct flowi6 fl6;
1299
1300 memset(&fl6, 0, sizeof(fl6));
1301 fl6.flowi6_oif = oif;
1302 fl6.flowi6_mark = mark;
1303 fl6.daddr = msg->dest;
1304 fl6.saddr = iph->daddr;
1305
1306 dst = ip6_route_redirect(net, &fl6, &iph->saddr);
1307 rt6_do_redirect(dst, NULL, skb);
1308 dst_release(dst);
1309 }
1310
1311 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1312 {
1313 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1314 }
1315 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1316
1317 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1318 {
1319 struct net_device *dev = dst->dev;
1320 unsigned int mtu = dst_mtu(dst);
1321 struct net *net = dev_net(dev);
1322
1323 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1324
1325 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1326 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1327
1328 /*
1329 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1330 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1331 * IPV6_MAXPLEN is also valid and means: "any MSS,
1332 * rely only on pmtu discovery"
1333 */
1334 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1335 mtu = IPV6_MAXPLEN;
1336 return mtu;
1337 }
1338
1339 static unsigned int ip6_mtu(const struct dst_entry *dst)
1340 {
1341 struct inet6_dev *idev;
1342 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1343
1344 if (mtu)
1345 return mtu;
1346
1347 mtu = IPV6_MIN_MTU;
1348
1349 rcu_read_lock();
1350 idev = __in6_dev_get(dst->dev);
1351 if (idev)
1352 mtu = idev->cnf.mtu6;
1353 rcu_read_unlock();
1354
1355 return mtu;
1356 }
1357
/*
 * Singly linked list (via dst->next) of the dst entries created by
 * icmp6_dst_alloc().  icmp6_dst_alloc(), icmp6_dst_gc() and
 * icmp6_clean_all() all take icmp6_dst_lock around list accesses.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1360
1361 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1362 struct flowi6 *fl6)
1363 {
1364 struct dst_entry *dst;
1365 struct rt6_info *rt;
1366 struct inet6_dev *idev = in6_dev_get(dev);
1367 struct net *net = dev_net(dev);
1368
1369 if (unlikely(!idev))
1370 return ERR_PTR(-ENODEV);
1371
1372 rt = ip6_dst_alloc(net, dev, 0, NULL);
1373 if (unlikely(!rt)) {
1374 in6_dev_put(idev);
1375 dst = ERR_PTR(-ENOMEM);
1376 goto out;
1377 }
1378
1379 rt->dst.flags |= DST_HOST;
1380 rt->dst.output = ip6_output;
1381 atomic_set(&rt->dst.__refcnt, 1);
1382 rt->rt6i_gateway = fl6->daddr;
1383 rt->rt6i_dst.addr = fl6->daddr;
1384 rt->rt6i_dst.plen = 128;
1385 rt->rt6i_idev = idev;
1386 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
1387
1388 spin_lock_bh(&icmp6_dst_lock);
1389 rt->dst.next = icmp6_dst_gc_list;
1390 icmp6_dst_gc_list = &rt->dst;
1391 spin_unlock_bh(&icmp6_dst_lock);
1392
1393 fib6_force_start_gc(net);
1394
1395 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1396
1397 out:
1398 return dst;
1399 }
1400
1401 int icmp6_dst_gc(void)
1402 {
1403 struct dst_entry *dst, **pprev;
1404 int more = 0;
1405
1406 spin_lock_bh(&icmp6_dst_lock);
1407 pprev = &icmp6_dst_gc_list;
1408
1409 while ((dst = *pprev) != NULL) {
1410 if (!atomic_read(&dst->__refcnt)) {
1411 *pprev = dst->next;
1412 dst_free(dst);
1413 } else {
1414 pprev = &dst->next;
1415 ++more;
1416 }
1417 }
1418
1419 spin_unlock_bh(&icmp6_dst_lock);
1420
1421 return more;
1422 }
1423
1424 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1425 void *arg)
1426 {
1427 struct dst_entry *dst, **pprev;
1428
1429 spin_lock_bh(&icmp6_dst_lock);
1430 pprev = &icmp6_dst_gc_list;
1431 while ((dst = *pprev) != NULL) {
1432 struct rt6_info *rt = (struct rt6_info *) dst;
1433 if (func(rt, arg)) {
1434 *pprev = dst->next;
1435 dst_free(dst);
1436 } else {
1437 pprev = &dst->next;
1438 }
1439 }
1440 spin_unlock_bh(&icmp6_dst_lock);
1441 }
1442
1443 static int ip6_dst_gc(struct dst_ops *ops)
1444 {
1445 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1446 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1447 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1448 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1449 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1450 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1451 int entries;
1452
1453 entries = dst_entries_get_fast(ops);
1454 if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
1455 entries <= rt_max_size)
1456 goto out;
1457
1458 net->ipv6.ip6_rt_gc_expire++;
1459 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
1460 entries = dst_entries_get_slow(ops);
1461 if (entries < ops->gc_thresh)
1462 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1463 out:
1464 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1465 return entries > rt_max_size;
1466 }
1467
1468 /*
1469 *
1470 */
1471
1472 int ip6_route_add(struct fib6_config *cfg)
1473 {
1474 int err;
1475 struct net *net = cfg->fc_nlinfo.nl_net;
1476 struct rt6_info *rt = NULL;
1477 struct net_device *dev = NULL;
1478 struct inet6_dev *idev = NULL;
1479 struct fib6_table *table;
1480 int addr_type;
1481
1482 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1483 return -EINVAL;
1484 #ifndef CONFIG_IPV6_SUBTREES
1485 if (cfg->fc_src_len)
1486 return -EINVAL;
1487 #endif
1488 if (cfg->fc_ifindex) {
1489 err = -ENODEV;
1490 dev = dev_get_by_index(net, cfg->fc_ifindex);
1491 if (!dev)
1492 goto out;
1493 idev = in6_dev_get(dev);
1494 if (!idev)
1495 goto out;
1496 }
1497
1498 if (cfg->fc_metric == 0)
1499 cfg->fc_metric = IP6_RT_PRIO_USER;
1500
1501 err = -ENOBUFS;
1502 if (cfg->fc_nlinfo.nlh &&
1503 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1504 table = fib6_get_table(net, cfg->fc_table);
1505 if (!table) {
1506 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1507 table = fib6_new_table(net, cfg->fc_table);
1508 }
1509 } else {
1510 table = fib6_new_table(net, cfg->fc_table);
1511 }
1512
1513 if (!table)
1514 goto out;
1515
1516 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1517
1518 if (!rt) {
1519 err = -ENOMEM;
1520 goto out;
1521 }
1522
1523 if (cfg->fc_flags & RTF_EXPIRES)
1524 rt6_set_expires(rt, jiffies +
1525 clock_t_to_jiffies(cfg->fc_expires));
1526 else
1527 rt6_clean_expires(rt);
1528
1529 if (cfg->fc_protocol == RTPROT_UNSPEC)
1530 cfg->fc_protocol = RTPROT_BOOT;
1531 rt->rt6i_protocol = cfg->fc_protocol;
1532
1533 addr_type = ipv6_addr_type(&cfg->fc_dst);
1534
1535 if (addr_type & IPV6_ADDR_MULTICAST)
1536 rt->dst.input = ip6_mc_input;
1537 else if (cfg->fc_flags & RTF_LOCAL)
1538 rt->dst.input = ip6_input;
1539 else
1540 rt->dst.input = ip6_forward;
1541
1542 rt->dst.output = ip6_output;
1543
1544 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1545 rt->rt6i_dst.plen = cfg->fc_dst_len;
1546 if (rt->rt6i_dst.plen == 128)
1547 rt->dst.flags |= DST_HOST;
1548
1549 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1550 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1551 if (!metrics) {
1552 err = -ENOMEM;
1553 goto out;
1554 }
1555 dst_init_metrics(&rt->dst, metrics, 0);
1556 }
1557 #ifdef CONFIG_IPV6_SUBTREES
1558 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1559 rt->rt6i_src.plen = cfg->fc_src_len;
1560 #endif
1561
1562 rt->rt6i_metric = cfg->fc_metric;
1563
1564 /* We cannot add true routes via loopback here,
1565 they would result in kernel looping; promote them to reject routes
1566 */
1567 if ((cfg->fc_flags & RTF_REJECT) ||
1568 (dev && (dev->flags & IFF_LOOPBACK) &&
1569 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1570 !(cfg->fc_flags & RTF_LOCAL))) {
1571 /* hold loopback dev/idev if we haven't done so. */
1572 if (dev != net->loopback_dev) {
1573 if (dev) {
1574 dev_put(dev);
1575 in6_dev_put(idev);
1576 }
1577 dev = net->loopback_dev;
1578 dev_hold(dev);
1579 idev = in6_dev_get(dev);
1580 if (!idev) {
1581 err = -ENODEV;
1582 goto out;
1583 }
1584 }
1585 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1586 switch (cfg->fc_type) {
1587 case RTN_BLACKHOLE:
1588 rt->dst.error = -EINVAL;
1589 rt->dst.output = dst_discard;
1590 rt->dst.input = dst_discard;
1591 break;
1592 case RTN_PROHIBIT:
1593 rt->dst.error = -EACCES;
1594 rt->dst.output = ip6_pkt_prohibit_out;
1595 rt->dst.input = ip6_pkt_prohibit;
1596 break;
1597 case RTN_THROW:
1598 default:
1599 rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
1600 : -ENETUNREACH;
1601 rt->dst.output = ip6_pkt_discard_out;
1602 rt->dst.input = ip6_pkt_discard;
1603 break;
1604 }
1605 goto install_route;
1606 }
1607
1608 if (cfg->fc_flags & RTF_GATEWAY) {
1609 const struct in6_addr *gw_addr;
1610 int gwa_type;
1611
1612 gw_addr = &cfg->fc_gateway;
1613 rt->rt6i_gateway = *gw_addr;
1614 gwa_type = ipv6_addr_type(gw_addr);
1615
1616 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1617 struct rt6_info *grt;
1618
1619 /* IPv6 strictly inhibits using not link-local
1620 addresses as nexthop address.
1621 Otherwise, router will not able to send redirects.
1622 It is very good, but in some (rare!) circumstances
1623 (SIT, PtP, NBMA NOARP links) it is handy to allow
1624 some exceptions. --ANK
1625 */
1626 err = -EINVAL;
1627 if (!(gwa_type & IPV6_ADDR_UNICAST))
1628 goto out;
1629
1630 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1631
1632 err = -EHOSTUNREACH;
1633 if (!grt)
1634 goto out;
1635 if (dev) {
1636 if (dev != grt->dst.dev) {
1637 ip6_rt_put(grt);
1638 goto out;
1639 }
1640 } else {
1641 dev = grt->dst.dev;
1642 idev = grt->rt6i_idev;
1643 dev_hold(dev);
1644 in6_dev_hold(grt->rt6i_idev);
1645 }
1646 if (!(grt->rt6i_flags & RTF_GATEWAY))
1647 err = 0;
1648 ip6_rt_put(grt);
1649
1650 if (err)
1651 goto out;
1652 }
1653 err = -EINVAL;
1654 if (!dev || (dev->flags & IFF_LOOPBACK))
1655 goto out;
1656 }
1657
1658 err = -ENODEV;
1659 if (!dev)
1660 goto out;
1661
1662 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1663 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1664 err = -EINVAL;
1665 goto out;
1666 }
1667 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1668 rt->rt6i_prefsrc.plen = 128;
1669 } else
1670 rt->rt6i_prefsrc.plen = 0;
1671
1672 rt->rt6i_flags = cfg->fc_flags;
1673
1674 install_route:
1675 if (cfg->fc_mx) {
1676 struct nlattr *nla;
1677 int remaining;
1678
1679 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1680 int type = nla_type(nla);
1681
1682 if (type) {
1683 if (type > RTAX_MAX) {
1684 err = -EINVAL;
1685 goto out;
1686 }
1687
1688 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1689 }
1690 }
1691 }
1692
1693 rt->dst.dev = dev;
1694 rt->rt6i_idev = idev;
1695 rt->rt6i_table = table;
1696
1697 cfg->fc_nlinfo.nl_net = dev_net(dev);
1698
1699 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1700
1701 out:
1702 if (dev)
1703 dev_put(dev);
1704 if (idev)
1705 in6_dev_put(idev);
1706 if (rt)
1707 dst_free(&rt->dst);
1708 return err;
1709 }
1710
1711 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1712 {
1713 int err;
1714 struct fib6_table *table;
1715 struct net *net = dev_net(rt->dst.dev);
1716
1717 if (rt == net->ipv6.ip6_null_entry) {
1718 err = -ENOENT;
1719 goto out;
1720 }
1721
1722 table = rt->rt6i_table;
1723 write_lock_bh(&table->tb6_lock);
1724 err = fib6_del(rt, info);
1725 write_unlock_bh(&table->tb6_lock);
1726
1727 out:
1728 ip6_rt_put(rt);
1729 return err;
1730 }
1731
1732 int ip6_del_rt(struct rt6_info *rt)
1733 {
1734 struct nl_info info = {
1735 .nl_net = dev_net(rt->dst.dev),
1736 };
1737 return __ip6_del_rt(rt, &info);
1738 }
1739
1740 static int ip6_route_del(struct fib6_config *cfg)
1741 {
1742 struct fib6_table *table;
1743 struct fib6_node *fn;
1744 struct rt6_info *rt;
1745 int err = -ESRCH;
1746
1747 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1748 if (!table)
1749 return err;
1750
1751 read_lock_bh(&table->tb6_lock);
1752
1753 fn = fib6_locate(&table->tb6_root,
1754 &cfg->fc_dst, cfg->fc_dst_len,
1755 &cfg->fc_src, cfg->fc_src_len);
1756
1757 if (fn) {
1758 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1759 if (cfg->fc_ifindex &&
1760 (!rt->dst.dev ||
1761 rt->dst.dev->ifindex != cfg->fc_ifindex))
1762 continue;
1763 if (cfg->fc_flags & RTF_GATEWAY &&
1764 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1765 continue;
1766 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1767 continue;
1768 dst_hold(&rt->dst);
1769 read_unlock_bh(&table->tb6_lock);
1770
1771 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1772 }
1773 }
1774 read_unlock_bh(&table->tb6_lock);
1775
1776 return err;
1777 }
1778
1779 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1780 {
1781 struct net *net = dev_net(skb->dev);
1782 struct netevent_redirect netevent;
1783 struct rt6_info *rt, *nrt = NULL;
1784 struct ndisc_options ndopts;
1785 struct inet6_dev *in6_dev;
1786 struct neighbour *neigh;
1787 struct rd_msg *msg;
1788 int optlen, on_link;
1789 u8 *lladdr;
1790
1791 optlen = skb_tail_pointer(skb) - skb_transport_header(skb);
1792 optlen -= sizeof(*msg);
1793
1794 if (optlen < 0) {
1795 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1796 return;
1797 }
1798
1799 msg = (struct rd_msg *)icmp6_hdr(skb);
1800
1801 if (ipv6_addr_is_multicast(&msg->dest)) {
1802 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1803 return;
1804 }
1805
1806 on_link = 0;
1807 if (ipv6_addr_equal(&msg->dest, &msg->target)) {
1808 on_link = 1;
1809 } else if (ipv6_addr_type(&msg->target) !=
1810 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1811 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1812 return;
1813 }
1814
1815 in6_dev = __in6_dev_get(skb->dev);
1816 if (!in6_dev)
1817 return;
1818 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1819 return;
1820
1821 /* RFC2461 8.1:
1822 * The IP source address of the Redirect MUST be the same as the current
1823 * first-hop router for the specified ICMP Destination Address.
1824 */
1825
1826 if (!ndisc_parse_options(msg->opt, optlen, &ndopts)) {
1827 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1828 return;
1829 }
1830
1831 lladdr = NULL;
1832 if (ndopts.nd_opts_tgt_lladdr) {
1833 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1834 skb->dev);
1835 if (!lladdr) {
1836 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1837 return;
1838 }
1839 }
1840
1841 rt = (struct rt6_info *) dst;
1842 if (rt == net->ipv6.ip6_null_entry) {
1843 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1844 return;
1845 }
1846
1847 /* Redirect received -> path was valid.
1848 * Look, redirects are sent only in response to data packets,
1849 * so that this nexthop apparently is reachable. --ANK
1850 */
1851 dst_confirm(&rt->dst);
1852
1853 neigh = __neigh_lookup(&nd_tbl, &msg->target, skb->dev, 1);
1854 if (!neigh)
1855 return;
1856
1857 /*
1858 * We have finally decided to accept it.
1859 */
1860
1861 neigh_update(neigh, lladdr, NUD_STALE,
1862 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1863 NEIGH_UPDATE_F_OVERRIDE|
1864 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1865 NEIGH_UPDATE_F_ISROUTER))
1866 );
1867
1868 nrt = ip6_rt_copy(rt, &msg->dest);
1869 if (!nrt)
1870 goto out;
1871
1872 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1873 if (on_link)
1874 nrt->rt6i_flags &= ~RTF_GATEWAY;
1875
1876 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1877
1878 if (ip6_ins_rt(nrt))
1879 goto out;
1880
1881 netevent.old = &rt->dst;
1882 netevent.new = &nrt->dst;
1883 netevent.daddr = &msg->dest;
1884 netevent.neigh = neigh;
1885 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1886
1887 if (rt->rt6i_flags & RTF_CACHE) {
1888 rt = (struct rt6_info *) dst_clone(&rt->dst);
1889 ip6_del_rt(rt);
1890 }
1891
1892 out:
1893 neigh_release(neigh);
1894 }
1895
1896 /*
1897 * Misc support functions
1898 */
1899
1900 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1901 const struct in6_addr *dest)
1902 {
1903 struct net *net = dev_net(ort->dst.dev);
1904 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1905 ort->rt6i_table);
1906
1907 if (rt) {
1908 rt->dst.input = ort->dst.input;
1909 rt->dst.output = ort->dst.output;
1910 rt->dst.flags |= DST_HOST;
1911
1912 rt->rt6i_dst.addr = *dest;
1913 rt->rt6i_dst.plen = 128;
1914 dst_copy_metrics(&rt->dst, &ort->dst);
1915 rt->dst.error = ort->dst.error;
1916 rt->rt6i_idev = ort->rt6i_idev;
1917 if (rt->rt6i_idev)
1918 in6_dev_hold(rt->rt6i_idev);
1919 rt->dst.lastuse = jiffies;
1920
1921 if (ort->rt6i_flags & RTF_GATEWAY)
1922 rt->rt6i_gateway = ort->rt6i_gateway;
1923 else
1924 rt->rt6i_gateway = *dest;
1925 rt->rt6i_flags = ort->rt6i_flags;
1926 rt6_set_from(rt, ort);
1927 rt->rt6i_metric = 0;
1928
1929 #ifdef CONFIG_IPV6_SUBTREES
1930 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1931 #endif
1932 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1933 rt->rt6i_table = ort->rt6i_table;
1934 }
1935 return rt;
1936 }
1937
1938 #ifdef CONFIG_IPV6_ROUTE_INFO
1939 static struct rt6_info *rt6_get_route_info(struct net *net,
1940 const struct in6_addr *prefix, int prefixlen,
1941 const struct in6_addr *gwaddr, int ifindex)
1942 {
1943 struct fib6_node *fn;
1944 struct rt6_info *rt = NULL;
1945 struct fib6_table *table;
1946
1947 table = fib6_get_table(net, RT6_TABLE_INFO);
1948 if (!table)
1949 return NULL;
1950
1951 read_lock_bh(&table->tb6_lock);
1952 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1953 if (!fn)
1954 goto out;
1955
1956 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1957 if (rt->dst.dev->ifindex != ifindex)
1958 continue;
1959 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1960 continue;
1961 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1962 continue;
1963 dst_hold(&rt->dst);
1964 break;
1965 }
1966 out:
1967 read_unlock_bh(&table->tb6_lock);
1968 return rt;
1969 }
1970
1971 static struct rt6_info *rt6_add_route_info(struct net *net,
1972 const struct in6_addr *prefix, int prefixlen,
1973 const struct in6_addr *gwaddr, int ifindex,
1974 unsigned int pref)
1975 {
1976 struct fib6_config cfg = {
1977 .fc_table = RT6_TABLE_INFO,
1978 .fc_metric = IP6_RT_PRIO_USER,
1979 .fc_ifindex = ifindex,
1980 .fc_dst_len = prefixlen,
1981 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1982 RTF_UP | RTF_PREF(pref),
1983 .fc_nlinfo.portid = 0,
1984 .fc_nlinfo.nlh = NULL,
1985 .fc_nlinfo.nl_net = net,
1986 };
1987
1988 cfg.fc_dst = *prefix;
1989 cfg.fc_gateway = *gwaddr;
1990
1991 /* We should treat it as a default route if prefix length is 0. */
1992 if (!prefixlen)
1993 cfg.fc_flags |= RTF_DEFAULT;
1994
1995 ip6_route_add(&cfg);
1996
1997 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1998 }
1999 #endif
2000
2001 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
2002 {
2003 struct rt6_info *rt;
2004 struct fib6_table *table;
2005
2006 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
2007 if (!table)
2008 return NULL;
2009
2010 read_lock_bh(&table->tb6_lock);
2011 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
2012 if (dev == rt->dst.dev &&
2013 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
2014 ipv6_addr_equal(&rt->rt6i_gateway, addr))
2015 break;
2016 }
2017 if (rt)
2018 dst_hold(&rt->dst);
2019 read_unlock_bh(&table->tb6_lock);
2020 return rt;
2021 }
2022
2023 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
2024 struct net_device *dev,
2025 unsigned int pref)
2026 {
2027 struct fib6_config cfg = {
2028 .fc_table = RT6_TABLE_DFLT,
2029 .fc_metric = IP6_RT_PRIO_USER,
2030 .fc_ifindex = dev->ifindex,
2031 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
2032 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
2033 .fc_nlinfo.portid = 0,
2034 .fc_nlinfo.nlh = NULL,
2035 .fc_nlinfo.nl_net = dev_net(dev),
2036 };
2037
2038 cfg.fc_gateway = *gwaddr;
2039
2040 ip6_route_add(&cfg);
2041
2042 return rt6_get_dflt_router(gwaddr, dev);
2043 }
2044
2045 void rt6_purge_dflt_routers(struct net *net)
2046 {
2047 struct rt6_info *rt;
2048 struct fib6_table *table;
2049
2050 /* NOTE: Keep consistent with rt6_get_dflt_router */
2051 table = fib6_get_table(net, RT6_TABLE_DFLT);
2052 if (!table)
2053 return;
2054
2055 restart:
2056 read_lock_bh(&table->tb6_lock);
2057 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
2058 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
2059 (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
2060 dst_hold(&rt->dst);
2061 read_unlock_bh(&table->tb6_lock);
2062 ip6_del_rt(rt);
2063 goto restart;
2064 }
2065 }
2066 read_unlock_bh(&table->tb6_lock);
2067 }
2068
2069 static void rtmsg_to_fib6_config(struct net *net,
2070 struct in6_rtmsg *rtmsg,
2071 struct fib6_config *cfg)
2072 {
2073 memset(cfg, 0, sizeof(*cfg));
2074
2075 cfg->fc_table = RT6_TABLE_MAIN;
2076 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
2077 cfg->fc_metric = rtmsg->rtmsg_metric;
2078 cfg->fc_expires = rtmsg->rtmsg_info;
2079 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
2080 cfg->fc_src_len = rtmsg->rtmsg_src_len;
2081 cfg->fc_flags = rtmsg->rtmsg_flags;
2082
2083 cfg->fc_nlinfo.nl_net = net;
2084
2085 cfg->fc_dst = rtmsg->rtmsg_dst;
2086 cfg->fc_src = rtmsg->rtmsg_src;
2087 cfg->fc_gateway = rtmsg->rtmsg_gateway;
2088 }
2089
2090 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
2091 {
2092 struct fib6_config cfg;
2093 struct in6_rtmsg rtmsg;
2094 int err;
2095
2096 switch(cmd) {
2097 case SIOCADDRT: /* Add a route */
2098 case SIOCDELRT: /* Delete a route */
2099 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
2100 return -EPERM;
2101 err = copy_from_user(&rtmsg, arg,
2102 sizeof(struct in6_rtmsg));
2103 if (err)
2104 return -EFAULT;
2105
2106 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
2107
2108 rtnl_lock();
2109 switch (cmd) {
2110 case SIOCADDRT:
2111 err = ip6_route_add(&cfg);
2112 break;
2113 case SIOCDELRT:
2114 err = ip6_route_del(&cfg);
2115 break;
2116 default:
2117 err = -EINVAL;
2118 }
2119 rtnl_unlock();
2120
2121 return err;
2122 }
2123
2124 return -EINVAL;
2125 }
2126
2127 /*
2128 * Drop the packet on the floor
2129 */
2130
2131 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2132 {
2133 int type;
2134 struct dst_entry *dst = skb_dst(skb);
2135 switch (ipstats_mib_noroutes) {
2136 case IPSTATS_MIB_INNOROUTES:
2137 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2138 if (type == IPV6_ADDR_ANY) {
2139 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2140 IPSTATS_MIB_INADDRERRORS);
2141 break;
2142 }
2143 /* FALLTHROUGH */
2144 case IPSTATS_MIB_OUTNOROUTES:
2145 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2146 ipstats_mib_noroutes);
2147 break;
2148 }
2149 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2150 kfree_skb(skb);
2151 return 0;
2152 }
2153
2154 static int ip6_pkt_discard(struct sk_buff *skb)
2155 {
2156 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2157 }
2158
2159 static int ip6_pkt_discard_out(struct sk_buff *skb)
2160 {
2161 skb->dev = skb_dst(skb)->dev;
2162 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2163 }
2164
2165 static int ip6_pkt_prohibit(struct sk_buff *skb)
2166 {
2167 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2168 }
2169
2170 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2171 {
2172 skb->dev = skb_dst(skb)->dev;
2173 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2174 }
2175
2176 /*
2177 * Allocate a dst for local (unicast / anycast) address.
2178 */
2179
2180 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2181 const struct in6_addr *addr,
2182 bool anycast)
2183 {
2184 struct net *net = dev_net(idev->dev);
2185 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
2186 DST_NOCOUNT, NULL);
2187 if (!rt)
2188 return ERR_PTR(-ENOMEM);
2189
2190 in6_dev_hold(idev);
2191
2192 rt->dst.flags |= DST_HOST;
2193 rt->dst.input = ip6_input;
2194 rt->dst.output = ip6_output;
2195 rt->rt6i_idev = idev;
2196
2197 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2198 if (anycast)
2199 rt->rt6i_flags |= RTF_ANYCAST;
2200 else
2201 rt->rt6i_flags |= RTF_LOCAL;
2202
2203 rt->rt6i_gateway = *addr;
2204 rt->rt6i_dst.addr = *addr;
2205 rt->rt6i_dst.plen = 128;
2206 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2207
2208 atomic_set(&rt->dst.__refcnt, 1);
2209
2210 return rt;
2211 }
2212
2213 int ip6_route_get_saddr(struct net *net,
2214 struct rt6_info *rt,
2215 const struct in6_addr *daddr,
2216 unsigned int prefs,
2217 struct in6_addr *saddr)
2218 {
2219 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2220 int err = 0;
2221 if (rt->rt6i_prefsrc.plen)
2222 *saddr = rt->rt6i_prefsrc.addr;
2223 else
2224 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2225 daddr, prefs, saddr);
2226 return err;
2227 }
2228
/* remove deleted ip from prefsrc entries */

/*
 * Argument bundle that rt6_remove_prefsrc() hands to
 * fib6_remove_prefsrc(): the device to match (NULL matches every
 * device), the namespace, and the address being removed.
 */
struct arg_dev_net_ip {
	struct net_device *dev;
	struct net *net;
	struct in6_addr *addr;
};
2235
2236 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2237 {
2238 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2239 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2240 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2241
2242 if (((void *)rt->dst.dev == dev || !dev) &&
2243 rt != net->ipv6.ip6_null_entry &&
2244 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2245 /* remove prefsrc entry */
2246 rt->rt6i_prefsrc.plen = 0;
2247 }
2248 return 0;
2249 }
2250
2251 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2252 {
2253 struct net *net = dev_net(ifp->idev->dev);
2254 struct arg_dev_net_ip adni = {
2255 .dev = ifp->idev->dev,
2256 .net = net,
2257 .addr = &ifp->addr,
2258 };
2259 fib6_clean_all(net, fib6_remove_prefsrc, &adni);
2260 }
2261
/*
 * Argument bundle that rt6_ifdown() hands to fib6_ifdown(): the device
 * to match (NULL matches every device) and the namespace.
 */
struct arg_dev_net {
	struct net_device *dev;
	struct net *net;
};
2266
2267 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2268 {
2269 const struct arg_dev_net *adn = arg;
2270 const struct net_device *dev = adn->dev;
2271
2272 if ((rt->dst.dev == dev || !dev) &&
2273 rt != adn->net->ipv6.ip6_null_entry)
2274 return -1;
2275
2276 return 0;
2277 }
2278
2279 void rt6_ifdown(struct net *net, struct net_device *dev)
2280 {
2281 struct arg_dev_net adn = {
2282 .dev = dev,
2283 .net = net,
2284 };
2285
2286 fib6_clean_all(net, fib6_ifdown, &adn);
2287 icmp6_clean_all(fib6_ifdown, &adn);
2288 }
2289
/*
 * Argument bundle that rt6_mtu_change() hands to rt6_mtu_change_route():
 * the device whose MTU changed and the new MTU value.
 */
struct rt6_mtu_change_arg {
	struct net_device *dev;
	unsigned int mtu;
};
2294
2295 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2296 {
2297 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2298 struct inet6_dev *idev;
2299
2300 /* In IPv6 pmtu discovery is not optional,
2301 so that RTAX_MTU lock cannot disable it.
2302 We still use this lock to block changes
2303 caused by addrconf/ndisc.
2304 */
2305
2306 idev = __in6_dev_get(arg->dev);
2307 if (!idev)
2308 return 0;
2309
2310 /* For administrative MTU increase, there is no way to discover
2311 IPv6 PMTU increase, so PMTU increase should be updated here.
2312 Since RFC 1981 doesn't include administrative MTU increase
2313 update PMTU increase is a MUST. (i.e. jumbo frame)
2314 */
2315 /*
2316 If new MTU is less than route PMTU, this new MTU will be the
2317 lowest MTU in the path, update the route PMTU to reflect PMTU
2318 decreases; if new MTU is greater than route PMTU, and the
2319 old MTU is the lowest MTU in the path, update the route PMTU
2320 to reflect the increase. In this case if the other nodes' MTU
2321 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2322 PMTU discouvery.
2323 */
2324 if (rt->dst.dev == arg->dev &&
2325 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2326 (dst_mtu(&rt->dst) >= arg->mtu ||
2327 (dst_mtu(&rt->dst) < arg->mtu &&
2328 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2329 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2330 }
2331 return 0;
2332 }
2333
2334 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2335 {
2336 struct rt6_mtu_change_arg arg = {
2337 .dev = dev,
2338 .mtu = mtu,
2339 };
2340
2341 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);
2342 }
2343
/*
 * Netlink attribute policy passed to nlmsg_parse() in
 * rtm_to_fib6_config().  Attribute types without an entry here
 * (e.g. RTA_DST, RTA_SRC, RTA_PREFSRC, RTA_TABLE) get no policy entry;
 * RTA_DST and RTA_SRC are length-checked by rtm_to_fib6_config() itself.
 */
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
	[RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
	[RTA_OIF]               = { .type = NLA_U32 },
	[RTA_IIF]		= { .type = NLA_U32 },
	[RTA_PRIORITY]          = { .type = NLA_U32 },
	[RTA_METRICS]           = { .type = NLA_NESTED },
	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
};
2352
2353 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2354 struct fib6_config *cfg)
2355 {
2356 struct rtmsg *rtm;
2357 struct nlattr *tb[RTA_MAX+1];
2358 int err;
2359
2360 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2361 if (err < 0)
2362 goto errout;
2363
2364 err = -EINVAL;
2365 rtm = nlmsg_data(nlh);
2366 memset(cfg, 0, sizeof(*cfg));
2367
2368 cfg->fc_table = rtm->rtm_table;
2369 cfg->fc_dst_len = rtm->rtm_dst_len;
2370 cfg->fc_src_len = rtm->rtm_src_len;
2371 cfg->fc_flags = RTF_UP;
2372 cfg->fc_protocol = rtm->rtm_protocol;
2373 cfg->fc_type = rtm->rtm_type;
2374
2375 if (rtm->rtm_type == RTN_UNREACHABLE ||
2376 rtm->rtm_type == RTN_BLACKHOLE ||
2377 rtm->rtm_type == RTN_PROHIBIT ||
2378 rtm->rtm_type == RTN_THROW)
2379 cfg->fc_flags |= RTF_REJECT;
2380
2381 if (rtm->rtm_type == RTN_LOCAL)
2382 cfg->fc_flags |= RTF_LOCAL;
2383
2384 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2385 cfg->fc_nlinfo.nlh = nlh;
2386 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2387
2388 if (tb[RTA_GATEWAY]) {
2389 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2390 cfg->fc_flags |= RTF_GATEWAY;
2391 }
2392
2393 if (tb[RTA_DST]) {
2394 int plen = (rtm->rtm_dst_len + 7) >> 3;
2395
2396 if (nla_len(tb[RTA_DST]) < plen)
2397 goto errout;
2398
2399 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2400 }
2401
2402 if (tb[RTA_SRC]) {
2403 int plen = (rtm->rtm_src_len + 7) >> 3;
2404
2405 if (nla_len(tb[RTA_SRC]) < plen)
2406 goto errout;
2407
2408 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2409 }
2410
2411 if (tb[RTA_PREFSRC])
2412 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2413
2414 if (tb[RTA_OIF])
2415 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2416
2417 if (tb[RTA_PRIORITY])
2418 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2419
2420 if (tb[RTA_METRICS]) {
2421 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2422 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2423 }
2424
2425 if (tb[RTA_TABLE])
2426 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2427
2428 if (tb[RTA_MULTIPATH]) {
2429 cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
2430 cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
2431 }
2432
2433 err = 0;
2434 errout:
2435 return err;
2436 }
2437
2438 static int ip6_route_multipath(struct fib6_config *cfg, int add)
2439 {
2440 struct fib6_config r_cfg;
2441 struct rtnexthop *rtnh;
2442 int remaining;
2443 int attrlen;
2444 int err = 0, last_err = 0;
2445
2446 beginning:
2447 rtnh = (struct rtnexthop *)cfg->fc_mp;
2448 remaining = cfg->fc_mp_len;
2449
2450 /* Parse a Multipath Entry */
2451 while (rtnh_ok(rtnh, remaining)) {
2452 memcpy(&r_cfg, cfg, sizeof(*cfg));
2453 if (rtnh->rtnh_ifindex)
2454 r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
2455
2456 attrlen = rtnh_attrlen(rtnh);
2457 if (attrlen > 0) {
2458 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
2459
2460 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
2461 if (nla) {
2462 nla_memcpy(&r_cfg.fc_gateway, nla, 16);
2463 r_cfg.fc_flags |= RTF_GATEWAY;
2464 }
2465 }
2466 err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
2467 if (err) {
2468 last_err = err;
2469 /* If we are trying to remove a route, do not stop the
2470 * loop when ip6_route_del() fails (because next hop is
2471 * already gone), we should try to remove all next hops.
2472 */
2473 if (add) {
2474 /* If add fails, we should try to delete all
2475 * next hops that have been already added.
2476 */
2477 add = 0;
2478 goto beginning;
2479 }
2480 }
2481 /* Because each route is added like a single route we remove
2482 * this flag after the first nexthop (if there is a collision,
2483 * we have already fail to add the first nexthop:
2484 * fib6_add_rt2node() has reject it).
2485 */
2486 cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
2487 rtnh = rtnh_next(rtnh, &remaining);
2488 }
2489
2490 return last_err;
2491 }
2492
2493 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2494 {
2495 struct fib6_config cfg;
2496 int err;
2497
2498 err = rtm_to_fib6_config(skb, nlh, &cfg);
2499 if (err < 0)
2500 return err;
2501
2502 if (cfg.fc_mp)
2503 return ip6_route_multipath(&cfg, 0);
2504 else
2505 return ip6_route_del(&cfg);
2506 }
2507
2508 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh)
2509 {
2510 struct fib6_config cfg;
2511 int err;
2512
2513 err = rtm_to_fib6_config(skb, nlh, &cfg);
2514 if (err < 0)
2515 return err;
2516
2517 if (cfg.fc_mp)
2518 return ip6_route_multipath(&cfg, 1);
2519 else
2520 return ip6_route_add(&cfg);
2521 }
2522
2523 static inline size_t rt6_nlmsg_size(void)
2524 {
2525 return NLMSG_ALIGN(sizeof(struct rtmsg))
2526 + nla_total_size(16) /* RTA_SRC */
2527 + nla_total_size(16) /* RTA_DST */
2528 + nla_total_size(16) /* RTA_GATEWAY */
2529 + nla_total_size(16) /* RTA_PREFSRC */
2530 + nla_total_size(4) /* RTA_TABLE */
2531 + nla_total_size(4) /* RTA_IIF */
2532 + nla_total_size(4) /* RTA_OIF */
2533 + nla_total_size(4) /* RTA_PRIORITY */
2534 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2535 + nla_total_size(sizeof(struct rta_cacheinfo));
2536 }
2537
2538 static int rt6_fill_node(struct net *net,
2539 struct sk_buff *skb, struct rt6_info *rt,
2540 struct in6_addr *dst, struct in6_addr *src,
2541 int iif, int type, u32 portid, u32 seq,
2542 int prefix, int nowait, unsigned int flags)
2543 {
2544 struct rtmsg *rtm;
2545 struct nlmsghdr *nlh;
2546 long expires;
2547 u32 table;
2548
2549 if (prefix) { /* user wants prefix routes only */
2550 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2551 /* success since this is not a prefix route */
2552 return 1;
2553 }
2554 }
2555
2556 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2557 if (!nlh)
2558 return -EMSGSIZE;
2559
2560 rtm = nlmsg_data(nlh);
2561 rtm->rtm_family = AF_INET6;
2562 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2563 rtm->rtm_src_len = rt->rt6i_src.plen;
2564 rtm->rtm_tos = 0;
2565 if (rt->rt6i_table)
2566 table = rt->rt6i_table->tb6_id;
2567 else
2568 table = RT6_TABLE_UNSPEC;
2569 rtm->rtm_table = table;
2570 if (nla_put_u32(skb, RTA_TABLE, table))
2571 goto nla_put_failure;
2572 if (rt->rt6i_flags & RTF_REJECT) {
2573 switch (rt->dst.error) {
2574 case -EINVAL:
2575 rtm->rtm_type = RTN_BLACKHOLE;
2576 break;
2577 case -EACCES:
2578 rtm->rtm_type = RTN_PROHIBIT;
2579 break;
2580 case -EAGAIN:
2581 rtm->rtm_type = RTN_THROW;
2582 break;
2583 default:
2584 rtm->rtm_type = RTN_UNREACHABLE;
2585 break;
2586 }
2587 }
2588 else if (rt->rt6i_flags & RTF_LOCAL)
2589 rtm->rtm_type = RTN_LOCAL;
2590 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2591 rtm->rtm_type = RTN_LOCAL;
2592 else
2593 rtm->rtm_type = RTN_UNICAST;
2594 rtm->rtm_flags = 0;
2595 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2596 rtm->rtm_protocol = rt->rt6i_protocol;
2597 if (rt->rt6i_flags & RTF_DYNAMIC)
2598 rtm->rtm_protocol = RTPROT_REDIRECT;
2599 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2600 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2601 rtm->rtm_protocol = RTPROT_RA;
2602 else
2603 rtm->rtm_protocol = RTPROT_KERNEL;
2604 }
2605
2606 if (rt->rt6i_flags & RTF_CACHE)
2607 rtm->rtm_flags |= RTM_F_CLONED;
2608
2609 if (dst) {
2610 if (nla_put(skb, RTA_DST, 16, dst))
2611 goto nla_put_failure;
2612 rtm->rtm_dst_len = 128;
2613 } else if (rtm->rtm_dst_len)
2614 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2615 goto nla_put_failure;
2616 #ifdef CONFIG_IPV6_SUBTREES
2617 if (src) {
2618 if (nla_put(skb, RTA_SRC, 16, src))
2619 goto nla_put_failure;
2620 rtm->rtm_src_len = 128;
2621 } else if (rtm->rtm_src_len &&
2622 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2623 goto nla_put_failure;
2624 #endif
2625 if (iif) {
2626 #ifdef CONFIG_IPV6_MROUTE
2627 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2628 int err = ip6mr_get_route(net, skb, rtm, nowait);
2629 if (err <= 0) {
2630 if (!nowait) {
2631 if (err == 0)
2632 return 0;
2633 goto nla_put_failure;
2634 } else {
2635 if (err == -EMSGSIZE)
2636 goto nla_put_failure;
2637 }
2638 }
2639 } else
2640 #endif
2641 if (nla_put_u32(skb, RTA_IIF, iif))
2642 goto nla_put_failure;
2643 } else if (dst) {
2644 struct in6_addr saddr_buf;
2645 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2646 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2647 goto nla_put_failure;
2648 }
2649
2650 if (rt->rt6i_prefsrc.plen) {
2651 struct in6_addr saddr_buf;
2652 saddr_buf = rt->rt6i_prefsrc.addr;
2653 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2654 goto nla_put_failure;
2655 }
2656
2657 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2658 goto nla_put_failure;
2659
2660 if (rt->rt6i_flags & RTF_GATEWAY) {
2661 if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0)
2662 goto nla_put_failure;
2663 }
2664
2665 if (rt->dst.dev &&
2666 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2667 goto nla_put_failure;
2668 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2669 goto nla_put_failure;
2670
2671 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2672
2673 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2674 goto nla_put_failure;
2675
2676 return nlmsg_end(skb, nlh);
2677
2678 nla_put_failure:
2679 nlmsg_cancel(skb, nlh);
2680 return -EMSGSIZE;
2681 }
2682
2683 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2684 {
2685 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2686 int prefix;
2687
2688 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2689 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2690 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2691 } else
2692 prefix = 0;
2693
2694 return rt6_fill_node(arg->net,
2695 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2696 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2697 prefix, 0, NLM_F_MULTI);
2698 }
2699
2700 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)
2701 {
2702 struct net *net = sock_net(in_skb->sk);
2703 struct nlattr *tb[RTA_MAX+1];
2704 struct rt6_info *rt;
2705 struct sk_buff *skb;
2706 struct rtmsg *rtm;
2707 struct flowi6 fl6;
2708 int err, iif = 0, oif = 0;
2709
2710 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2711 if (err < 0)
2712 goto errout;
2713
2714 err = -EINVAL;
2715 memset(&fl6, 0, sizeof(fl6));
2716
2717 if (tb[RTA_SRC]) {
2718 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2719 goto errout;
2720
2721 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2722 }
2723
2724 if (tb[RTA_DST]) {
2725 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2726 goto errout;
2727
2728 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2729 }
2730
2731 if (tb[RTA_IIF])
2732 iif = nla_get_u32(tb[RTA_IIF]);
2733
2734 if (tb[RTA_OIF])
2735 oif = nla_get_u32(tb[RTA_OIF]);
2736
2737 if (iif) {
2738 struct net_device *dev;
2739 int flags = 0;
2740
2741 dev = __dev_get_by_index(net, iif);
2742 if (!dev) {
2743 err = -ENODEV;
2744 goto errout;
2745 }
2746
2747 fl6.flowi6_iif = iif;
2748
2749 if (!ipv6_addr_any(&fl6.saddr))
2750 flags |= RT6_LOOKUP_F_HAS_SADDR;
2751
2752 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2753 flags);
2754 } else {
2755 fl6.flowi6_oif = oif;
2756
2757 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2758 }
2759
2760 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2761 if (!skb) {
2762 ip6_rt_put(rt);
2763 err = -ENOBUFS;
2764 goto errout;
2765 }
2766
2767 /* Reserve room for dummy headers, this skb can pass
2768 through good chunk of routing engine.
2769 */
2770 skb_reset_mac_header(skb);
2771 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2772
2773 skb_dst_set(skb, &rt->dst);
2774
2775 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2776 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2777 nlh->nlmsg_seq, 0, 0, 0);
2778 if (err < 0) {
2779 kfree_skb(skb);
2780 goto errout;
2781 }
2782
2783 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2784 errout:
2785 return err;
2786 }
2787
2788 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2789 {
2790 struct sk_buff *skb;
2791 struct net *net = info->nl_net;
2792 u32 seq;
2793 int err;
2794
2795 err = -ENOBUFS;
2796 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2797
2798 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2799 if (!skb)
2800 goto errout;
2801
2802 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2803 event, info->portid, seq, 0, 0, 0);
2804 if (err < 0) {
2805 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2806 WARN_ON(err == -EMSGSIZE);
2807 kfree_skb(skb);
2808 goto errout;
2809 }
2810 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2811 info->nlh, gfp_any());
2812 return;
2813 errout:
2814 if (err < 0)
2815 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2816 }
2817
2818 static int ip6_route_dev_notify(struct notifier_block *this,
2819 unsigned long event, void *ptr)
2820 {
2821 struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2822 struct net *net = dev_net(dev);
2823
2824 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2825 net->ipv6.ip6_null_entry->dst.dev = dev;
2826 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2827 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2828 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2829 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2830 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2831 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2832 #endif
2833 }
2834
2835 return NOTIFY_OK;
2836 }
2837
2838 /*
2839 * /proc
2840 */
2841
2842 #ifdef CONFIG_PROC_FS
2843
2844 static const struct file_operations ipv6_route_proc_fops = {
2845 .owner = THIS_MODULE,
2846 .open = ipv6_route_open,
2847 .read = seq_read,
2848 .llseek = seq_lseek,
2849 .release = seq_release_net,
2850 };
2851
2852 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2853 {
2854 struct net *net = (struct net *)seq->private;
2855 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2856 net->ipv6.rt6_stats->fib_nodes,
2857 net->ipv6.rt6_stats->fib_route_nodes,
2858 net->ipv6.rt6_stats->fib_rt_alloc,
2859 net->ipv6.rt6_stats->fib_rt_entries,
2860 net->ipv6.rt6_stats->fib_rt_cache,
2861 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2862 net->ipv6.rt6_stats->fib_discarded_routes);
2863
2864 return 0;
2865 }
2866
2867 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2868 {
2869 return single_open_net(inode, file, rt6_stats_seq_show);
2870 }
2871
2872 static const struct file_operations rt6_stats_seq_fops = {
2873 .owner = THIS_MODULE,
2874 .open = rt6_stats_seq_open,
2875 .read = seq_read,
2876 .llseek = seq_lseek,
2877 .release = single_release_net,
2878 };
2879 #endif /* CONFIG_PROC_FS */
2880
2881 #ifdef CONFIG_SYSCTL
2882
2883 static
2884 int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
2885 void __user *buffer, size_t *lenp, loff_t *ppos)
2886 {
2887 struct net *net;
2888 int delay;
2889 if (!write)
2890 return -EINVAL;
2891
2892 net = (struct net *)ctl->extra1;
2893 delay = net->ipv6.sysctl.flush_delay;
2894 proc_dointvec(ctl, write, buffer, lenp, ppos);
2895 fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
2896 return 0;
2897 }
2898
2899 struct ctl_table ipv6_route_table_template[] = {
2900 {
2901 .procname = "flush",
2902 .data = &init_net.ipv6.sysctl.flush_delay,
2903 .maxlen = sizeof(int),
2904 .mode = 0200,
2905 .proc_handler = ipv6_sysctl_rtcache_flush
2906 },
2907 {
2908 .procname = "gc_thresh",
2909 .data = &ip6_dst_ops_template.gc_thresh,
2910 .maxlen = sizeof(int),
2911 .mode = 0644,
2912 .proc_handler = proc_dointvec,
2913 },
2914 {
2915 .procname = "max_size",
2916 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2917 .maxlen = sizeof(int),
2918 .mode = 0644,
2919 .proc_handler = proc_dointvec,
2920 },
2921 {
2922 .procname = "gc_min_interval",
2923 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2924 .maxlen = sizeof(int),
2925 .mode = 0644,
2926 .proc_handler = proc_dointvec_jiffies,
2927 },
2928 {
2929 .procname = "gc_timeout",
2930 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2931 .maxlen = sizeof(int),
2932 .mode = 0644,
2933 .proc_handler = proc_dointvec_jiffies,
2934 },
2935 {
2936 .procname = "gc_interval",
2937 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2938 .maxlen = sizeof(int),
2939 .mode = 0644,
2940 .proc_handler = proc_dointvec_jiffies,
2941 },
2942 {
2943 .procname = "gc_elasticity",
2944 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2945 .maxlen = sizeof(int),
2946 .mode = 0644,
2947 .proc_handler = proc_dointvec,
2948 },
2949 {
2950 .procname = "mtu_expires",
2951 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2952 .maxlen = sizeof(int),
2953 .mode = 0644,
2954 .proc_handler = proc_dointvec_jiffies,
2955 },
2956 {
2957 .procname = "min_adv_mss",
2958 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2959 .maxlen = sizeof(int),
2960 .mode = 0644,
2961 .proc_handler = proc_dointvec,
2962 },
2963 {
2964 .procname = "gc_min_interval_ms",
2965 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2966 .maxlen = sizeof(int),
2967 .mode = 0644,
2968 .proc_handler = proc_dointvec_ms_jiffies,
2969 },
2970 { }
2971 };
2972
2973 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2974 {
2975 struct ctl_table *table;
2976
2977 table = kmemdup(ipv6_route_table_template,
2978 sizeof(ipv6_route_table_template),
2979 GFP_KERNEL);
2980
2981 if (table) {
2982 table[0].data = &net->ipv6.sysctl.flush_delay;
2983 table[0].extra1 = net;
2984 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2985 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2986 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2987 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2988 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2989 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2990 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2991 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2992 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2993
2994 /* Don't export sysctls to unprivileged users */
2995 if (net->user_ns != &init_user_ns)
2996 table[0].procname = NULL;
2997 }
2998
2999 return table;
3000 }
3001 #endif
3002
3003 static int __net_init ip6_route_net_init(struct net *net)
3004 {
3005 int ret = -ENOMEM;
3006
3007 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
3008 sizeof(net->ipv6.ip6_dst_ops));
3009
3010 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
3011 goto out_ip6_dst_ops;
3012
3013 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
3014 sizeof(*net->ipv6.ip6_null_entry),
3015 GFP_KERNEL);
3016 if (!net->ipv6.ip6_null_entry)
3017 goto out_ip6_dst_entries;
3018 net->ipv6.ip6_null_entry->dst.path =
3019 (struct dst_entry *)net->ipv6.ip6_null_entry;
3020 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3021 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
3022 ip6_template_metrics, true);
3023
3024 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3025 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
3026 sizeof(*net->ipv6.ip6_prohibit_entry),
3027 GFP_KERNEL);
3028 if (!net->ipv6.ip6_prohibit_entry)
3029 goto out_ip6_null_entry;
3030 net->ipv6.ip6_prohibit_entry->dst.path =
3031 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
3032 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3033 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
3034 ip6_template_metrics, true);
3035
3036 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
3037 sizeof(*net->ipv6.ip6_blk_hole_entry),
3038 GFP_KERNEL);
3039 if (!net->ipv6.ip6_blk_hole_entry)
3040 goto out_ip6_prohibit_entry;
3041 net->ipv6.ip6_blk_hole_entry->dst.path =
3042 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
3043 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
3044 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
3045 ip6_template_metrics, true);
3046 #endif
3047
3048 net->ipv6.sysctl.flush_delay = 0;
3049 net->ipv6.sysctl.ip6_rt_max_size = 4096;
3050 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
3051 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
3052 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
3053 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
3054 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
3055 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
3056
3057 net->ipv6.ip6_rt_gc_expire = 30*HZ;
3058
3059 ret = 0;
3060 out:
3061 return ret;
3062
3063 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3064 out_ip6_prohibit_entry:
3065 kfree(net->ipv6.ip6_prohibit_entry);
3066 out_ip6_null_entry:
3067 kfree(net->ipv6.ip6_null_entry);
3068 #endif
3069 out_ip6_dst_entries:
3070 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3071 out_ip6_dst_ops:
3072 goto out;
3073 }
3074
3075 static void __net_exit ip6_route_net_exit(struct net *net)
3076 {
3077 kfree(net->ipv6.ip6_null_entry);
3078 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3079 kfree(net->ipv6.ip6_prohibit_entry);
3080 kfree(net->ipv6.ip6_blk_hole_entry);
3081 #endif
3082 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
3083 }
3084
3085 static int __net_init ip6_route_net_init_late(struct net *net)
3086 {
3087 #ifdef CONFIG_PROC_FS
3088 proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
3089 proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
3090 #endif
3091 return 0;
3092 }
3093
3094 static void __net_exit ip6_route_net_exit_late(struct net *net)
3095 {
3096 #ifdef CONFIG_PROC_FS
3097 remove_proc_entry("ipv6_route", net->proc_net);
3098 remove_proc_entry("rt6_stats", net->proc_net);
3099 #endif
3100 }
3101
3102 static struct pernet_operations ip6_route_net_ops = {
3103 .init = ip6_route_net_init,
3104 .exit = ip6_route_net_exit,
3105 };
3106
3107 static int __net_init ipv6_inetpeer_init(struct net *net)
3108 {
3109 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
3110
3111 if (!bp)
3112 return -ENOMEM;
3113 inet_peer_base_init(bp);
3114 net->ipv6.peers = bp;
3115 return 0;
3116 }
3117
3118 static void __net_exit ipv6_inetpeer_exit(struct net *net)
3119 {
3120 struct inet_peer_base *bp = net->ipv6.peers;
3121
3122 net->ipv6.peers = NULL;
3123 inetpeer_invalidate_tree(bp);
3124 kfree(bp);
3125 }
3126
3127 static struct pernet_operations ipv6_inetpeer_ops = {
3128 .init = ipv6_inetpeer_init,
3129 .exit = ipv6_inetpeer_exit,
3130 };
3131
3132 static struct pernet_operations ip6_route_net_late_ops = {
3133 .init = ip6_route_net_init_late,
3134 .exit = ip6_route_net_exit_late,
3135 };
3136
3137 static struct notifier_block ip6_route_dev_notifier = {
3138 .notifier_call = ip6_route_dev_notify,
3139 .priority = 0,
3140 };
3141
3142 int __init ip6_route_init(void)
3143 {
3144 int ret;
3145
3146 ret = -ENOMEM;
3147 ip6_dst_ops_template.kmem_cachep =
3148 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3149 SLAB_HWCACHE_ALIGN, NULL);
3150 if (!ip6_dst_ops_template.kmem_cachep)
3151 goto out;
3152
3153 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3154 if (ret)
3155 goto out_kmem_cache;
3156
3157 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3158 if (ret)
3159 goto out_dst_entries;
3160
3161 ret = register_pernet_subsys(&ip6_route_net_ops);
3162 if (ret)
3163 goto out_register_inetpeer;
3164
3165 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3166
3167 /* Registering of the loopback is done before this portion of code,
3168 * the loopback reference in rt6_info will not be taken, do it
3169 * manually for init_net */
3170 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3171 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3172 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3173 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3174 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3175 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3176 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3177 #endif
3178 ret = fib6_init();
3179 if (ret)
3180 goto out_register_subsys;
3181
3182 ret = xfrm6_init();
3183 if (ret)
3184 goto out_fib6_init;
3185
3186 ret = fib6_rules_init();
3187 if (ret)
3188 goto xfrm6_init;
3189
3190 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3191 if (ret)
3192 goto fib6_rules_init;
3193
3194 ret = -ENOBUFS;
3195 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3196 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3197 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3198 goto out_register_late_subsys;
3199
3200 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3201 if (ret)
3202 goto out_register_late_subsys;
3203
3204 out:
3205 return ret;
3206
3207 out_register_late_subsys:
3208 unregister_pernet_subsys(&ip6_route_net_late_ops);
3209 fib6_rules_init:
3210 fib6_rules_cleanup();
3211 xfrm6_init:
3212 xfrm6_fini();
3213 out_fib6_init:
3214 fib6_gc_cleanup();
3215 out_register_subsys:
3216 unregister_pernet_subsys(&ip6_route_net_ops);
3217 out_register_inetpeer:
3218 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3219 out_dst_entries:
3220 dst_entries_destroy(&ip6_dst_blackhole_ops);
3221 out_kmem_cache:
3222 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3223 goto out;
3224 }
3225
3226 void ip6_route_cleanup(void)
3227 {
3228 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3229 unregister_pernet_subsys(&ip6_route_net_late_ops);
3230 fib6_rules_cleanup();
3231 xfrm6_fini();
3232 fib6_gc_cleanup();
3233 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3234 unregister_pernet_subsys(&ip6_route_net_ops);
3235 dst_entries_destroy(&ip6_dst_blackhole_ops);
3236 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3237 }