]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - net/ipv6/route.c
Merge branch 'viafb-next' of git://github.com/schandinat/linux-2.6
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug plumbing for this file.  With RT6_DEBUG >= 3, RDBG(x) expands to
 * "printk x" (so x must be a parenthesised argument list) and RT6_TRACE()
 * to a KERN_DEBUG printk; otherwise both expand to nothing.
 */
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
75 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int ip6_default_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
84
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
98 #endif
99
100 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101 {
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128 }
129
/*
 * dst_ops for regular IPv6 routes: every callback points at a handler
 * declared or defined in this file, except local_out (__ip6_local_out).
 * NOTE(review): the "_template" suffix suggests this is copied into each
 * namespace's net->ipv6.ip6_dst_ops; that code is outside this chunk.
 */
130 static struct dst_ops ip6_dst_ops_template = {
131 .family = AF_INET6,
132 .protocol = cpu_to_be16(ETH_P_IPV6),
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
136 .default_advmss = ip6_default_advmss,
137 .default_mtu = ip6_default_mtu,
138 .cow_metrics = ipv6_cow_metrics,
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
144 .local_out = __ip6_local_out,
145 };
146
/* default_mtu hook of ip6_dst_blackhole_ops: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
151
152 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153 {
154 }
155
/*
 * dst_ops used for the entries allocated by ip6_blackhole_route().
 * Shares destroy/check/default_advmss with the regular IPv6 ops, but
 * reports a default MTU of 0 and ignores PMTU updates.
 */
156 static struct dst_ops ip6_dst_blackhole_ops = {
157 .family = AF_INET6,
158 .protocol = cpu_to_be16(ETH_P_IPV6),
159 .destroy = ip6_dst_destroy,
160 .check = ip6_dst_check,
161 .default_mtu = ip6_blackhole_default_mtu,
162 .default_advmss = ip6_default_advmss,
163 .update_pmtu = ip6_rt_blackhole_update_pmtu,
164 };
165
/*
 * Read-only metrics array with the hop-limit slot set to 255 and every
 * other slot zero.  Slots are indexed by RTAX_* - 1.
 * NOTE(review): its users are outside this chunk.
 */
166 static const u32 ip6_template_metrics[RTAX_MAX] = {
167 [RTAX_HOPLIMIT - 1] = 255,
168 };
169
/*
 * Template for the "no route" entry: a reject route whose dst carries
 * -ENETUNREACH and hands packets to ip6_pkt_discard()/ip6_pkt_discard_out().
 * The metric is the largest u32 value.
 * NOTE(review): presumably instantiated per namespace as
 * net->ipv6.ip6_null_entry; the copy is made outside this chunk.
 */
170 static struct rt6_info ip6_null_entry_template = {
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -ENETUNREACH,
176 .input = ip6_pkt_discard,
177 .output = ip6_pkt_discard_out,
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
180 .rt6i_protocol = RTPROT_KERNEL,
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183 };
184
185 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
186
187 static int ip6_pkt_prohibit(struct sk_buff *skb);
188 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
189
/*
 * Template for the "prohibit" entry (CONFIG_IPV6_MULTIPLE_TABLES only):
 * same shape as the null entry, but the dst error is -EACCES and packets
 * go to ip6_pkt_prohibit()/ip6_pkt_prohibit_out().
 */
190 static struct rt6_info ip6_prohibit_entry_template = {
191 .dst = {
192 .__refcnt = ATOMIC_INIT(1),
193 .__use = 1,
194 .obsolete = -1,
195 .error = -EACCES,
196 .input = ip6_pkt_prohibit,
197 .output = ip6_pkt_prohibit_out,
198 },
199 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
200 .rt6i_protocol = RTPROT_KERNEL,
201 .rt6i_metric = ~(u32) 0,
202 .rt6i_ref = ATOMIC_INIT(1),
203 };
204
/*
 * Template for the "blackhole" entry (CONFIG_IPV6_MULTIPLE_TABLES only):
 * the dst error is -EINVAL and both input and output use dst_discard().
 */
205 static struct rt6_info ip6_blk_hole_entry_template = {
206 .dst = {
207 .__refcnt = ATOMIC_INIT(1),
208 .__use = 1,
209 .obsolete = -1,
210 .error = -EINVAL,
211 .input = dst_discard,
212 .output = dst_discard,
213 },
214 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
215 .rt6i_protocol = RTPROT_KERNEL,
216 .rt6i_metric = ~(u32) 0,
217 .rt6i_ref = ATOMIC_INIT(1),
218 };
219
220 #endif
221
/*
 * Allocate a dst entry from @ops (dst_alloc() is called with 0 as its
 * second argument) and hand it back typed as an rt6_info.
 * Returns whatever dst_alloc() returned; callers check for NULL.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops, 0);

	return rt;
}
227
228 static void ip6_dst_destroy(struct dst_entry *dst)
229 {
230 struct rt6_info *rt = (struct rt6_info *)dst;
231 struct inet6_dev *idev = rt->rt6i_idev;
232 struct inet_peer *peer = rt->rt6i_peer;
233
234 if (idev != NULL) {
235 rt->rt6i_idev = NULL;
236 in6_dev_put(idev);
237 }
238 if (peer) {
239 rt->rt6i_peer = NULL;
240 inet_putpeer(peer);
241 }
242 }
243
244 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
245
246 static u32 rt6_peer_genid(void)
247 {
248 return atomic_read(&__rt6_peer_genid);
249 }
250
251 void rt6_bind_peer(struct rt6_info *rt, int create)
252 {
253 struct inet_peer *peer;
254
255 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
256 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
257 inet_putpeer(peer);
258 else
259 rt->rt6i_peer_genid = rt6_peer_genid();
260 }
261
262 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
263 int how)
264 {
265 struct rt6_info *rt = (struct rt6_info *)dst;
266 struct inet6_dev *idev = rt->rt6i_idev;
267 struct net_device *loopback_dev =
268 dev_net(dev)->loopback_dev;
269
270 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
271 struct inet6_dev *loopback_idev =
272 in6_dev_get(loopback_dev);
273 if (loopback_idev != NULL) {
274 rt->rt6i_idev = loopback_idev;
275 in6_dev_put(idev);
276 }
277 }
278 }
279
280 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
281 {
282 return (rt->rt6i_flags & RTF_EXPIRES) &&
283 time_after(jiffies, rt->rt6i_expires);
284 }
285
286 static inline int rt6_need_strict(struct in6_addr *daddr)
287 {
288 return ipv6_addr_type(daddr) &
289 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
290 }
291
292 /*
293 * Route lookup. Any table->tb6_lock is implied.
294 */
295
/*
 * Choose a route from the sibling list that starts at @rt (linked through
 * dst.rt6_next) according to the requested interface or source address.
 *
 * - With neither @oif nor a specific @saddr, @rt is returned unchanged.
 * - With @oif set, the first route whose device has that ifindex wins.
 *   Routes on a loopback device are remembered in 'local' as a fallback.
 * - With @oif zero, the first route for which ipv6_chk_addr() accepts
 *   @saddr on the route's device wins.
 *
 * If the walk finds nothing and @oif is set: return the loopback fallback
 * if there is one, else the namespace's null entry when
 * RT6_LOOKUP_F_IFACE is set.  Otherwise the list head @rt is returned.
 */
296 static inline struct rt6_info *rt6_device_match(struct net *net,
297 struct rt6_info *rt,
298 struct in6_addr *saddr,
299 int oif,
300 int flags)
301 {
302 struct rt6_info *local = NULL;
303 struct rt6_info *sprt;
304
/* Nothing to match against: keep the caller's choice. */
305 if (!oif && ipv6_addr_any(saddr))
306 goto out;
307
308 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
309 struct net_device *dev = sprt->rt6i_dev;
310
311 if (oif) {
312 if (dev->ifindex == oif)
313 return sprt;
314 if (dev->flags & IFF_LOOPBACK) {
/*
 * Loopback route whose inet6_dev is missing or is not on @oif:
 * skip it under strict interface matching, or when the
 * fallback already recorded sits on @oif.
 */
315 if (sprt->rt6i_idev == NULL ||
316 sprt->rt6i_idev->dev->ifindex != oif) {
317 if (flags & RT6_LOOKUP_F_IFACE && oif)
318 continue;
319 if (local && (!oif ||
320 local->rt6i_idev->dev->ifindex == oif))
321 continue;
322 }
323 local = sprt;
324 }
325 } else {
326 if (ipv6_chk_addr(net, saddr, dev,
327 flags & RT6_LOOKUP_F_IFACE))
328 return sprt;
329 }
330 }
331
332 if (oif) {
333 if (local)
334 return local;
335
336 if (flags & RT6_LOOKUP_F_IFACE)
337 return net->ipv6.ip6_null_entry;
338 }
339 out:
340 return rt;
341 }
342
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the route's next-hop neighbour entry is
 * not in a NUD_VALID state and more than rtr_probe_interval jiffies have
 * passed since neigh->updated, stamp the entry and send a neighbour
 * solicitation to the next hop's solicited-node multicast address.
 *
 * Does nothing when @rt is NULL or has no next-hop neighbour.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the lock must be held for
	 * writing; a read lock would let two CPUs pass the rate-limit
	 * check together and both send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* Send the solicitation only after the lock is dropped. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Without CONFIG_IPV6_ROUTER_PREF no probing is done. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
377
378 /*
379 * Default Router Selection (RFC 2461 6.3.6)
380 */
381 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
382 {
383 struct net_device *dev = rt->rt6i_dev;
384 if (!oif || dev->ifindex == oif)
385 return 2;
386 if ((dev->flags & IFF_LOOPBACK) &&
387 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
388 return 1;
389 return 0;
390 }
391
392 static inline int rt6_check_neigh(struct rt6_info *rt)
393 {
394 struct neighbour *neigh = rt->rt6i_nexthop;
395 int m;
396 if (rt->rt6i_flags & RTF_NONEXTHOP ||
397 !(rt->rt6i_flags & RTF_GATEWAY))
398 m = 1;
399 else if (neigh) {
400 read_lock_bh(&neigh->lock);
401 if (neigh->nud_state & NUD_VALID)
402 m = 2;
403 #ifdef CONFIG_IPV6_ROUTER_PREF
404 else if (neigh->nud_state & NUD_FAILED)
405 m = 0;
406 #endif
407 else
408 m = 1;
409 read_unlock_bh(&neigh->lock);
410 } else
411 m = 0;
412 return m;
413 }
414
415 static int rt6_score_route(struct rt6_info *rt, int oif,
416 int strict)
417 {
418 int m, n;
419
420 m = rt6_check_dev(rt, oif);
421 if (!m && (strict & RT6_LOOKUP_F_IFACE))
422 return -1;
423 #ifdef CONFIG_IPV6_ROUTER_PREF
424 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
425 #endif
426 n = rt6_check_neigh(rt);
427 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
428 return -1;
429 return m;
430 }
431
432 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
433 int *mpri, struct rt6_info *match)
434 {
435 int m;
436
437 if (rt6_check_expired(rt))
438 goto out;
439
440 m = rt6_score_route(rt, oif, strict);
441 if (m < 0)
442 goto out;
443
444 if (m > *mpri) {
445 if (strict & RT6_LOOKUP_F_REACHABLE)
446 rt6_probe(match);
447 *mpri = m;
448 match = rt;
449 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
450 rt6_probe(rt);
451 }
452
453 out:
454 return match;
455 }
456
457 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
458 struct rt6_info *rr_head,
459 u32 metric, int oif, int strict)
460 {
461 struct rt6_info *rt, *match;
462 int mpri = -1;
463
464 match = NULL;
465 for (rt = rr_head; rt && rt->rt6i_metric == metric;
466 rt = rt->dst.rt6_next)
467 match = find_match(rt, oif, strict, &mpri, match);
468 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
469 rt = rt->dst.rt6_next)
470 match = find_match(rt, oif, strict, &mpri, match);
471
472 return match;
473 }
474
475 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
476 {
477 struct rt6_info *match, *rt0;
478 struct net *net;
479
480 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
481 __func__, fn->leaf, oif);
482
483 rt0 = fn->rr_ptr;
484 if (!rt0)
485 fn->rr_ptr = rt0 = fn->leaf;
486
487 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
488
489 if (!match &&
490 (strict & RT6_LOOKUP_F_REACHABLE)) {
491 struct rt6_info *next = rt0->dst.rt6_next;
492
493 /* no entries matched; do round-robin */
494 if (!next || next->rt6i_metric != rt0->rt6i_metric)
495 next = fn->leaf;
496
497 if (next != rt0)
498 fn->rr_ptr = next;
499 }
500
501 RT6_TRACE("%s() => %p\n",
502 __func__, match);
503
504 net = dev_net(rt0->rt6i_dev);
505 return match ? match : net->ipv6.ip6_null_entry;
506 }
507
508 #ifdef CONFIG_IPV6_ROUTE_INFO
509 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
510 struct in6_addr *gwaddr)
511 {
512 struct net *net = dev_net(dev);
513 struct route_info *rinfo = (struct route_info *) opt;
514 struct in6_addr prefix_buf, *prefix;
515 unsigned int pref;
516 unsigned long lifetime;
517 struct rt6_info *rt;
518
519 if (len < sizeof(struct route_info)) {
520 return -EINVAL;
521 }
522
523 /* Sanity check for prefix_len and length */
524 if (rinfo->length > 3) {
525 return -EINVAL;
526 } else if (rinfo->prefix_len > 128) {
527 return -EINVAL;
528 } else if (rinfo->prefix_len > 64) {
529 if (rinfo->length < 2) {
530 return -EINVAL;
531 }
532 } else if (rinfo->prefix_len > 0) {
533 if (rinfo->length < 1) {
534 return -EINVAL;
535 }
536 }
537
538 pref = rinfo->route_pref;
539 if (pref == ICMPV6_ROUTER_PREF_INVALID)
540 return -EINVAL;
541
542 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
543
544 if (rinfo->length == 3)
545 prefix = (struct in6_addr *)rinfo->prefix;
546 else {
547 /* this function is safe */
548 ipv6_addr_prefix(&prefix_buf,
549 (struct in6_addr *)rinfo->prefix,
550 rinfo->prefix_len);
551 prefix = &prefix_buf;
552 }
553
554 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
555 dev->ifindex);
556
557 if (rt && !lifetime) {
558 ip6_del_rt(rt);
559 rt = NULL;
560 }
561
562 if (!rt && lifetime)
563 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
564 pref);
565 else if (rt)
566 rt->rt6i_flags = RTF_ROUTEINFO |
567 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
568
569 if (rt) {
570 if (!addrconf_finite_timeout(lifetime)) {
571 rt->rt6i_flags &= ~RTF_EXPIRES;
572 } else {
573 rt->rt6i_expires = jiffies + HZ * lifetime;
574 rt->rt6i_flags |= RTF_EXPIRES;
575 }
576 dst_release(&rt->dst);
577 }
578 return 0;
579 }
580 #endif
581
/*
 * BACKTRACK(net, saddr): retry a failed lookup higher up in the tree.
 *
 * Not hygienic: it uses the caller's locals 'rt' and 'fn' and jumps to the
 * caller's labels 'out' and 'restart'.
 *
 * If 'rt' is the namespace's null entry, walk from 'fn' toward the root:
 * at each step, if the parent has a subtree other than 'fn', look up
 * @saddr in that subtree, otherwise move to the parent.  Jump to
 * 'restart' as soon as the new node has RTN_RTINFO set; jump to 'out'
 * when a node flagged RTN_TL_ROOT is reached.
 */
582 #define BACKTRACK(__net, saddr) \
583 do { \
584 if (rt == __net->ipv6.ip6_null_entry) { \
585 struct fib6_node *pn; \
586 while (1) { \
587 if (fn->fn_flags & RTN_TL_ROOT) \
588 goto out; \
589 pn = fn->parent; \
590 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
591 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
592 else \
593 fn = pn; \
594 if (fn->fn_flags & RTN_RTINFO) \
595 goto restart; \
596 } \
597 } \
598 } while(0)
599
600 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
601 struct fib6_table *table,
602 struct flowi6 *fl6, int flags)
603 {
604 struct fib6_node *fn;
605 struct rt6_info *rt;
606
607 read_lock_bh(&table->tb6_lock);
608 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
609 restart:
610 rt = fn->leaf;
611 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
612 BACKTRACK(net, &fl6->saddr);
613 out:
614 dst_use(&rt->dst, jiffies);
615 read_unlock_bh(&table->tb6_lock);
616 return rt;
617
618 }
619
620 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
621 const struct in6_addr *saddr, int oif, int strict)
622 {
623 struct flowi6 fl6 = {
624 .flowi6_oif = oif,
625 .daddr = *daddr,
626 };
627 struct dst_entry *dst;
628 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
629
630 if (saddr) {
631 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
632 flags |= RT6_LOOKUP_F_HAS_SADDR;
633 }
634
635 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
636 if (dst->error == 0)
637 return (struct rt6_info *) dst;
638
639 dst_release(dst);
640
641 return NULL;
642 }
643
644 EXPORT_SYMBOL(rt6_lookup);
645
646 /* ip6_ins_rt is called with table->tb6_lock NOT held.
647 It takes a new route entry; if the addition fails for any reason the
648 route is freed. In any case, if the caller does not hold a reference
649 to it, it may be destroyed.
650 */
651
652 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
653 {
654 int err;
655 struct fib6_table *table;
656
657 table = rt->rt6i_table;
658 write_lock_bh(&table->tb6_lock);
659 err = fib6_add(&table->tb6_root, rt, info);
660 write_unlock_bh(&table->tb6_lock);
661
662 return err;
663 }
664
665 int ip6_ins_rt(struct rt6_info *rt)
666 {
667 struct nl_info info = {
668 .nl_net = dev_net(rt->rt6i_dev),
669 };
670 return __ip6_ins_rt(rt, &info);
671 }
672
673 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
674 struct in6_addr *saddr)
675 {
676 struct rt6_info *rt;
677
678 /*
679 * Clone the route.
680 */
681
682 rt = ip6_rt_copy(ort);
683
684 if (rt) {
685 struct neighbour *neigh;
686 int attempts = !in_softirq();
687
688 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
689 if (rt->rt6i_dst.plen != 128 &&
690 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
691 rt->rt6i_flags |= RTF_ANYCAST;
692 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
693 }
694
695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
696 rt->rt6i_dst.plen = 128;
697 rt->rt6i_flags |= RTF_CACHE;
698 rt->dst.flags |= DST_HOST;
699
700 #ifdef CONFIG_IPV6_SUBTREES
701 if (rt->rt6i_src.plen && saddr) {
702 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
703 rt->rt6i_src.plen = 128;
704 }
705 #endif
706
707 retry:
708 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
709 if (IS_ERR(neigh)) {
710 struct net *net = dev_net(rt->rt6i_dev);
711 int saved_rt_min_interval =
712 net->ipv6.sysctl.ip6_rt_gc_min_interval;
713 int saved_rt_elasticity =
714 net->ipv6.sysctl.ip6_rt_gc_elasticity;
715
716 if (attempts-- > 0) {
717 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
718 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
719
720 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
721
722 net->ipv6.sysctl.ip6_rt_gc_elasticity =
723 saved_rt_elasticity;
724 net->ipv6.sysctl.ip6_rt_gc_min_interval =
725 saved_rt_min_interval;
726 goto retry;
727 }
728
729 if (net_ratelimit())
730 printk(KERN_WARNING
731 "ipv6: Neighbour table overflow.\n");
732 dst_free(&rt->dst);
733 return NULL;
734 }
735 rt->rt6i_nexthop = neigh;
736
737 }
738
739 return rt;
740 }
741
742 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
743 {
744 struct rt6_info *rt = ip6_rt_copy(ort);
745 if (rt) {
746 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
747 rt->rt6i_dst.plen = 128;
748 rt->rt6i_flags |= RTF_CACHE;
749 rt->dst.flags |= DST_HOST;
750 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
751 }
752 return rt;
753 }
754
/*
 * Core policy-routing lookup shared by ip6_pol_route_input() and
 * ip6_pol_route_output(); @oif is the interface index the caller wants
 * matched (the input path passes the incoming interface here).
 *
 * Under table->tb6_lock (read side) the flow is looked up and a route is
 * chosen with rt6_select(), backtracking toward the root on failure.
 * RT6_LOOKUP_F_REACHABLE is requested first unless forwarding is enabled
 * in devconf_all; if that pass ends at 'out', the lookup is repeated once
 * without it.
 *
 * A result that is neither the null entry nor already RTF_CACHE is turned
 * into a per-destination copy (rt6_alloc_cow() or rt6_alloc_clone()) which
 * is inserted with ip6_ins_rt() after the lock has been dropped.  On
 * insertion failure the whole lookup is retried, up to 'attempts' times.
 *
 * Returns a route with a reference taken by dst_hold() and with
 * dst.lastuse / dst.__use updated.
 */
755 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
756 struct flowi6 *fl6, int flags)
757 {
758 struct fib6_node *fn;
759 struct rt6_info *rt, *nrt;
760 int strict = 0;
761 int attempts = 3;
762 int err;
763 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
764
765 strict |= flags & RT6_LOOKUP_F_IFACE;
766
767 relookup:
768 read_lock_bh(&table->tb6_lock);
769
770 restart_2:
771 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
772
773 restart:
774 rt = rt6_select(fn, oif, strict | reachable);
775
/* BACKTRACK() may jump to 'restart' or 'out'. */
776 BACKTRACK(net, &fl6->saddr);
777 if (rt == net->ipv6.ip6_null_entry ||
778 rt->rt6i_flags & RTF_CACHE)
779 goto out;
780
/* Pin the route, then leave the locked section to build the copy. */
781 dst_hold(&rt->dst);
782 read_unlock_bh(&table->tb6_lock);
783
784 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
785 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
786 else if (!(rt->dst.flags & DST_HOST))
787 nrt = rt6_alloc_clone(rt, &fl6->daddr);
788 else
789 goto out2;
790
791 dst_release(&rt->dst);
/* GNU "?:" extension: fall back to the null entry if the copy failed. */
792 rt = nrt ? : net->ipv6.ip6_null_entry;
793
794 dst_hold(&rt->dst);
795 if (nrt) {
796 err = ip6_ins_rt(nrt);
797 if (!err)
798 goto out2;
799 }
800
801 if (--attempts <= 0)
802 goto out2;
803
804 /*
805 * Race condition! In the gap, when table->tb6_lock was
806 * released someone could insert this route. Relookup.
807 */
808 dst_release(&rt->dst);
809 goto relookup;
810
811 out:
/* First pass demanded reachability: retry once without it. */
812 if (reachable) {
813 reachable = 0;
814 goto restart_2;
815 }
816 dst_hold(&rt->dst);
817 read_unlock_bh(&table->tb6_lock);
818 out2:
819 rt->dst.lastuse = jiffies;
820 rt->dst.__use++;
821
822 return rt;
823 }
824
825 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
826 struct flowi6 *fl6, int flags)
827 {
828 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
829 }
830
831 void ip6_route_input(struct sk_buff *skb)
832 {
833 struct ipv6hdr *iph = ipv6_hdr(skb);
834 struct net *net = dev_net(skb->dev);
835 int flags = RT6_LOOKUP_F_HAS_SADDR;
836 struct flowi6 fl6 = {
837 .flowi6_iif = skb->dev->ifindex,
838 .daddr = iph->daddr,
839 .saddr = iph->saddr,
840 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
841 .flowi6_mark = skb->mark,
842 .flowi6_proto = iph->nexthdr,
843 };
844
845 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
846 flags |= RT6_LOOKUP_F_IFACE;
847
848 skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
849 }
850
851 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
852 struct flowi6 *fl6, int flags)
853 {
854 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
855 }
856
857 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
858 struct flowi6 *fl6)
859 {
860 int flags = 0;
861
862 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
863 flags |= RT6_LOOKUP_F_IFACE;
864
865 if (!ipv6_addr_any(&fl6->saddr))
866 flags |= RT6_LOOKUP_F_HAS_SADDR;
867 else if (sk)
868 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
869
870 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
871 }
872
873 EXPORT_SYMBOL(ip6_route_output);
874
875 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
876 {
877 struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
878 struct rt6_info *ort = (struct rt6_info *) dst_orig;
879 struct dst_entry *new = NULL;
880
881 if (rt) {
882 new = &rt->dst;
883
884 new->__use = 1;
885 new->input = dst_discard;
886 new->output = dst_discard;
887
888 dst_copy_metrics(new, &ort->dst);
889 new->dev = ort->dst.dev;
890 if (new->dev)
891 dev_hold(new->dev);
892 rt->rt6i_idev = ort->rt6i_idev;
893 if (rt->rt6i_idev)
894 in6_dev_hold(rt->rt6i_idev);
895 rt->rt6i_expires = 0;
896
897 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
898 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
899 rt->rt6i_metric = 0;
900
901 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
902 #ifdef CONFIG_IPV6_SUBTREES
903 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
904 #endif
905
906 dst_free(new);
907 }
908
909 dst_release(dst_orig);
910 return new ? new : ERR_PTR(-ENOMEM);
911 }
912
913 /*
914 * Destination cache support functions
915 */
916
917 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
918 {
919 struct rt6_info *rt;
920
921 rt = (struct rt6_info *) dst;
922
923 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
924 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
925 if (!rt->rt6i_peer)
926 rt6_bind_peer(rt, 0);
927 rt->rt6i_peer_genid = rt6_peer_genid();
928 }
929 return dst;
930 }
931 return NULL;
932 }
933
934 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
935 {
936 struct rt6_info *rt = (struct rt6_info *) dst;
937
938 if (rt) {
939 if (rt->rt6i_flags & RTF_CACHE) {
940 if (rt6_check_expired(rt)) {
941 ip6_del_rt(rt);
942 dst = NULL;
943 }
944 } else {
945 dst_release(dst);
946 dst = NULL;
947 }
948 }
949 return dst;
950 }
951
952 static void ip6_link_failure(struct sk_buff *skb)
953 {
954 struct rt6_info *rt;
955
956 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
957
958 rt = (struct rt6_info *) skb_dst(skb);
959 if (rt) {
960 if (rt->rt6i_flags&RTF_CACHE) {
961 dst_set_expires(&rt->dst, 0);
962 rt->rt6i_flags |= RTF_EXPIRES;
963 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
964 rt->rt6i_node->fn_sernum = -1;
965 }
966 }
967
968 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
969 {
970 struct rt6_info *rt6 = (struct rt6_info*)dst;
971
972 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
973 rt6->rt6i_flags |= RTF_MODIFIED;
974 if (mtu < IPV6_MIN_MTU) {
975 u32 features = dst_metric(dst, RTAX_FEATURES);
976 mtu = IPV6_MIN_MTU;
977 features |= RTAX_FEATURE_ALLFRAG;
978 dst_metric_set(dst, RTAX_FEATURES, features);
979 }
980 dst_metric_set(dst, RTAX_MTU, mtu);
981 }
982 }
983
984 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
985 {
986 struct net_device *dev = dst->dev;
987 unsigned int mtu = dst_mtu(dst);
988 struct net *net = dev_net(dev);
989
990 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
991
992 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
993 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
994
995 /*
996 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
997 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
998 * IPV6_MAXPLEN is also valid and means: "any MSS,
999 * rely only on pmtu discovery"
1000 */
1001 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1002 mtu = IPV6_MAXPLEN;
1003 return mtu;
1004 }
1005
1006 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1007 {
1008 unsigned int mtu = IPV6_MIN_MTU;
1009 struct inet6_dev *idev;
1010
1011 rcu_read_lock();
1012 idev = __in6_dev_get(dst->dev);
1013 if (idev)
1014 mtu = idev->cnf.mtu6;
1015 rcu_read_unlock();
1016
1017 return mtu;
1018 }
1019
/*
 * Singly linked list (through dst.next) of the entries created by
 * icmp6_dst_alloc(); every access in this file is made under
 * icmp6_dst_lock.
 */
static struct dst_entry *icmp6_dst_gc_list;
static DEFINE_SPINLOCK(icmp6_dst_lock);
1022
1023 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1024 struct neighbour *neigh,
1025 const struct in6_addr *addr)
1026 {
1027 struct rt6_info *rt;
1028 struct inet6_dev *idev = in6_dev_get(dev);
1029 struct net *net = dev_net(dev);
1030
1031 if (unlikely(idev == NULL))
1032 return NULL;
1033
1034 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1035 if (unlikely(rt == NULL)) {
1036 in6_dev_put(idev);
1037 goto out;
1038 }
1039
1040 dev_hold(dev);
1041 if (neigh)
1042 neigh_hold(neigh);
1043 else {
1044 neigh = ndisc_get_neigh(dev, addr);
1045 if (IS_ERR(neigh))
1046 neigh = NULL;
1047 }
1048
1049 rt->rt6i_dev = dev;
1050 rt->rt6i_idev = idev;
1051 rt->rt6i_nexthop = neigh;
1052 atomic_set(&rt->dst.__refcnt, 1);
1053 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1054 rt->dst.output = ip6_output;
1055
1056 #if 0 /* there's no chance to use these for ndisc */
1057 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1058 ? DST_HOST
1059 : 0;
1060 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1061 rt->rt6i_dst.plen = 128;
1062 #endif
1063
1064 spin_lock_bh(&icmp6_dst_lock);
1065 rt->dst.next = icmp6_dst_gc_list;
1066 icmp6_dst_gc_list = &rt->dst;
1067 spin_unlock_bh(&icmp6_dst_lock);
1068
1069 fib6_force_start_gc(net);
1070
1071 out:
1072 return &rt->dst;
1073 }
1074
1075 int icmp6_dst_gc(void)
1076 {
1077 struct dst_entry *dst, **pprev;
1078 int more = 0;
1079
1080 spin_lock_bh(&icmp6_dst_lock);
1081 pprev = &icmp6_dst_gc_list;
1082
1083 while ((dst = *pprev) != NULL) {
1084 if (!atomic_read(&dst->__refcnt)) {
1085 *pprev = dst->next;
1086 dst_free(dst);
1087 } else {
1088 pprev = &dst->next;
1089 ++more;
1090 }
1091 }
1092
1093 spin_unlock_bh(&icmp6_dst_lock);
1094
1095 return more;
1096 }
1097
1098 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1099 void *arg)
1100 {
1101 struct dst_entry *dst, **pprev;
1102
1103 spin_lock_bh(&icmp6_dst_lock);
1104 pprev = &icmp6_dst_gc_list;
1105 while ((dst = *pprev) != NULL) {
1106 struct rt6_info *rt = (struct rt6_info *) dst;
1107 if (func(rt, arg)) {
1108 *pprev = dst->next;
1109 dst_free(dst);
1110 } else {
1111 pprev = &dst->next;
1112 }
1113 }
1114 spin_unlock_bh(&icmp6_dst_lock);
1115 }
1116
1117 static int ip6_dst_gc(struct dst_ops *ops)
1118 {
1119 unsigned long now = jiffies;
1120 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1121 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1122 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1123 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1124 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1125 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1126 int entries;
1127
1128 entries = dst_entries_get_fast(ops);
1129 if (time_after(rt_last_gc + rt_min_interval, now) &&
1130 entries <= rt_max_size)
1131 goto out;
1132
1133 net->ipv6.ip6_rt_gc_expire++;
1134 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1135 net->ipv6.ip6_rt_last_gc = now;
1136 entries = dst_entries_get_slow(ops);
1137 if (entries < ops->gc_thresh)
1138 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1139 out:
1140 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1141 return entries > rt_max_size;
1142 }
1143
1144 /* Clean host part of a prefix. Not necessary in radix tree,
1145 but results in cleaner routing tables.
1146
1147 Remove it only when all the things will work!
1148 */
1149
1150 int ip6_dst_hoplimit(struct dst_entry *dst)
1151 {
1152 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1153 if (hoplimit == 0) {
1154 struct net_device *dev = dst->dev;
1155 struct inet6_dev *idev;
1156
1157 rcu_read_lock();
1158 idev = __in6_dev_get(dev);
1159 if (idev)
1160 hoplimit = idev->cnf.hop_limit;
1161 else
1162 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1163 rcu_read_unlock();
1164 }
1165 return hoplimit;
1166 }
1167 EXPORT_SYMBOL(ip6_dst_hoplimit);
1168
1169 /*
1170 *
1171 */
1172
1173 int ip6_route_add(struct fib6_config *cfg)
1174 {
1175 int err;
1176 struct net *net = cfg->fc_nlinfo.nl_net;
1177 struct rt6_info *rt = NULL;
1178 struct net_device *dev = NULL;
1179 struct inet6_dev *idev = NULL;
1180 struct fib6_table *table;
1181 int addr_type;
1182
1183 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1184 return -EINVAL;
1185 #ifndef CONFIG_IPV6_SUBTREES
1186 if (cfg->fc_src_len)
1187 return -EINVAL;
1188 #endif
1189 if (cfg->fc_ifindex) {
1190 err = -ENODEV;
1191 dev = dev_get_by_index(net, cfg->fc_ifindex);
1192 if (!dev)
1193 goto out;
1194 idev = in6_dev_get(dev);
1195 if (!idev)
1196 goto out;
1197 }
1198
1199 if (cfg->fc_metric == 0)
1200 cfg->fc_metric = IP6_RT_PRIO_USER;
1201
1202 table = fib6_new_table(net, cfg->fc_table);
1203 if (table == NULL) {
1204 err = -ENOBUFS;
1205 goto out;
1206 }
1207
1208 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1209
1210 if (rt == NULL) {
1211 err = -ENOMEM;
1212 goto out;
1213 }
1214
1215 rt->dst.obsolete = -1;
1216 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1217 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1218 0;
1219
1220 if (cfg->fc_protocol == RTPROT_UNSPEC)
1221 cfg->fc_protocol = RTPROT_BOOT;
1222 rt->rt6i_protocol = cfg->fc_protocol;
1223
1224 addr_type = ipv6_addr_type(&cfg->fc_dst);
1225
1226 if (addr_type & IPV6_ADDR_MULTICAST)
1227 rt->dst.input = ip6_mc_input;
1228 else if (cfg->fc_flags & RTF_LOCAL)
1229 rt->dst.input = ip6_input;
1230 else
1231 rt->dst.input = ip6_forward;
1232
1233 rt->dst.output = ip6_output;
1234
1235 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1236 rt->rt6i_dst.plen = cfg->fc_dst_len;
1237 if (rt->rt6i_dst.plen == 128)
1238 rt->dst.flags = DST_HOST;
1239
1240 #ifdef CONFIG_IPV6_SUBTREES
1241 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1242 rt->rt6i_src.plen = cfg->fc_src_len;
1243 #endif
1244
1245 rt->rt6i_metric = cfg->fc_metric;
1246
1247 /* We cannot add true routes via loopback here,
1248 they would result in kernel looping; promote them to reject routes
1249 */
1250 if ((cfg->fc_flags & RTF_REJECT) ||
1251 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1252 && !(cfg->fc_flags&RTF_LOCAL))) {
1253 /* hold loopback dev/idev if we haven't done so. */
1254 if (dev != net->loopback_dev) {
1255 if (dev) {
1256 dev_put(dev);
1257 in6_dev_put(idev);
1258 }
1259 dev = net->loopback_dev;
1260 dev_hold(dev);
1261 idev = in6_dev_get(dev);
1262 if (!idev) {
1263 err = -ENODEV;
1264 goto out;
1265 }
1266 }
1267 rt->dst.output = ip6_pkt_discard_out;
1268 rt->dst.input = ip6_pkt_discard;
1269 rt->dst.error = -ENETUNREACH;
1270 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1271 goto install_route;
1272 }
1273
1274 if (cfg->fc_flags & RTF_GATEWAY) {
1275 struct in6_addr *gw_addr;
1276 int gwa_type;
1277
1278 gw_addr = &cfg->fc_gateway;
1279 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1280 gwa_type = ipv6_addr_type(gw_addr);
1281
1282 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1283 struct rt6_info *grt;
1284
1285 /* IPv6 strictly inhibits using not link-local
1286 addresses as nexthop address.
1287 Otherwise, router will not able to send redirects.
1288 It is very good, but in some (rare!) circumstances
1289 (SIT, PtP, NBMA NOARP links) it is handy to allow
1290 some exceptions. --ANK
1291 */
1292 err = -EINVAL;
1293 if (!(gwa_type&IPV6_ADDR_UNICAST))
1294 goto out;
1295
1296 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1297
1298 err = -EHOSTUNREACH;
1299 if (grt == NULL)
1300 goto out;
1301 if (dev) {
1302 if (dev != grt->rt6i_dev) {
1303 dst_release(&grt->dst);
1304 goto out;
1305 }
1306 } else {
1307 dev = grt->rt6i_dev;
1308 idev = grt->rt6i_idev;
1309 dev_hold(dev);
1310 in6_dev_hold(grt->rt6i_idev);
1311 }
1312 if (!(grt->rt6i_flags&RTF_GATEWAY))
1313 err = 0;
1314 dst_release(&grt->dst);
1315
1316 if (err)
1317 goto out;
1318 }
1319 err = -EINVAL;
1320 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1321 goto out;
1322 }
1323
1324 err = -ENODEV;
1325 if (dev == NULL)
1326 goto out;
1327
1328 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1329 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1330 if (IS_ERR(rt->rt6i_nexthop)) {
1331 err = PTR_ERR(rt->rt6i_nexthop);
1332 rt->rt6i_nexthop = NULL;
1333 goto out;
1334 }
1335 }
1336
1337 rt->rt6i_flags = cfg->fc_flags;
1338
1339 install_route:
1340 if (cfg->fc_mx) {
1341 struct nlattr *nla;
1342 int remaining;
1343
1344 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1345 int type = nla_type(nla);
1346
1347 if (type) {
1348 if (type > RTAX_MAX) {
1349 err = -EINVAL;
1350 goto out;
1351 }
1352
1353 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1354 }
1355 }
1356 }
1357
1358 rt->dst.dev = dev;
1359 rt->rt6i_idev = idev;
1360 rt->rt6i_table = table;
1361
1362 cfg->fc_nlinfo.nl_net = dev_net(dev);
1363
1364 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1365
1366 out:
1367 if (dev)
1368 dev_put(dev);
1369 if (idev)
1370 in6_dev_put(idev);
1371 if (rt)
1372 dst_free(&rt->dst);
1373 return err;
1374 }
1375
1376 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1377 {
1378 int err;
1379 struct fib6_table *table;
1380 struct net *net = dev_net(rt->rt6i_dev);
1381
1382 if (rt == net->ipv6.ip6_null_entry)
1383 return -ENOENT;
1384
1385 table = rt->rt6i_table;
1386 write_lock_bh(&table->tb6_lock);
1387
1388 err = fib6_del(rt, info);
1389 dst_release(&rt->dst);
1390
1391 write_unlock_bh(&table->tb6_lock);
1392
1393 return err;
1394 }
1395
1396 int ip6_del_rt(struct rt6_info *rt)
1397 {
1398 struct nl_info info = {
1399 .nl_net = dev_net(rt->rt6i_dev),
1400 };
1401 return __ip6_del_rt(rt, &info);
1402 }
1403
1404 static int ip6_route_del(struct fib6_config *cfg)
1405 {
1406 struct fib6_table *table;
1407 struct fib6_node *fn;
1408 struct rt6_info *rt;
1409 int err = -ESRCH;
1410
1411 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1412 if (table == NULL)
1413 return err;
1414
1415 read_lock_bh(&table->tb6_lock);
1416
1417 fn = fib6_locate(&table->tb6_root,
1418 &cfg->fc_dst, cfg->fc_dst_len,
1419 &cfg->fc_src, cfg->fc_src_len);
1420
1421 if (fn) {
1422 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1423 if (cfg->fc_ifindex &&
1424 (rt->rt6i_dev == NULL ||
1425 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1426 continue;
1427 if (cfg->fc_flags & RTF_GATEWAY &&
1428 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1429 continue;
1430 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1431 continue;
1432 dst_hold(&rt->dst);
1433 read_unlock_bh(&table->tb6_lock);
1434
1435 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1436 }
1437 }
1438 read_unlock_bh(&table->tb6_lock);
1439
1440 return err;
1441 }
1442
1443 /*
1444 * Handle redirects
1445 */
/*
 * Lookup key for redirect handling: an ordinary flowi6 followed by the
 * gateway address a candidate route must have.
 *
 * fl6 has to remain the first member: __ip6_route_redirect() is handed a
 * pointer to fl6 and casts it back to struct ip6rd_flowi.
 */
struct ip6rd_flowi {
	struct flowi6 fl6;		/* regular flow key passed to the lookup */
	struct in6_addr gateway;	/* compared with each route's rt6i_gateway */
};
1450
1451 static struct rt6_info *__ip6_route_redirect(struct net *net,
1452 struct fib6_table *table,
1453 struct flowi6 *fl6,
1454 int flags)
1455 {
1456 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1457 struct rt6_info *rt;
1458 struct fib6_node *fn;
1459
1460 /*
1461 * Get the "current" route for this destination and
1462 * check if the redirect has come from approriate router.
1463 *
1464 * RFC 2461 specifies that redirects should only be
1465 * accepted if they come from the nexthop to the target.
1466 * Due to the way the routes are chosen, this notion
1467 * is a bit fuzzy and one might need to check all possible
1468 * routes.
1469 */
1470
1471 read_lock_bh(&table->tb6_lock);
1472 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1473 restart:
1474 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1475 /*
1476 * Current route is on-link; redirect is always invalid.
1477 *
1478 * Seems, previous statement is not true. It could
1479 * be node, which looks for us as on-link (f.e. proxy ndisc)
1480 * But then router serving it might decide, that we should
1481 * know truth 8)8) --ANK (980726).
1482 */
1483 if (rt6_check_expired(rt))
1484 continue;
1485 if (!(rt->rt6i_flags & RTF_GATEWAY))
1486 continue;
1487 if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
1488 continue;
1489 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1490 continue;
1491 break;
1492 }
1493
1494 if (!rt)
1495 rt = net->ipv6.ip6_null_entry;
1496 BACKTRACK(net, &fl6->saddr);
1497 out:
1498 dst_hold(&rt->dst);
1499
1500 read_unlock_bh(&table->tb6_lock);
1501
1502 return rt;
1503 };
1504
1505 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1506 struct in6_addr *src,
1507 struct in6_addr *gateway,
1508 struct net_device *dev)
1509 {
1510 int flags = RT6_LOOKUP_F_HAS_SADDR;
1511 struct net *net = dev_net(dev);
1512 struct ip6rd_flowi rdfl = {
1513 .fl6 = {
1514 .flowi6_oif = dev->ifindex,
1515 .daddr = *dest,
1516 .saddr = *src,
1517 },
1518 };
1519
1520 ipv6_addr_copy(&rdfl.gateway, gateway);
1521
1522 if (rt6_need_strict(dest))
1523 flags |= RT6_LOOKUP_F_IFACE;
1524
1525 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1526 flags, __ip6_route_redirect);
1527 }
1528
1529 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1530 struct in6_addr *saddr,
1531 struct neighbour *neigh, u8 *lladdr, int on_link)
1532 {
1533 struct rt6_info *rt, *nrt = NULL;
1534 struct netevent_redirect netevent;
1535 struct net *net = dev_net(neigh->dev);
1536
1537 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1538
1539 if (rt == net->ipv6.ip6_null_entry) {
1540 if (net_ratelimit())
1541 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1542 "for redirect target\n");
1543 goto out;
1544 }
1545
1546 /*
1547 * We have finally decided to accept it.
1548 */
1549
1550 neigh_update(neigh, lladdr, NUD_STALE,
1551 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1552 NEIGH_UPDATE_F_OVERRIDE|
1553 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1554 NEIGH_UPDATE_F_ISROUTER))
1555 );
1556
1557 /*
1558 * Redirect received -> path was valid.
1559 * Look, redirects are sent only in response to data packets,
1560 * so that this nexthop apparently is reachable. --ANK
1561 */
1562 dst_confirm(&rt->dst);
1563
1564 /* Duplicate redirect: silently ignore. */
1565 if (neigh == rt->dst.neighbour)
1566 goto out;
1567
1568 nrt = ip6_rt_copy(rt);
1569 if (nrt == NULL)
1570 goto out;
1571
1572 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1573 if (on_link)
1574 nrt->rt6i_flags &= ~RTF_GATEWAY;
1575
1576 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1577 nrt->rt6i_dst.plen = 128;
1578 nrt->dst.flags |= DST_HOST;
1579
1580 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1581 nrt->rt6i_nexthop = neigh_clone(neigh);
1582
1583 if (ip6_ins_rt(nrt))
1584 goto out;
1585
1586 netevent.old = &rt->dst;
1587 netevent.new = &nrt->dst;
1588 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1589
1590 if (rt->rt6i_flags&RTF_CACHE) {
1591 ip6_del_rt(rt);
1592 return;
1593 }
1594
1595 out:
1596 dst_release(&rt->dst);
1597 }
1598
1599 /*
1600 * Handle ICMP "packet too big" messages
1601 * i.e. Path MTU discovery
1602 */
1603
1604 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1605 struct net *net, u32 pmtu, int ifindex)
1606 {
1607 struct rt6_info *rt, *nrt;
1608 int allfrag = 0;
1609 again:
1610 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1611 if (rt == NULL)
1612 return;
1613
1614 if (rt6_check_expired(rt)) {
1615 ip6_del_rt(rt);
1616 goto again;
1617 }
1618
1619 if (pmtu >= dst_mtu(&rt->dst))
1620 goto out;
1621
1622 if (pmtu < IPV6_MIN_MTU) {
1623 /*
1624 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1625 * MTU (1280) and a fragment header should always be included
1626 * after a node receiving Too Big message reporting PMTU is
1627 * less than the IPv6 Minimum Link MTU.
1628 */
1629 pmtu = IPV6_MIN_MTU;
1630 allfrag = 1;
1631 }
1632
1633 /* New mtu received -> path was valid.
1634 They are sent only in response to data packets,
1635 so that this nexthop apparently is reachable. --ANK
1636 */
1637 dst_confirm(&rt->dst);
1638
1639 /* Host route. If it is static, it would be better
1640 not to override it, but add new one, so that
1641 when cache entry will expire old pmtu
1642 would return automatically.
1643 */
1644 if (rt->rt6i_flags & RTF_CACHE) {
1645 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1646 if (allfrag) {
1647 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1648 features |= RTAX_FEATURE_ALLFRAG;
1649 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1650 }
1651 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1652 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1653 goto out;
1654 }
1655
1656 /* Network route.
1657 Two cases are possible:
1658 1. It is connected route. Action: COW
1659 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1660 */
1661 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1662 nrt = rt6_alloc_cow(rt, daddr, saddr);
1663 else
1664 nrt = rt6_alloc_clone(rt, daddr);
1665
1666 if (nrt) {
1667 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1668 if (allfrag) {
1669 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1670 features |= RTAX_FEATURE_ALLFRAG;
1671 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1672 }
1673
1674 /* According to RFC 1981, detecting PMTU increase shouldn't be
1675 * happened within 5 mins, the recommended timer is 10 mins.
1676 * Here this route expiration time is set to ip6_rt_mtu_expires
1677 * which is 10 mins. After 10 mins the decreased pmtu is expired
1678 * and detecting PMTU increase will be automatically happened.
1679 */
1680 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1681 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1682
1683 ip6_ins_rt(nrt);
1684 }
1685 out:
1686 dst_release(&rt->dst);
1687 }
1688
1689 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1690 struct net_device *dev, u32 pmtu)
1691 {
1692 struct net *net = dev_net(dev);
1693
1694 /*
1695 * RFC 1981 states that a node "MUST reduce the size of the packets it
1696 * is sending along the path" that caused the Packet Too Big message.
1697 * Since it's not possible in the general case to determine which
1698 * interface was used to send the original packet, we update the MTU
1699 * on the interface that will be used to send future packets. We also
1700 * update the MTU on the interface that received the Packet Too Big in
1701 * case the original packet was forced out that interface with
1702 * SO_BINDTODEVICE or similar. This is the next best thing to the
1703 * correct behaviour, which would be to update the MTU on all
1704 * interfaces.
1705 */
1706 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1707 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1708 }
1709
1710 /*
1711 * Misc support functions
1712 */
1713
1714 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1715 {
1716 struct net *net = dev_net(ort->rt6i_dev);
1717 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1718
1719 if (rt) {
1720 rt->dst.input = ort->dst.input;
1721 rt->dst.output = ort->dst.output;
1722
1723 dst_copy_metrics(&rt->dst, &ort->dst);
1724 rt->dst.error = ort->dst.error;
1725 rt->dst.dev = ort->dst.dev;
1726 if (rt->dst.dev)
1727 dev_hold(rt->dst.dev);
1728 rt->rt6i_idev = ort->rt6i_idev;
1729 if (rt->rt6i_idev)
1730 in6_dev_hold(rt->rt6i_idev);
1731 rt->dst.lastuse = jiffies;
1732 rt->rt6i_expires = 0;
1733
1734 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1735 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1736 rt->rt6i_metric = 0;
1737
1738 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1739 #ifdef CONFIG_IPV6_SUBTREES
1740 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1741 #endif
1742 rt->rt6i_table = ort->rt6i_table;
1743 }
1744 return rt;
1745 }
1746
1747 #ifdef CONFIG_IPV6_ROUTE_INFO
1748 static struct rt6_info *rt6_get_route_info(struct net *net,
1749 struct in6_addr *prefix, int prefixlen,
1750 struct in6_addr *gwaddr, int ifindex)
1751 {
1752 struct fib6_node *fn;
1753 struct rt6_info *rt = NULL;
1754 struct fib6_table *table;
1755
1756 table = fib6_get_table(net, RT6_TABLE_INFO);
1757 if (table == NULL)
1758 return NULL;
1759
1760 write_lock_bh(&table->tb6_lock);
1761 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1762 if (!fn)
1763 goto out;
1764
1765 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1766 if (rt->rt6i_dev->ifindex != ifindex)
1767 continue;
1768 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1769 continue;
1770 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1771 continue;
1772 dst_hold(&rt->dst);
1773 break;
1774 }
1775 out:
1776 write_unlock_bh(&table->tb6_lock);
1777 return rt;
1778 }
1779
1780 static struct rt6_info *rt6_add_route_info(struct net *net,
1781 struct in6_addr *prefix, int prefixlen,
1782 struct in6_addr *gwaddr, int ifindex,
1783 unsigned pref)
1784 {
1785 struct fib6_config cfg = {
1786 .fc_table = RT6_TABLE_INFO,
1787 .fc_metric = IP6_RT_PRIO_USER,
1788 .fc_ifindex = ifindex,
1789 .fc_dst_len = prefixlen,
1790 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1791 RTF_UP | RTF_PREF(pref),
1792 .fc_nlinfo.pid = 0,
1793 .fc_nlinfo.nlh = NULL,
1794 .fc_nlinfo.nl_net = net,
1795 };
1796
1797 ipv6_addr_copy(&cfg.fc_dst, prefix);
1798 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1799
1800 /* We should treat it as a default route if prefix length is 0. */
1801 if (!prefixlen)
1802 cfg.fc_flags |= RTF_DEFAULT;
1803
1804 ip6_route_add(&cfg);
1805
1806 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1807 }
1808 #endif
1809
1810 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1811 {
1812 struct rt6_info *rt;
1813 struct fib6_table *table;
1814
1815 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1816 if (table == NULL)
1817 return NULL;
1818
1819 write_lock_bh(&table->tb6_lock);
1820 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1821 if (dev == rt->rt6i_dev &&
1822 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1823 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1824 break;
1825 }
1826 if (rt)
1827 dst_hold(&rt->dst);
1828 write_unlock_bh(&table->tb6_lock);
1829 return rt;
1830 }
1831
1832 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1833 struct net_device *dev,
1834 unsigned int pref)
1835 {
1836 struct fib6_config cfg = {
1837 .fc_table = RT6_TABLE_DFLT,
1838 .fc_metric = IP6_RT_PRIO_USER,
1839 .fc_ifindex = dev->ifindex,
1840 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1841 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1842 .fc_nlinfo.pid = 0,
1843 .fc_nlinfo.nlh = NULL,
1844 .fc_nlinfo.nl_net = dev_net(dev),
1845 };
1846
1847 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1848
1849 ip6_route_add(&cfg);
1850
1851 return rt6_get_dflt_router(gwaddr, dev);
1852 }
1853
1854 void rt6_purge_dflt_routers(struct net *net)
1855 {
1856 struct rt6_info *rt;
1857 struct fib6_table *table;
1858
1859 /* NOTE: Keep consistent with rt6_get_dflt_router */
1860 table = fib6_get_table(net, RT6_TABLE_DFLT);
1861 if (table == NULL)
1862 return;
1863
1864 restart:
1865 read_lock_bh(&table->tb6_lock);
1866 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1867 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1868 dst_hold(&rt->dst);
1869 read_unlock_bh(&table->tb6_lock);
1870 ip6_del_rt(rt);
1871 goto restart;
1872 }
1873 }
1874 read_unlock_bh(&table->tb6_lock);
1875 }
1876
1877 static void rtmsg_to_fib6_config(struct net *net,
1878 struct in6_rtmsg *rtmsg,
1879 struct fib6_config *cfg)
1880 {
1881 memset(cfg, 0, sizeof(*cfg));
1882
1883 cfg->fc_table = RT6_TABLE_MAIN;
1884 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1885 cfg->fc_metric = rtmsg->rtmsg_metric;
1886 cfg->fc_expires = rtmsg->rtmsg_info;
1887 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1888 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1889 cfg->fc_flags = rtmsg->rtmsg_flags;
1890
1891 cfg->fc_nlinfo.nl_net = net;
1892
1893 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1894 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1895 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1896 }
1897
1898 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1899 {
1900 struct fib6_config cfg;
1901 struct in6_rtmsg rtmsg;
1902 int err;
1903
1904 switch(cmd) {
1905 case SIOCADDRT: /* Add a route */
1906 case SIOCDELRT: /* Delete a route */
1907 if (!capable(CAP_NET_ADMIN))
1908 return -EPERM;
1909 err = copy_from_user(&rtmsg, arg,
1910 sizeof(struct in6_rtmsg));
1911 if (err)
1912 return -EFAULT;
1913
1914 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1915
1916 rtnl_lock();
1917 switch (cmd) {
1918 case SIOCADDRT:
1919 err = ip6_route_add(&cfg);
1920 break;
1921 case SIOCDELRT:
1922 err = ip6_route_del(&cfg);
1923 break;
1924 default:
1925 err = -EINVAL;
1926 }
1927 rtnl_unlock();
1928
1929 return err;
1930 }
1931
1932 return -EINVAL;
1933 }
1934
1935 /*
1936 * Drop the packet on the floor
1937 */
1938
1939 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1940 {
1941 int type;
1942 struct dst_entry *dst = skb_dst(skb);
1943 switch (ipstats_mib_noroutes) {
1944 case IPSTATS_MIB_INNOROUTES:
1945 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1946 if (type == IPV6_ADDR_ANY) {
1947 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1948 IPSTATS_MIB_INADDRERRORS);
1949 break;
1950 }
1951 /* FALLTHROUGH */
1952 case IPSTATS_MIB_OUTNOROUTES:
1953 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1954 ipstats_mib_noroutes);
1955 break;
1956 }
1957 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1958 kfree_skb(skb);
1959 return 0;
1960 }
1961
1962 static int ip6_pkt_discard(struct sk_buff *skb)
1963 {
1964 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1965 }
1966
1967 static int ip6_pkt_discard_out(struct sk_buff *skb)
1968 {
1969 skb->dev = skb_dst(skb)->dev;
1970 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1971 }
1972
1973 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1974
1975 static int ip6_pkt_prohibit(struct sk_buff *skb)
1976 {
1977 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1978 }
1979
1980 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1981 {
1982 skb->dev = skb_dst(skb)->dev;
1983 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1984 }
1985
1986 #endif
1987
1988 /*
1989 * Allocate a dst for local (unicast / anycast) address.
1990 */
1991
1992 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1993 const struct in6_addr *addr,
1994 int anycast)
1995 {
1996 struct net *net = dev_net(idev->dev);
1997 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1998 struct neighbour *neigh;
1999
2000 if (rt == NULL) {
2001 if (net_ratelimit())
2002 pr_warning("IPv6: Maximum number of routes reached,"
2003 " consider increasing route/max_size.\n");
2004 return ERR_PTR(-ENOMEM);
2005 }
2006
2007 dev_hold(net->loopback_dev);
2008 in6_dev_hold(idev);
2009
2010 rt->dst.flags = DST_HOST;
2011 rt->dst.input = ip6_input;
2012 rt->dst.output = ip6_output;
2013 rt->rt6i_dev = net->loopback_dev;
2014 rt->rt6i_idev = idev;
2015 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
2016 rt->dst.obsolete = -1;
2017
2018 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2019 if (anycast)
2020 rt->rt6i_flags |= RTF_ANYCAST;
2021 else
2022 rt->rt6i_flags |= RTF_LOCAL;
2023 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2024 if (IS_ERR(neigh)) {
2025 dst_free(&rt->dst);
2026
2027 return ERR_CAST(neigh);
2028 }
2029 rt->rt6i_nexthop = neigh;
2030
2031 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2032 rt->rt6i_dst.plen = 128;
2033 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2034
2035 atomic_set(&rt->dst.__refcnt, 1);
2036
2037 return rt;
2038 }
2039
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device *dev;	/* device whose routes go away; NULL matches every route */
	struct net *net;	/* namespace whose ip6_null_entry is always kept */
};
2044
2045 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2046 {
2047 const struct arg_dev_net *adn = arg;
2048 const struct net_device *dev = adn->dev;
2049
2050 if ((rt->rt6i_dev == dev || dev == NULL) &&
2051 rt != adn->net->ipv6.ip6_null_entry) {
2052 RT6_TRACE("deleted by ifdown %p\n", rt);
2053 return -1;
2054 }
2055 return 0;
2056 }
2057
2058 void rt6_ifdown(struct net *net, struct net_device *dev)
2059 {
2060 struct arg_dev_net adn = {
2061 .dev = dev,
2062 .net = net,
2063 };
2064
2065 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2066 icmp6_clean_all(fib6_ifdown, &adn);
2067 }
2068
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg
{
	struct net_device *dev;	/* device whose MTU changed */
	unsigned mtu;		/* the new MTU value */
};
2074
2075 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2076 {
2077 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2078 struct inet6_dev *idev;
2079
2080 /* In IPv6 pmtu discovery is not optional,
2081 so that RTAX_MTU lock cannot disable it.
2082 We still use this lock to block changes
2083 caused by addrconf/ndisc.
2084 */
2085
2086 idev = __in6_dev_get(arg->dev);
2087 if (idev == NULL)
2088 return 0;
2089
2090 /* For administrative MTU increase, there is no way to discover
2091 IPv6 PMTU increase, so PMTU increase should be updated here.
2092 Since RFC 1981 doesn't include administrative MTU increase
2093 update PMTU increase is a MUST. (i.e. jumbo frame)
2094 */
2095 /*
2096 If new MTU is less than route PMTU, this new MTU will be the
2097 lowest MTU in the path, update the route PMTU to reflect PMTU
2098 decreases; if new MTU is greater than route PMTU, and the
2099 old MTU is the lowest MTU in the path, update the route PMTU
2100 to reflect the increase. In this case if the other nodes' MTU
2101 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2102 PMTU discouvery.
2103 */
2104 if (rt->rt6i_dev == arg->dev &&
2105 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2106 (dst_mtu(&rt->dst) >= arg->mtu ||
2107 (dst_mtu(&rt->dst) < arg->mtu &&
2108 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2109 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2110 }
2111 return 0;
2112 }
2113
2114 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2115 {
2116 struct rt6_mtu_change_arg arg = {
2117 .dev = dev,
2118 .mtu = mtu,
2119 };
2120
2121 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2122 }
2123
2124 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2125 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2126 [RTA_OIF] = { .type = NLA_U32 },
2127 [RTA_IIF] = { .type = NLA_U32 },
2128 [RTA_PRIORITY] = { .type = NLA_U32 },
2129 [RTA_METRICS] = { .type = NLA_NESTED },
2130 };
2131
2132 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2133 struct fib6_config *cfg)
2134 {
2135 struct rtmsg *rtm;
2136 struct nlattr *tb[RTA_MAX+1];
2137 int err;
2138
2139 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2140 if (err < 0)
2141 goto errout;
2142
2143 err = -EINVAL;
2144 rtm = nlmsg_data(nlh);
2145 memset(cfg, 0, sizeof(*cfg));
2146
2147 cfg->fc_table = rtm->rtm_table;
2148 cfg->fc_dst_len = rtm->rtm_dst_len;
2149 cfg->fc_src_len = rtm->rtm_src_len;
2150 cfg->fc_flags = RTF_UP;
2151 cfg->fc_protocol = rtm->rtm_protocol;
2152
2153 if (rtm->rtm_type == RTN_UNREACHABLE)
2154 cfg->fc_flags |= RTF_REJECT;
2155
2156 if (rtm->rtm_type == RTN_LOCAL)
2157 cfg->fc_flags |= RTF_LOCAL;
2158
2159 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2160 cfg->fc_nlinfo.nlh = nlh;
2161 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2162
2163 if (tb[RTA_GATEWAY]) {
2164 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2165 cfg->fc_flags |= RTF_GATEWAY;
2166 }
2167
2168 if (tb[RTA_DST]) {
2169 int plen = (rtm->rtm_dst_len + 7) >> 3;
2170
2171 if (nla_len(tb[RTA_DST]) < plen)
2172 goto errout;
2173
2174 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2175 }
2176
2177 if (tb[RTA_SRC]) {
2178 int plen = (rtm->rtm_src_len + 7) >> 3;
2179
2180 if (nla_len(tb[RTA_SRC]) < plen)
2181 goto errout;
2182
2183 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2184 }
2185
2186 if (tb[RTA_OIF])
2187 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2188
2189 if (tb[RTA_PRIORITY])
2190 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2191
2192 if (tb[RTA_METRICS]) {
2193 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2194 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2195 }
2196
2197 if (tb[RTA_TABLE])
2198 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2199
2200 err = 0;
2201 errout:
2202 return err;
2203 }
2204
2205 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2206 {
2207 struct fib6_config cfg;
2208 int err;
2209
2210 err = rtm_to_fib6_config(skb, nlh, &cfg);
2211 if (err < 0)
2212 return err;
2213
2214 return ip6_route_del(&cfg);
2215 }
2216
2217 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2218 {
2219 struct fib6_config cfg;
2220 int err;
2221
2222 err = rtm_to_fib6_config(skb, nlh, &cfg);
2223 if (err < 0)
2224 return err;
2225
2226 return ip6_route_add(&cfg);
2227 }
2228
2229 static inline size_t rt6_nlmsg_size(void)
2230 {
2231 return NLMSG_ALIGN(sizeof(struct rtmsg))
2232 + nla_total_size(16) /* RTA_SRC */
2233 + nla_total_size(16) /* RTA_DST */
2234 + nla_total_size(16) /* RTA_GATEWAY */
2235 + nla_total_size(16) /* RTA_PREFSRC */
2236 + nla_total_size(4) /* RTA_TABLE */
2237 + nla_total_size(4) /* RTA_IIF */
2238 + nla_total_size(4) /* RTA_OIF */
2239 + nla_total_size(4) /* RTA_PRIORITY */
2240 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2241 + nla_total_size(sizeof(struct rta_cacheinfo));
2242 }
2243
/*
 * Append one route message of type @type describing @rt to @skb.
 *
 * @dst/@src, when non-NULL, are reported as /128 destination/source
 * instead of the route's own prefixes. @iif, when non-zero, is reported as
 * RTA_IIF (or, for multicast destinations with CONFIG_IPV6_MROUTE, handed
 * to ip6mr_get_route()); with @iif zero and @dst set, a preferred source
 * address is looked up and reported. @prefix non-zero restricts output to
 * routes flagged RTF_PREFIX_RT.
 *
 * Returns:
 *   1            @prefix was set and @rt is not a prefix route (nothing
 *                was written)
 *   -EMSGSIZE    the header could not be added, or the failure label was
 *                reached (the partially built message is cancelled)
 *   0            ip6mr_get_route() returned 0 and @nowait is zero
 *   otherwise    the value of nlmsg_end()
 *
 * NOTE(review): NLA_PUT()/NLA_PUT_U32() are macros defined outside this
 * file section; the nla_put_failure label is presumably their implicit
 * jump target when the attribute does not fit — confirm against the macro
 * definitions before restructuring this function.
 */
static int rt6_fill_node(struct net *net,
			 struct sk_buff *skb, struct rt6_info *rt,
			 struct in6_addr *dst, struct in6_addr *src,
			 int iif, int type, u32 pid, u32 seq,
			 int prefix, int nowait, unsigned int flags)
{
	struct rtmsg *rtm;
	struct nlmsghdr *nlh;
	long expires;
	u32 table;

	if (prefix) {	/* user wants prefix routes only */
		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
			/* success since this is not a prefix route */
			return 1;
		}
	}

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	/* Fixed header first; attributes follow. */
	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET6;
	rtm->rtm_dst_len = rt->rt6i_dst.plen;
	rtm->rtm_src_len = rt->rt6i_src.plen;
	rtm->rtm_tos = 0;
	if (rt->rt6i_table)
		table = rt->rt6i_table->tb6_id;
	else
		table = RT6_TABLE_UNSPEC;
	/* The table id goes both into the header and into RTA_TABLE. */
	rtm->rtm_table = table;
	NLA_PUT_U32(skb, RTA_TABLE, table);
	/* Route type: reject beats local beats "via a loopback device". */
	if (rt->rt6i_flags&RTF_REJECT)
		rtm->rtm_type = RTN_UNREACHABLE;
	else if (rt->rt6i_flags&RTF_LOCAL)
		rtm->rtm_type = RTN_LOCAL;
	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
		rtm->rtm_type = RTN_LOCAL;
	else
		rtm->rtm_type = RTN_UNICAST;
	rtm->rtm_flags = 0;
	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
	/* The stored protocol is overridden for dynamic/addrconf/default routes. */
	rtm->rtm_protocol = rt->rt6i_protocol;
	if (rt->rt6i_flags&RTF_DYNAMIC)
		rtm->rtm_protocol = RTPROT_REDIRECT;
	else if (rt->rt6i_flags & RTF_ADDRCONF)
		rtm->rtm_protocol = RTPROT_KERNEL;
	else if (rt->rt6i_flags&RTF_DEFAULT)
		rtm->rtm_protocol = RTPROT_RA;

	if (rt->rt6i_flags&RTF_CACHE)
		rtm->rtm_flags |= RTM_F_CLONED;

	/* An explicit @dst is reported as a full /128 address. */
	if (dst) {
		NLA_PUT(skb, RTA_DST, 16, dst);
		rtm->rtm_dst_len = 128;
	} else if (rtm->rtm_dst_len)
		NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
	if (src) {
		NLA_PUT(skb, RTA_SRC, 16, src);
		rtm->rtm_src_len = 128;
	} else if (rtm->rtm_src_len)
		NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
	if (iif) {
#ifdef CONFIG_IPV6_MROUTE
		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
			int err = ip6mr_get_route(net, skb, rtm, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					/* With nowait only a full skb is treated as fatal. */
					if (err == -EMSGSIZE)
						goto nla_put_failure;
				}
			}
		} else
#endif
			/* Note: with MROUTE enabled this is the else branch above. */
			NLA_PUT_U32(skb, RTA_IIF, iif);
	} else if (dst) {
		struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
		struct in6_addr saddr_buf;
		/* RTA_PREFSRC is only emitted when source selection succeeds. */
		if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
				       dst, 0, &saddr_buf) == 0)
			NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
	}

	if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
		goto nla_put_failure;

	/* The gateway reported is the key of the route's neighbour entry. */
	if (rt->dst.neighbour)
		NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);

	if (rt->dst.dev)
		NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);

	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);

	/*
	 * Remaining lifetime in jiffies, capped at INT_MAX; 0 for routes
	 * without RTF_EXPIRES.
	 * NOTE(review): if rt6i_expires is unsigned like jiffies, an already
	 * expired route makes the subtraction wrap and is reported as
	 * INT_MAX rather than as expired — confirm whether that is intended.
	 */
	if (!(rt->rt6i_flags & RTF_EXPIRES))
		expires = 0;
	else if (rt->rt6i_expires - jiffies < INT_MAX)
		expires = rt->rt6i_expires - jiffies;
	else
		expires = INT_MAX;

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
			       expires, rt->dst.error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	/* Undo the partially written message. */
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2363
2364 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2365 {
2366 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2367 int prefix;
2368
2369 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2370 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2371 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2372 } else
2373 prefix = 0;
2374
2375 return rt6_fill_node(arg->net,
2376 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2377 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2378 prefix, 0, NLM_F_MULTI);
2379 }
2380
2381 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2382 {
2383 struct net *net = sock_net(in_skb->sk);
2384 struct nlattr *tb[RTA_MAX+1];
2385 struct rt6_info *rt;
2386 struct sk_buff *skb;
2387 struct rtmsg *rtm;
2388 struct flowi6 fl6;
2389 int err, iif = 0;
2390
2391 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2392 if (err < 0)
2393 goto errout;
2394
2395 err = -EINVAL;
2396 memset(&fl6, 0, sizeof(fl6));
2397
2398 if (tb[RTA_SRC]) {
2399 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2400 goto errout;
2401
2402 ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
2403 }
2404
2405 if (tb[RTA_DST]) {
2406 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2407 goto errout;
2408
2409 ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
2410 }
2411
2412 if (tb[RTA_IIF])
2413 iif = nla_get_u32(tb[RTA_IIF]);
2414
2415 if (tb[RTA_OIF])
2416 fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
2417
2418 if (iif) {
2419 struct net_device *dev;
2420 dev = __dev_get_by_index(net, iif);
2421 if (!dev) {
2422 err = -ENODEV;
2423 goto errout;
2424 }
2425 }
2426
2427 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2428 if (skb == NULL) {
2429 err = -ENOBUFS;
2430 goto errout;
2431 }
2432
2433 /* Reserve room for dummy headers, this skb can pass
2434 through good chunk of routing engine.
2435 */
2436 skb_reset_mac_header(skb);
2437 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2438
2439 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
2440 skb_dst_set(skb, &rt->dst);
2441
2442 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2443 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2444 nlh->nlmsg_seq, 0, 0, 0);
2445 if (err < 0) {
2446 kfree_skb(skb);
2447 goto errout;
2448 }
2449
2450 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2451 errout:
2452 return err;
2453 }
2454
2455 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2456 {
2457 struct sk_buff *skb;
2458 struct net *net = info->nl_net;
2459 u32 seq;
2460 int err;
2461
2462 err = -ENOBUFS;
2463 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2464
2465 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2466 if (skb == NULL)
2467 goto errout;
2468
2469 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2470 event, info->pid, seq, 0, 0, 0);
2471 if (err < 0) {
2472 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2473 WARN_ON(err == -EMSGSIZE);
2474 kfree_skb(skb);
2475 goto errout;
2476 }
2477 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2478 info->nlh, gfp_any());
2479 return;
2480 errout:
2481 if (err < 0)
2482 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2483 }
2484
2485 static int ip6_route_dev_notify(struct notifier_block *this,
2486 unsigned long event, void *data)
2487 {
2488 struct net_device *dev = (struct net_device *)data;
2489 struct net *net = dev_net(dev);
2490
2491 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2492 net->ipv6.ip6_null_entry->dst.dev = dev;
2493 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2494 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2495 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2496 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2497 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2498 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2499 #endif
2500 }
2501
2502 return NOTIFY_OK;
2503 }
2504
2505 /*
2506 * /proc
2507 */
2508
2509 #ifdef CONFIG_PROC_FS
2510
/*
 * Buffer bookkeeping for a /proc read.
 *
 * NOTE(review): the seq_file based handlers below do not reference this
 * structure; it looks like a leftover from a buffer-based implementation.
 * Confirm there are no other users before removing it.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2519
2520 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2521 {
2522 struct seq_file *m = p_arg;
2523
2524 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2525
2526 #ifdef CONFIG_IPV6_SUBTREES
2527 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2528 #else
2529 seq_puts(m, "00000000000000000000000000000000 00 ");
2530 #endif
2531
2532 if (rt->rt6i_nexthop) {
2533 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2534 } else {
2535 seq_puts(m, "00000000000000000000000000000000");
2536 }
2537 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2538 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2539 rt->dst.__use, rt->rt6i_flags,
2540 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2541 return 0;
2542 }
2543
2544 static int ipv6_route_show(struct seq_file *m, void *v)
2545 {
2546 struct net *net = (struct net *)m->private;
2547 fib6_clean_all(net, rt6_info_route, 0, m);
2548 return 0;
2549 }
2550
2551 static int ipv6_route_open(struct inode *inode, struct file *file)
2552 {
2553 return single_open_net(inode, file, ipv6_route_show);
2554 }
2555
2556 static const struct file_operations ipv6_route_proc_fops = {
2557 .owner = THIS_MODULE,
2558 .open = ipv6_route_open,
2559 .read = seq_read,
2560 .llseek = seq_lseek,
2561 .release = single_release_net,
2562 };
2563
2564 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2565 {
2566 struct net *net = (struct net *)seq->private;
2567 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2568 net->ipv6.rt6_stats->fib_nodes,
2569 net->ipv6.rt6_stats->fib_route_nodes,
2570 net->ipv6.rt6_stats->fib_rt_alloc,
2571 net->ipv6.rt6_stats->fib_rt_entries,
2572 net->ipv6.rt6_stats->fib_rt_cache,
2573 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2574 net->ipv6.rt6_stats->fib_discarded_routes);
2575
2576 return 0;
2577 }
2578
2579 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2580 {
2581 return single_open_net(inode, file, rt6_stats_seq_show);
2582 }
2583
2584 static const struct file_operations rt6_stats_seq_fops = {
2585 .owner = THIS_MODULE,
2586 .open = rt6_stats_seq_open,
2587 .read = seq_read,
2588 .llseek = seq_lseek,
2589 .release = single_release_net,
2590 };
2591 #endif /* CONFIG_PROC_FS */
2592
2593 #ifdef CONFIG_SYSCTL
2594
2595 static
2596 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2597 void __user *buffer, size_t *lenp, loff_t *ppos)
2598 {
2599 struct net *net;
2600 int delay;
2601 if (!write)
2602 return -EINVAL;
2603
2604 net = (struct net *)ctl->extra1;
2605 delay = net->ipv6.sysctl.flush_delay;
2606 proc_dointvec(ctl, write, buffer, lenp, ppos);
2607 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2608 return 0;
2609 }
2610
2611 ctl_table ipv6_route_table_template[] = {
2612 {
2613 .procname = "flush",
2614 .data = &init_net.ipv6.sysctl.flush_delay,
2615 .maxlen = sizeof(int),
2616 .mode = 0200,
2617 .proc_handler = ipv6_sysctl_rtcache_flush
2618 },
2619 {
2620 .procname = "gc_thresh",
2621 .data = &ip6_dst_ops_template.gc_thresh,
2622 .maxlen = sizeof(int),
2623 .mode = 0644,
2624 .proc_handler = proc_dointvec,
2625 },
2626 {
2627 .procname = "max_size",
2628 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2629 .maxlen = sizeof(int),
2630 .mode = 0644,
2631 .proc_handler = proc_dointvec,
2632 },
2633 {
2634 .procname = "gc_min_interval",
2635 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2636 .maxlen = sizeof(int),
2637 .mode = 0644,
2638 .proc_handler = proc_dointvec_jiffies,
2639 },
2640 {
2641 .procname = "gc_timeout",
2642 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2643 .maxlen = sizeof(int),
2644 .mode = 0644,
2645 .proc_handler = proc_dointvec_jiffies,
2646 },
2647 {
2648 .procname = "gc_interval",
2649 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2650 .maxlen = sizeof(int),
2651 .mode = 0644,
2652 .proc_handler = proc_dointvec_jiffies,
2653 },
2654 {
2655 .procname = "gc_elasticity",
2656 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2657 .maxlen = sizeof(int),
2658 .mode = 0644,
2659 .proc_handler = proc_dointvec,
2660 },
2661 {
2662 .procname = "mtu_expires",
2663 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2664 .maxlen = sizeof(int),
2665 .mode = 0644,
2666 .proc_handler = proc_dointvec_jiffies,
2667 },
2668 {
2669 .procname = "min_adv_mss",
2670 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2671 .maxlen = sizeof(int),
2672 .mode = 0644,
2673 .proc_handler = proc_dointvec,
2674 },
2675 {
2676 .procname = "gc_min_interval_ms",
2677 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2678 .maxlen = sizeof(int),
2679 .mode = 0644,
2680 .proc_handler = proc_dointvec_ms_jiffies,
2681 },
2682 { }
2683 };
2684
2685 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2686 {
2687 struct ctl_table *table;
2688
2689 table = kmemdup(ipv6_route_table_template,
2690 sizeof(ipv6_route_table_template),
2691 GFP_KERNEL);
2692
2693 if (table) {
2694 table[0].data = &net->ipv6.sysctl.flush_delay;
2695 table[0].extra1 = net;
2696 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2697 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2698 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2699 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2700 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2701 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2702 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2703 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2704 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2705 }
2706
2707 return table;
2708 }
2709 #endif
2710
2711 static int __net_init ip6_route_net_init(struct net *net)
2712 {
2713 int ret = -ENOMEM;
2714
2715 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2716 sizeof(net->ipv6.ip6_dst_ops));
2717
2718 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2719 goto out_ip6_dst_ops;
2720
2721 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2722 sizeof(*net->ipv6.ip6_null_entry),
2723 GFP_KERNEL);
2724 if (!net->ipv6.ip6_null_entry)
2725 goto out_ip6_dst_entries;
2726 net->ipv6.ip6_null_entry->dst.path =
2727 (struct dst_entry *)net->ipv6.ip6_null_entry;
2728 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2729 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2730 ip6_template_metrics, true);
2731
2732 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2733 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2734 sizeof(*net->ipv6.ip6_prohibit_entry),
2735 GFP_KERNEL);
2736 if (!net->ipv6.ip6_prohibit_entry)
2737 goto out_ip6_null_entry;
2738 net->ipv6.ip6_prohibit_entry->dst.path =
2739 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2740 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2741 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2742 ip6_template_metrics, true);
2743
2744 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2745 sizeof(*net->ipv6.ip6_blk_hole_entry),
2746 GFP_KERNEL);
2747 if (!net->ipv6.ip6_blk_hole_entry)
2748 goto out_ip6_prohibit_entry;
2749 net->ipv6.ip6_blk_hole_entry->dst.path =
2750 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2751 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2752 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2753 ip6_template_metrics, true);
2754 #endif
2755
2756 net->ipv6.sysctl.flush_delay = 0;
2757 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2758 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2759 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2760 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2761 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2762 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2763 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2764
2765 #ifdef CONFIG_PROC_FS
2766 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2767 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2768 #endif
2769 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2770
2771 ret = 0;
2772 out:
2773 return ret;
2774
2775 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2776 out_ip6_prohibit_entry:
2777 kfree(net->ipv6.ip6_prohibit_entry);
2778 out_ip6_null_entry:
2779 kfree(net->ipv6.ip6_null_entry);
2780 #endif
2781 out_ip6_dst_entries:
2782 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2783 out_ip6_dst_ops:
2784 goto out;
2785 }
2786
2787 static void __net_exit ip6_route_net_exit(struct net *net)
2788 {
2789 #ifdef CONFIG_PROC_FS
2790 proc_net_remove(net, "ipv6_route");
2791 proc_net_remove(net, "rt6_stats");
2792 #endif
2793 kfree(net->ipv6.ip6_null_entry);
2794 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2795 kfree(net->ipv6.ip6_prohibit_entry);
2796 kfree(net->ipv6.ip6_blk_hole_entry);
2797 #endif
2798 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2799 }
2800
2801 static struct pernet_operations ip6_route_net_ops = {
2802 .init = ip6_route_net_init,
2803 .exit = ip6_route_net_exit,
2804 };
2805
2806 static struct notifier_block ip6_route_dev_notifier = {
2807 .notifier_call = ip6_route_dev_notify,
2808 .priority = 0,
2809 };
2810
2811 int __init ip6_route_init(void)
2812 {
2813 int ret;
2814
2815 ret = -ENOMEM;
2816 ip6_dst_ops_template.kmem_cachep =
2817 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2818 SLAB_HWCACHE_ALIGN, NULL);
2819 if (!ip6_dst_ops_template.kmem_cachep)
2820 goto out;
2821
2822 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2823 if (ret)
2824 goto out_kmem_cache;
2825
2826 ret = register_pernet_subsys(&ip6_route_net_ops);
2827 if (ret)
2828 goto out_dst_entries;
2829
2830 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2831
2832 /* Registering of the loopback is done before this portion of code,
2833 * the loopback reference in rt6_info will not be taken, do it
2834 * manually for init_net */
2835 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2836 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2837 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2838 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2839 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2840 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2841 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2842 #endif
2843 ret = fib6_init();
2844 if (ret)
2845 goto out_register_subsys;
2846
2847 ret = xfrm6_init();
2848 if (ret)
2849 goto out_fib6_init;
2850
2851 ret = fib6_rules_init();
2852 if (ret)
2853 goto xfrm6_init;
2854
2855 ret = -ENOBUFS;
2856 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2857 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2858 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2859 goto fib6_rules_init;
2860
2861 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2862 if (ret)
2863 goto fib6_rules_init;
2864
2865 out:
2866 return ret;
2867
2868 fib6_rules_init:
2869 fib6_rules_cleanup();
2870 xfrm6_init:
2871 xfrm6_fini();
2872 out_fib6_init:
2873 fib6_gc_cleanup();
2874 out_register_subsys:
2875 unregister_pernet_subsys(&ip6_route_net_ops);
2876 out_dst_entries:
2877 dst_entries_destroy(&ip6_dst_blackhole_ops);
2878 out_kmem_cache:
2879 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2880 goto out;
2881 }
2882
2883 void ip6_route_cleanup(void)
2884 {
2885 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2886 fib6_rules_cleanup();
2887 xfrm6_fini();
2888 fib6_gc_cleanup();
2889 unregister_pernet_subsys(&ip6_route_net_ops);
2890 dst_entries_destroy(&ip6_dst_blackhole_ops);
2891 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2892 }