]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - net/ipv6/route.c
ipv6: Normalize arguments to ip6_dst_blackhole().
[mirror_ubuntu-artful-kernel.git] / net / ipv6 / route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/mroute6.h>
38 #include <linux/init.h>
39 #include <linux/if_arp.h>
40 #include <linux/proc_fs.h>
41 #include <linux/seq_file.h>
42 #include <linux/nsproxy.h>
43 #include <linux/slab.h>
44 #include <net/net_namespace.h>
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Debug verbosity for this file.  Raising RT6_DEBUG to 3 or more compiles
 * in the RDBG()/RT6_TRACE() output; below that both expand to nothing.
 */
#define RT6_DEBUG 2

#if RT6_DEBUG < 3
#define RDBG(x)
#define RT6_TRACE(x...) do { ; } while (0)
#else
#define RDBG(x) printk x
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#endif
74
75 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
76 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
77 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
78 static unsigned int ip6_default_mtu(const struct dst_entry *dst);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
84
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
89
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
94 unsigned pref);
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
98 #endif
99
100 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
101 {
102 struct rt6_info *rt = (struct rt6_info *) dst;
103 struct inet_peer *peer;
104 u32 *p = NULL;
105
106 if (!rt->rt6i_peer)
107 rt6_bind_peer(rt, 1);
108
109 peer = rt->rt6i_peer;
110 if (peer) {
111 u32 *old_p = __DST_METRICS_PTR(old);
112 unsigned long prev, new;
113
114 p = peer->metrics;
115 if (inet_metrics_new(peer))
116 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
117
118 new = (unsigned long) p;
119 prev = cmpxchg(&dst->_metrics, old, new);
120
121 if (prev != old) {
122 p = __DST_METRICS_PTR(prev);
123 if (prev & DST_METRICS_READ_ONLY)
124 p = NULL;
125 }
126 }
127 return p;
128 }
129
130 static struct dst_ops ip6_dst_ops_template = {
131 .family = AF_INET6,
132 .protocol = cpu_to_be16(ETH_P_IPV6),
133 .gc = ip6_dst_gc,
134 .gc_thresh = 1024,
135 .check = ip6_dst_check,
136 .default_advmss = ip6_default_advmss,
137 .default_mtu = ip6_default_mtu,
138 .cow_metrics = ipv6_cow_metrics,
139 .destroy = ip6_dst_destroy,
140 .ifdown = ip6_dst_ifdown,
141 .negative_advice = ip6_negative_advice,
142 .link_failure = ip6_link_failure,
143 .update_pmtu = ip6_rt_update_pmtu,
144 .local_out = __ip6_local_out,
145 };
146
/* default_mtu hook of ip6_dst_blackhole_ops: always reports 0. */
static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
{
	return 0;
}
151
152 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
153 {
154 }
155
156 static struct dst_ops ip6_dst_blackhole_ops = {
157 .family = AF_INET6,
158 .protocol = cpu_to_be16(ETH_P_IPV6),
159 .destroy = ip6_dst_destroy,
160 .check = ip6_dst_check,
161 .default_mtu = ip6_blackhole_default_mtu,
162 .default_advmss = ip6_default_advmss,
163 .update_pmtu = ip6_rt_blackhole_update_pmtu,
164 };
165
166 static const u32 ip6_template_metrics[RTAX_MAX] = {
167 [RTAX_HOPLIMIT - 1] = 255,
168 };
169
170 static struct rt6_info ip6_null_entry_template = {
171 .dst = {
172 .__refcnt = ATOMIC_INIT(1),
173 .__use = 1,
174 .obsolete = -1,
175 .error = -ENETUNREACH,
176 .input = ip6_pkt_discard,
177 .output = ip6_pkt_discard_out,
178 },
179 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
180 .rt6i_protocol = RTPROT_KERNEL,
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183 };
184
185 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
186
187 static int ip6_pkt_prohibit(struct sk_buff *skb);
188 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
189
190 static struct rt6_info ip6_prohibit_entry_template = {
191 .dst = {
192 .__refcnt = ATOMIC_INIT(1),
193 .__use = 1,
194 .obsolete = -1,
195 .error = -EACCES,
196 .input = ip6_pkt_prohibit,
197 .output = ip6_pkt_prohibit_out,
198 },
199 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
200 .rt6i_protocol = RTPROT_KERNEL,
201 .rt6i_metric = ~(u32) 0,
202 .rt6i_ref = ATOMIC_INIT(1),
203 };
204
205 static struct rt6_info ip6_blk_hole_entry_template = {
206 .dst = {
207 .__refcnt = ATOMIC_INIT(1),
208 .__use = 1,
209 .obsolete = -1,
210 .error = -EINVAL,
211 .input = dst_discard,
212 .output = dst_discard,
213 },
214 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
215 .rt6i_protocol = RTPROT_KERNEL,
216 .rt6i_metric = ~(u32) 0,
217 .rt6i_ref = ATOMIC_INIT(1),
218 };
219
220 #endif
221
/*
 * Allocate a rt6_info from @ops via dst_alloc() with an initial reference
 * count argument of 0.  Returns whatever dst_alloc() returned, viewed as a
 * struct rt6_info pointer.
 */
static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
{
	struct rt6_info *rt = (struct rt6_info *)dst_alloc(ops, 0);

	return rt;
}
227
228 static void ip6_dst_destroy(struct dst_entry *dst)
229 {
230 struct rt6_info *rt = (struct rt6_info *)dst;
231 struct inet6_dev *idev = rt->rt6i_idev;
232 struct inet_peer *peer = rt->rt6i_peer;
233
234 if (idev != NULL) {
235 rt->rt6i_idev = NULL;
236 in6_dev_put(idev);
237 }
238 if (peer) {
239 rt->rt6i_peer = NULL;
240 inet_putpeer(peer);
241 }
242 }
243
244 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
245
246 static u32 rt6_peer_genid(void)
247 {
248 return atomic_read(&__rt6_peer_genid);
249 }
250
251 void rt6_bind_peer(struct rt6_info *rt, int create)
252 {
253 struct inet_peer *peer;
254
255 peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
256 if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
257 inet_putpeer(peer);
258 else
259 rt->rt6i_peer_genid = rt6_peer_genid();
260 }
261
262 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
263 int how)
264 {
265 struct rt6_info *rt = (struct rt6_info *)dst;
266 struct inet6_dev *idev = rt->rt6i_idev;
267 struct net_device *loopback_dev =
268 dev_net(dev)->loopback_dev;
269
270 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
271 struct inet6_dev *loopback_idev =
272 in6_dev_get(loopback_dev);
273 if (loopback_idev != NULL) {
274 rt->rt6i_idev = loopback_idev;
275 in6_dev_put(idev);
276 }
277 }
278 }
279
280 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
281 {
282 return (rt->rt6i_flags & RTF_EXPIRES) &&
283 time_after(jiffies, rt->rt6i_expires);
284 }
285
286 static inline int rt6_need_strict(struct in6_addr *daddr)
287 {
288 return ipv6_addr_type(daddr) &
289 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
290 }
291
292 /*
293 * Route lookup. Any table->tb6_lock is implied.
294 */
295
296 static inline struct rt6_info *rt6_device_match(struct net *net,
297 struct rt6_info *rt,
298 struct in6_addr *saddr,
299 int oif,
300 int flags)
301 {
302 struct rt6_info *local = NULL;
303 struct rt6_info *sprt;
304
305 if (!oif && ipv6_addr_any(saddr))
306 goto out;
307
308 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
309 struct net_device *dev = sprt->rt6i_dev;
310
311 if (oif) {
312 if (dev->ifindex == oif)
313 return sprt;
314 if (dev->flags & IFF_LOOPBACK) {
315 if (sprt->rt6i_idev == NULL ||
316 sprt->rt6i_idev->dev->ifindex != oif) {
317 if (flags & RT6_LOOKUP_F_IFACE && oif)
318 continue;
319 if (local && (!oif ||
320 local->rt6i_idev->dev->ifindex == oif))
321 continue;
322 }
323 local = sprt;
324 }
325 } else {
326 if (ipv6_chk_addr(net, saddr, dev,
327 flags & RT6_LOOKUP_F_IFACE))
328 return sprt;
329 }
330 }
331
332 if (oif) {
333 if (local)
334 return local;
335
336 if (flags & RT6_LOOKUP_F_IFACE)
337 return net->ipv6.ip6_null_entry;
338 }
339 out:
340 return rt;
341 }
342
#ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing: if the next hop of @rt is not in a
 * NUD_VALID state and the last update of its neighbour entry is older than
 * the interface's rtr_probe_interval, send a neighbour solicitation for it
 * to the solicited-node multicast address.
 *
 * A NULL @rt or a route without a next hop is ignored.
 */
static void rt6_probe(struct rt6_info *rt)
{
	struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
	/*
	 * Okay, this does not seem to be appropriate
	 * for now, however, we need to check if it
	 * is really so; aka Router Reachability Probing.
	 *
	 * Router Reachability Probe MUST be rate-limited
	 * to no more than one per minute.
	 */
	if (!neigh || (neigh->nud_state & NUD_VALID))
		return;

	/*
	 * neigh->updated is modified below, so the lock has to be taken for
	 * writing; holding only the read side would let two CPUs pass the
	 * rate-limit check and both send a probe.
	 */
	write_lock_bh(&neigh->lock);
	if (!(neigh->nud_state & NUD_VALID) &&
	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
		struct in6_addr mcaddr;
		struct in6_addr *target;

		neigh->updated = jiffies;
		write_unlock_bh(&neigh->lock);

		/* The solicitation itself is sent with the lock dropped. */
		target = (struct in6_addr *)&neigh->primary_key;
		addrconf_addr_solict_mult(target, &mcaddr);
		ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
	} else
		write_unlock_bh(&neigh->lock);
}
#else
/* Router preference support is compiled out: probing is a no-op. */
static inline void rt6_probe(struct rt6_info *rt)
{
}
#endif
377
378 /*
379 * Default Router Selection (RFC 2461 6.3.6)
380 */
381 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
382 {
383 struct net_device *dev = rt->rt6i_dev;
384 if (!oif || dev->ifindex == oif)
385 return 2;
386 if ((dev->flags & IFF_LOOPBACK) &&
387 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
388 return 1;
389 return 0;
390 }
391
392 static inline int rt6_check_neigh(struct rt6_info *rt)
393 {
394 struct neighbour *neigh = rt->rt6i_nexthop;
395 int m;
396 if (rt->rt6i_flags & RTF_NONEXTHOP ||
397 !(rt->rt6i_flags & RTF_GATEWAY))
398 m = 1;
399 else if (neigh) {
400 read_lock_bh(&neigh->lock);
401 if (neigh->nud_state & NUD_VALID)
402 m = 2;
403 #ifdef CONFIG_IPV6_ROUTER_PREF
404 else if (neigh->nud_state & NUD_FAILED)
405 m = 0;
406 #endif
407 else
408 m = 1;
409 read_unlock_bh(&neigh->lock);
410 } else
411 m = 0;
412 return m;
413 }
414
415 static int rt6_score_route(struct rt6_info *rt, int oif,
416 int strict)
417 {
418 int m, n;
419
420 m = rt6_check_dev(rt, oif);
421 if (!m && (strict & RT6_LOOKUP_F_IFACE))
422 return -1;
423 #ifdef CONFIG_IPV6_ROUTER_PREF
424 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
425 #endif
426 n = rt6_check_neigh(rt);
427 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
428 return -1;
429 return m;
430 }
431
432 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
433 int *mpri, struct rt6_info *match)
434 {
435 int m;
436
437 if (rt6_check_expired(rt))
438 goto out;
439
440 m = rt6_score_route(rt, oif, strict);
441 if (m < 0)
442 goto out;
443
444 if (m > *mpri) {
445 if (strict & RT6_LOOKUP_F_REACHABLE)
446 rt6_probe(match);
447 *mpri = m;
448 match = rt;
449 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
450 rt6_probe(rt);
451 }
452
453 out:
454 return match;
455 }
456
457 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
458 struct rt6_info *rr_head,
459 u32 metric, int oif, int strict)
460 {
461 struct rt6_info *rt, *match;
462 int mpri = -1;
463
464 match = NULL;
465 for (rt = rr_head; rt && rt->rt6i_metric == metric;
466 rt = rt->dst.rt6_next)
467 match = find_match(rt, oif, strict, &mpri, match);
468 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
469 rt = rt->dst.rt6_next)
470 match = find_match(rt, oif, strict, &mpri, match);
471
472 return match;
473 }
474
475 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
476 {
477 struct rt6_info *match, *rt0;
478 struct net *net;
479
480 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
481 __func__, fn->leaf, oif);
482
483 rt0 = fn->rr_ptr;
484 if (!rt0)
485 fn->rr_ptr = rt0 = fn->leaf;
486
487 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
488
489 if (!match &&
490 (strict & RT6_LOOKUP_F_REACHABLE)) {
491 struct rt6_info *next = rt0->dst.rt6_next;
492
493 /* no entries matched; do round-robin */
494 if (!next || next->rt6i_metric != rt0->rt6i_metric)
495 next = fn->leaf;
496
497 if (next != rt0)
498 fn->rr_ptr = next;
499 }
500
501 RT6_TRACE("%s() => %p\n",
502 __func__, match);
503
504 net = dev_net(rt0->rt6i_dev);
505 return match ? match : net->ipv6.ip6_null_entry;
506 }
507
#ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a received route information option.
 *
 * @dev:    device the option arrived on
 * @opt:    start of the option, interpreted as struct route_info
 * @len:    number of bytes available at @opt
 * @gwaddr: address of the advertising router
 *
 * Validates the option, then adds, refreshes or (for a zero lifetime)
 * deletes the matching route-info route and sets its expiry.
 * Returns 0 on success or -EINVAL for a malformed option.
 */
int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
		  struct in6_addr *gwaddr)
{
	struct net *net = dev_net(dev);
	struct route_info *rinfo = (struct route_info *) opt;
	struct in6_addr prefix_buf, *prefix;
	unsigned int pref;
	unsigned long lifetime;
	struct rt6_info *rt;

	if (len < sizeof(struct route_info))
		return -EINVAL;

	/* Sanity check for prefix_len and length */
	if (rinfo->length > 3)
		return -EINVAL;
	if (rinfo->prefix_len > 128)
		return -EINVAL;
	if (rinfo->prefix_len > 64) {
		if (rinfo->length < 2)
			return -EINVAL;
	} else if (rinfo->prefix_len > 0) {
		if (rinfo->length < 1)
			return -EINVAL;
	}

	pref = rinfo->route_pref;
	if (pref == ICMPV6_ROUTER_PREF_INVALID)
		return -EINVAL;

	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);

	if (rinfo->length == 3) {
		prefix = (struct in6_addr *)rinfo->prefix;
	} else {
		/* this function is safe */
		ipv6_addr_prefix(&prefix_buf,
				 (struct in6_addr *)rinfo->prefix,
				 rinfo->prefix_len);
		prefix = &prefix_buf;
	}

	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
				dev->ifindex);

	if (rt) {
		if (!lifetime) {
			/* Zero lifetime withdraws an existing route. */
			ip6_del_rt(rt);
			rt = NULL;
		} else {
			rt->rt6i_flags = RTF_ROUTEINFO |
					 (rt->rt6i_flags & ~RTF_PREF_MASK) |
					 RTF_PREF(pref);
		}
	} else if (lifetime) {
		rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr,
					dev->ifindex, pref);
	}

	if (rt) {
		if (addrconf_finite_timeout(lifetime)) {
			rt->rt6i_expires = jiffies + HZ * lifetime;
			rt->rt6i_flags |= RTF_EXPIRES;
		} else {
			rt->rt6i_flags &= ~RTF_EXPIRES;
		}
		dst_release(&rt->dst);
	}
	return 0;
}
#endif
581
/*
 * BACKTRACK - climb the fib6 tree when the current node produced no route.
 *
 * Must be expanded inside a function that has locals named "rt" and "fn"
 * and labels named "out" and "restart".  Does nothing unless rt is the
 * namespace's null entry.  Otherwise it walks upwards from fn: at each step
 * it jumps to "out" if fn is flagged RTN_TL_ROOT, moves to the parent (or,
 * when the parent has a subtree other than fn, to the fib6_lookup() result
 * in that subtree for saddr), and jumps to "restart" once the new fn is
 * flagged RTN_RTINFO.
 */
#define BACKTRACK(__net, saddr) \
do { \
	if (rt == __net->ipv6.ip6_null_entry) { \
		struct fib6_node *pn; \
		for (;;) { \
			if (fn->fn_flags & RTN_TL_ROOT) \
				goto out; \
			pn = fn->parent; \
			if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
				fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
			else \
				fn = pn; \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
599
600 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
601 struct fib6_table *table,
602 struct flowi *fl, int flags)
603 {
604 struct fib6_node *fn;
605 struct rt6_info *rt;
606
607 read_lock_bh(&table->tb6_lock);
608 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
609 restart:
610 rt = fn->leaf;
611 rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
612 BACKTRACK(net, &fl->fl6_src);
613 out:
614 dst_use(&rt->dst, jiffies);
615 read_unlock_bh(&table->tb6_lock);
616 return rt;
617
618 }
619
620 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
621 const struct in6_addr *saddr, int oif, int strict)
622 {
623 struct flowi fl = {
624 .oif = oif,
625 .fl6_dst = *daddr,
626 };
627 struct dst_entry *dst;
628 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
629
630 if (saddr) {
631 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
632 flags |= RT6_LOOKUP_F_HAS_SADDR;
633 }
634
635 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
636 if (dst->error == 0)
637 return (struct rt6_info *) dst;
638
639 dst_release(dst);
640
641 return NULL;
642 }
643
644 EXPORT_SYMBOL(rt6_lookup);
645
646 /* ip6_ins_rt is called with FREE table->tb6_lock.
647 It takes new route entry, the addition fails by any reason the
648 route is freed. In any case, if caller does not hold it, it may
649 be destroyed.
650 */
651
652 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
653 {
654 int err;
655 struct fib6_table *table;
656
657 table = rt->rt6i_table;
658 write_lock_bh(&table->tb6_lock);
659 err = fib6_add(&table->tb6_root, rt, info);
660 write_unlock_bh(&table->tb6_lock);
661
662 return err;
663 }
664
665 int ip6_ins_rt(struct rt6_info *rt)
666 {
667 struct nl_info info = {
668 .nl_net = dev_net(rt->rt6i_dev),
669 };
670 return __ip6_ins_rt(rt, &info);
671 }
672
673 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
674 struct in6_addr *saddr)
675 {
676 struct rt6_info *rt;
677
678 /*
679 * Clone the route.
680 */
681
682 rt = ip6_rt_copy(ort);
683
684 if (rt) {
685 struct neighbour *neigh;
686 int attempts = !in_softirq();
687
688 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
689 if (rt->rt6i_dst.plen != 128 &&
690 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
691 rt->rt6i_flags |= RTF_ANYCAST;
692 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
693 }
694
695 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
696 rt->rt6i_dst.plen = 128;
697 rt->rt6i_flags |= RTF_CACHE;
698 rt->dst.flags |= DST_HOST;
699
700 #ifdef CONFIG_IPV6_SUBTREES
701 if (rt->rt6i_src.plen && saddr) {
702 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
703 rt->rt6i_src.plen = 128;
704 }
705 #endif
706
707 retry:
708 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
709 if (IS_ERR(neigh)) {
710 struct net *net = dev_net(rt->rt6i_dev);
711 int saved_rt_min_interval =
712 net->ipv6.sysctl.ip6_rt_gc_min_interval;
713 int saved_rt_elasticity =
714 net->ipv6.sysctl.ip6_rt_gc_elasticity;
715
716 if (attempts-- > 0) {
717 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
718 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
719
720 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
721
722 net->ipv6.sysctl.ip6_rt_gc_elasticity =
723 saved_rt_elasticity;
724 net->ipv6.sysctl.ip6_rt_gc_min_interval =
725 saved_rt_min_interval;
726 goto retry;
727 }
728
729 if (net_ratelimit())
730 printk(KERN_WARNING
731 "ipv6: Neighbour table overflow.\n");
732 dst_free(&rt->dst);
733 return NULL;
734 }
735 rt->rt6i_nexthop = neigh;
736
737 }
738
739 return rt;
740 }
741
742 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
743 {
744 struct rt6_info *rt = ip6_rt_copy(ort);
745 if (rt) {
746 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
747 rt->rt6i_dst.plen = 128;
748 rt->rt6i_flags |= RTF_CACHE;
749 rt->dst.flags |= DST_HOST;
750 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
751 }
752 return rt;
753 }
754
755 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
756 struct flowi *fl, int flags)
757 {
758 struct fib6_node *fn;
759 struct rt6_info *rt, *nrt;
760 int strict = 0;
761 int attempts = 3;
762 int err;
763 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
764
765 strict |= flags & RT6_LOOKUP_F_IFACE;
766
767 relookup:
768 read_lock_bh(&table->tb6_lock);
769
770 restart_2:
771 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
772
773 restart:
774 rt = rt6_select(fn, oif, strict | reachable);
775
776 BACKTRACK(net, &fl->fl6_src);
777 if (rt == net->ipv6.ip6_null_entry ||
778 rt->rt6i_flags & RTF_CACHE)
779 goto out;
780
781 dst_hold(&rt->dst);
782 read_unlock_bh(&table->tb6_lock);
783
784 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
785 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
786 else
787 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
788
789 dst_release(&rt->dst);
790 rt = nrt ? : net->ipv6.ip6_null_entry;
791
792 dst_hold(&rt->dst);
793 if (nrt) {
794 err = ip6_ins_rt(nrt);
795 if (!err)
796 goto out2;
797 }
798
799 if (--attempts <= 0)
800 goto out2;
801
802 /*
803 * Race condition! In the gap, when table->tb6_lock was
804 * released someone could insert this route. Relookup.
805 */
806 dst_release(&rt->dst);
807 goto relookup;
808
809 out:
810 if (reachable) {
811 reachable = 0;
812 goto restart_2;
813 }
814 dst_hold(&rt->dst);
815 read_unlock_bh(&table->tb6_lock);
816 out2:
817 rt->dst.lastuse = jiffies;
818 rt->dst.__use++;
819
820 return rt;
821 }
822
823 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
824 struct flowi *fl, int flags)
825 {
826 return ip6_pol_route(net, table, fl->iif, fl, flags);
827 }
828
829 void ip6_route_input(struct sk_buff *skb)
830 {
831 struct ipv6hdr *iph = ipv6_hdr(skb);
832 struct net *net = dev_net(skb->dev);
833 int flags = RT6_LOOKUP_F_HAS_SADDR;
834 struct flowi fl = {
835 .iif = skb->dev->ifindex,
836 .fl6_dst = iph->daddr,
837 .fl6_src = iph->saddr,
838 .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
839 .mark = skb->mark,
840 .proto = iph->nexthdr,
841 };
842
843 if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
844 flags |= RT6_LOOKUP_F_IFACE;
845
846 skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
847 }
848
849 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
850 struct flowi *fl, int flags)
851 {
852 return ip6_pol_route(net, table, fl->oif, fl, flags);
853 }
854
855 struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
856 struct flowi *fl)
857 {
858 int flags = 0;
859
860 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
861 flags |= RT6_LOOKUP_F_IFACE;
862
863 if (!ipv6_addr_any(&fl->fl6_src))
864 flags |= RT6_LOOKUP_F_HAS_SADDR;
865 else if (sk)
866 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
867
868 return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
869 }
870
871 EXPORT_SYMBOL(ip6_route_output);
872
873 struct dst_entry *ip6_dst_blackhole(struct net *net, struct dst_entry *dst_orig)
874 {
875 struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
876 struct rt6_info *ort = (struct rt6_info *) dst_orig;
877 struct dst_entry *new = NULL;
878
879 if (rt) {
880 new = &rt->dst;
881
882 new->__use = 1;
883 new->input = dst_discard;
884 new->output = dst_discard;
885
886 dst_copy_metrics(new, &ort->dst);
887 new->dev = ort->dst.dev;
888 if (new->dev)
889 dev_hold(new->dev);
890 rt->rt6i_idev = ort->rt6i_idev;
891 if (rt->rt6i_idev)
892 in6_dev_hold(rt->rt6i_idev);
893 rt->rt6i_expires = 0;
894
895 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
896 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
897 rt->rt6i_metric = 0;
898
899 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
900 #ifdef CONFIG_IPV6_SUBTREES
901 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
902 #endif
903
904 dst_free(new);
905 }
906
907 dst_release(dst_orig);
908 return new ? new : ERR_PTR(-ENOMEM);
909 }
910 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
911
912 /*
913 * Destination cache support functions
914 */
915
916 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
917 {
918 struct rt6_info *rt;
919
920 rt = (struct rt6_info *) dst;
921
922 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
923 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
924 if (!rt->rt6i_peer)
925 rt6_bind_peer(rt, 0);
926 rt->rt6i_peer_genid = rt6_peer_genid();
927 }
928 return dst;
929 }
930 return NULL;
931 }
932
933 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
934 {
935 struct rt6_info *rt = (struct rt6_info *) dst;
936
937 if (rt) {
938 if (rt->rt6i_flags & RTF_CACHE) {
939 if (rt6_check_expired(rt)) {
940 ip6_del_rt(rt);
941 dst = NULL;
942 }
943 } else {
944 dst_release(dst);
945 dst = NULL;
946 }
947 }
948 return dst;
949 }
950
951 static void ip6_link_failure(struct sk_buff *skb)
952 {
953 struct rt6_info *rt;
954
955 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
956
957 rt = (struct rt6_info *) skb_dst(skb);
958 if (rt) {
959 if (rt->rt6i_flags&RTF_CACHE) {
960 dst_set_expires(&rt->dst, 0);
961 rt->rt6i_flags |= RTF_EXPIRES;
962 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
963 rt->rt6i_node->fn_sernum = -1;
964 }
965 }
966
967 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
968 {
969 struct rt6_info *rt6 = (struct rt6_info*)dst;
970
971 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
972 rt6->rt6i_flags |= RTF_MODIFIED;
973 if (mtu < IPV6_MIN_MTU) {
974 u32 features = dst_metric(dst, RTAX_FEATURES);
975 mtu = IPV6_MIN_MTU;
976 features |= RTAX_FEATURE_ALLFRAG;
977 dst_metric_set(dst, RTAX_FEATURES, features);
978 }
979 dst_metric_set(dst, RTAX_MTU, mtu);
980 }
981 }
982
983 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
984 {
985 struct net_device *dev = dst->dev;
986 unsigned int mtu = dst_mtu(dst);
987 struct net *net = dev_net(dev);
988
989 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
990
991 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
992 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
993
994 /*
995 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
996 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
997 * IPV6_MAXPLEN is also valid and means: "any MSS,
998 * rely only on pmtu discovery"
999 */
1000 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1001 mtu = IPV6_MAXPLEN;
1002 return mtu;
1003 }
1004
1005 static unsigned int ip6_default_mtu(const struct dst_entry *dst)
1006 {
1007 unsigned int mtu = IPV6_MIN_MTU;
1008 struct inet6_dev *idev;
1009
1010 rcu_read_lock();
1011 idev = __in6_dev_get(dst->dev);
1012 if (idev)
1013 mtu = idev->cnf.mtu6;
1014 rcu_read_unlock();
1015
1016 return mtu;
1017 }
1018
1019 static struct dst_entry *icmp6_dst_gc_list;
1020 static DEFINE_SPINLOCK(icmp6_dst_lock);
1021
1022 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1023 struct neighbour *neigh,
1024 const struct in6_addr *addr)
1025 {
1026 struct rt6_info *rt;
1027 struct inet6_dev *idev = in6_dev_get(dev);
1028 struct net *net = dev_net(dev);
1029
1030 if (unlikely(idev == NULL))
1031 return NULL;
1032
1033 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1034 if (unlikely(rt == NULL)) {
1035 in6_dev_put(idev);
1036 goto out;
1037 }
1038
1039 dev_hold(dev);
1040 if (neigh)
1041 neigh_hold(neigh);
1042 else {
1043 neigh = ndisc_get_neigh(dev, addr);
1044 if (IS_ERR(neigh))
1045 neigh = NULL;
1046 }
1047
1048 rt->rt6i_dev = dev;
1049 rt->rt6i_idev = idev;
1050 rt->rt6i_nexthop = neigh;
1051 atomic_set(&rt->dst.__refcnt, 1);
1052 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1053 rt->dst.output = ip6_output;
1054
1055 #if 0 /* there's no chance to use these for ndisc */
1056 rt->dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
1057 ? DST_HOST
1058 : 0;
1059 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1060 rt->rt6i_dst.plen = 128;
1061 #endif
1062
1063 spin_lock_bh(&icmp6_dst_lock);
1064 rt->dst.next = icmp6_dst_gc_list;
1065 icmp6_dst_gc_list = &rt->dst;
1066 spin_unlock_bh(&icmp6_dst_lock);
1067
1068 fib6_force_start_gc(net);
1069
1070 out:
1071 return &rt->dst;
1072 }
1073
1074 int icmp6_dst_gc(void)
1075 {
1076 struct dst_entry *dst, **pprev;
1077 int more = 0;
1078
1079 spin_lock_bh(&icmp6_dst_lock);
1080 pprev = &icmp6_dst_gc_list;
1081
1082 while ((dst = *pprev) != NULL) {
1083 if (!atomic_read(&dst->__refcnt)) {
1084 *pprev = dst->next;
1085 dst_free(dst);
1086 } else {
1087 pprev = &dst->next;
1088 ++more;
1089 }
1090 }
1091
1092 spin_unlock_bh(&icmp6_dst_lock);
1093
1094 return more;
1095 }
1096
1097 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1098 void *arg)
1099 {
1100 struct dst_entry *dst, **pprev;
1101
1102 spin_lock_bh(&icmp6_dst_lock);
1103 pprev = &icmp6_dst_gc_list;
1104 while ((dst = *pprev) != NULL) {
1105 struct rt6_info *rt = (struct rt6_info *) dst;
1106 if (func(rt, arg)) {
1107 *pprev = dst->next;
1108 dst_free(dst);
1109 } else {
1110 pprev = &dst->next;
1111 }
1112 }
1113 spin_unlock_bh(&icmp6_dst_lock);
1114 }
1115
1116 static int ip6_dst_gc(struct dst_ops *ops)
1117 {
1118 unsigned long now = jiffies;
1119 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1120 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1121 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1122 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1123 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1124 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1125 int entries;
1126
1127 entries = dst_entries_get_fast(ops);
1128 if (time_after(rt_last_gc + rt_min_interval, now) &&
1129 entries <= rt_max_size)
1130 goto out;
1131
1132 net->ipv6.ip6_rt_gc_expire++;
1133 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1134 net->ipv6.ip6_rt_last_gc = now;
1135 entries = dst_entries_get_slow(ops);
1136 if (entries < ops->gc_thresh)
1137 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1138 out:
1139 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1140 return entries > rt_max_size;
1141 }
1142
1143 /* Clean host part of a prefix. Not necessary in radix tree,
1144 but results in cleaner routing tables.
1145
1146 Remove it only when all the things will work!
1147 */
1148
1149 int ip6_dst_hoplimit(struct dst_entry *dst)
1150 {
1151 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1152 if (hoplimit == 0) {
1153 struct net_device *dev = dst->dev;
1154 struct inet6_dev *idev;
1155
1156 rcu_read_lock();
1157 idev = __in6_dev_get(dev);
1158 if (idev)
1159 hoplimit = idev->cnf.hop_limit;
1160 else
1161 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1162 rcu_read_unlock();
1163 }
1164 return hoplimit;
1165 }
1166 EXPORT_SYMBOL(ip6_dst_hoplimit);
1167
1168 /*
1169 *
1170 */
1171
1172 int ip6_route_add(struct fib6_config *cfg)
1173 {
1174 int err;
1175 struct net *net = cfg->fc_nlinfo.nl_net;
1176 struct rt6_info *rt = NULL;
1177 struct net_device *dev = NULL;
1178 struct inet6_dev *idev = NULL;
1179 struct fib6_table *table;
1180 int addr_type;
1181
1182 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1183 return -EINVAL;
1184 #ifndef CONFIG_IPV6_SUBTREES
1185 if (cfg->fc_src_len)
1186 return -EINVAL;
1187 #endif
1188 if (cfg->fc_ifindex) {
1189 err = -ENODEV;
1190 dev = dev_get_by_index(net, cfg->fc_ifindex);
1191 if (!dev)
1192 goto out;
1193 idev = in6_dev_get(dev);
1194 if (!idev)
1195 goto out;
1196 }
1197
1198 if (cfg->fc_metric == 0)
1199 cfg->fc_metric = IP6_RT_PRIO_USER;
1200
1201 table = fib6_new_table(net, cfg->fc_table);
1202 if (table == NULL) {
1203 err = -ENOBUFS;
1204 goto out;
1205 }
1206
1207 rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1208
1209 if (rt == NULL) {
1210 err = -ENOMEM;
1211 goto out;
1212 }
1213
1214 rt->dst.obsolete = -1;
1215 rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
1216 jiffies + clock_t_to_jiffies(cfg->fc_expires) :
1217 0;
1218
1219 if (cfg->fc_protocol == RTPROT_UNSPEC)
1220 cfg->fc_protocol = RTPROT_BOOT;
1221 rt->rt6i_protocol = cfg->fc_protocol;
1222
1223 addr_type = ipv6_addr_type(&cfg->fc_dst);
1224
1225 if (addr_type & IPV6_ADDR_MULTICAST)
1226 rt->dst.input = ip6_mc_input;
1227 else if (cfg->fc_flags & RTF_LOCAL)
1228 rt->dst.input = ip6_input;
1229 else
1230 rt->dst.input = ip6_forward;
1231
1232 rt->dst.output = ip6_output;
1233
1234 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1235 rt->rt6i_dst.plen = cfg->fc_dst_len;
1236 if (rt->rt6i_dst.plen == 128)
1237 rt->dst.flags = DST_HOST;
1238
1239 #ifdef CONFIG_IPV6_SUBTREES
1240 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1241 rt->rt6i_src.plen = cfg->fc_src_len;
1242 #endif
1243
1244 rt->rt6i_metric = cfg->fc_metric;
1245
1246 /* We cannot add true routes via loopback here,
1247 they would result in kernel looping; promote them to reject routes
1248 */
1249 if ((cfg->fc_flags & RTF_REJECT) ||
1250 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1251 && !(cfg->fc_flags&RTF_LOCAL))) {
1252 /* hold loopback dev/idev if we haven't done so. */
1253 if (dev != net->loopback_dev) {
1254 if (dev) {
1255 dev_put(dev);
1256 in6_dev_put(idev);
1257 }
1258 dev = net->loopback_dev;
1259 dev_hold(dev);
1260 idev = in6_dev_get(dev);
1261 if (!idev) {
1262 err = -ENODEV;
1263 goto out;
1264 }
1265 }
1266 rt->dst.output = ip6_pkt_discard_out;
1267 rt->dst.input = ip6_pkt_discard;
1268 rt->dst.error = -ENETUNREACH;
1269 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1270 goto install_route;
1271 }
1272
1273 if (cfg->fc_flags & RTF_GATEWAY) {
1274 struct in6_addr *gw_addr;
1275 int gwa_type;
1276
1277 gw_addr = &cfg->fc_gateway;
1278 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1279 gwa_type = ipv6_addr_type(gw_addr);
1280
1281 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1282 struct rt6_info *grt;
1283
1284 /* IPv6 strictly inhibits using not link-local
1285 addresses as nexthop address.
1286 Otherwise, router will not able to send redirects.
1287 It is very good, but in some (rare!) circumstances
1288 (SIT, PtP, NBMA NOARP links) it is handy to allow
1289 some exceptions. --ANK
1290 */
1291 err = -EINVAL;
1292 if (!(gwa_type&IPV6_ADDR_UNICAST))
1293 goto out;
1294
1295 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1296
1297 err = -EHOSTUNREACH;
1298 if (grt == NULL)
1299 goto out;
1300 if (dev) {
1301 if (dev != grt->rt6i_dev) {
1302 dst_release(&grt->dst);
1303 goto out;
1304 }
1305 } else {
1306 dev = grt->rt6i_dev;
1307 idev = grt->rt6i_idev;
1308 dev_hold(dev);
1309 in6_dev_hold(grt->rt6i_idev);
1310 }
1311 if (!(grt->rt6i_flags&RTF_GATEWAY))
1312 err = 0;
1313 dst_release(&grt->dst);
1314
1315 if (err)
1316 goto out;
1317 }
1318 err = -EINVAL;
1319 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1320 goto out;
1321 }
1322
1323 err = -ENODEV;
1324 if (dev == NULL)
1325 goto out;
1326
1327 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1328 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1329 if (IS_ERR(rt->rt6i_nexthop)) {
1330 err = PTR_ERR(rt->rt6i_nexthop);
1331 rt->rt6i_nexthop = NULL;
1332 goto out;
1333 }
1334 }
1335
1336 rt->rt6i_flags = cfg->fc_flags;
1337
1338 install_route:
1339 if (cfg->fc_mx) {
1340 struct nlattr *nla;
1341 int remaining;
1342
1343 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1344 int type = nla_type(nla);
1345
1346 if (type) {
1347 if (type > RTAX_MAX) {
1348 err = -EINVAL;
1349 goto out;
1350 }
1351
1352 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1353 }
1354 }
1355 }
1356
1357 rt->dst.dev = dev;
1358 rt->rt6i_idev = idev;
1359 rt->rt6i_table = table;
1360
1361 cfg->fc_nlinfo.nl_net = dev_net(dev);
1362
1363 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1364
1365 out:
1366 if (dev)
1367 dev_put(dev);
1368 if (idev)
1369 in6_dev_put(idev);
1370 if (rt)
1371 dst_free(&rt->dst);
1372 return err;
1373 }
1374
1375 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1376 {
1377 int err;
1378 struct fib6_table *table;
1379 struct net *net = dev_net(rt->rt6i_dev);
1380
1381 if (rt == net->ipv6.ip6_null_entry)
1382 return -ENOENT;
1383
1384 table = rt->rt6i_table;
1385 write_lock_bh(&table->tb6_lock);
1386
1387 err = fib6_del(rt, info);
1388 dst_release(&rt->dst);
1389
1390 write_unlock_bh(&table->tb6_lock);
1391
1392 return err;
1393 }
1394
1395 int ip6_del_rt(struct rt6_info *rt)
1396 {
1397 struct nl_info info = {
1398 .nl_net = dev_net(rt->rt6i_dev),
1399 };
1400 return __ip6_del_rt(rt, &info);
1401 }
1402
1403 static int ip6_route_del(struct fib6_config *cfg)
1404 {
1405 struct fib6_table *table;
1406 struct fib6_node *fn;
1407 struct rt6_info *rt;
1408 int err = -ESRCH;
1409
1410 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1411 if (table == NULL)
1412 return err;
1413
1414 read_lock_bh(&table->tb6_lock);
1415
1416 fn = fib6_locate(&table->tb6_root,
1417 &cfg->fc_dst, cfg->fc_dst_len,
1418 &cfg->fc_src, cfg->fc_src_len);
1419
1420 if (fn) {
1421 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1422 if (cfg->fc_ifindex &&
1423 (rt->rt6i_dev == NULL ||
1424 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1425 continue;
1426 if (cfg->fc_flags & RTF_GATEWAY &&
1427 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1428 continue;
1429 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1430 continue;
1431 dst_hold(&rt->dst);
1432 read_unlock_bh(&table->tb6_lock);
1433
1434 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1435 }
1436 }
1437 read_unlock_bh(&table->tb6_lock);
1438
1439 return err;
1440 }
1441
1442 /*
1443 * Handle redirects
1444 */
1445 struct ip6rd_flowi {
1446 struct flowi fl;
1447 struct in6_addr gateway;
1448 };
1449
1450 static struct rt6_info *__ip6_route_redirect(struct net *net,
1451 struct fib6_table *table,
1452 struct flowi *fl,
1453 int flags)
1454 {
1455 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1456 struct rt6_info *rt;
1457 struct fib6_node *fn;
1458
1459 /*
1460 * Get the "current" route for this destination and
1461 * check if the redirect has come from approriate router.
1462 *
1463 * RFC 2461 specifies that redirects should only be
1464 * accepted if they come from the nexthop to the target.
1465 * Due to the way the routes are chosen, this notion
1466 * is a bit fuzzy and one might need to check all possible
1467 * routes.
1468 */
1469
1470 read_lock_bh(&table->tb6_lock);
1471 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1472 restart:
1473 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1474 /*
1475 * Current route is on-link; redirect is always invalid.
1476 *
1477 * Seems, previous statement is not true. It could
1478 * be node, which looks for us as on-link (f.e. proxy ndisc)
1479 * But then router serving it might decide, that we should
1480 * know truth 8)8) --ANK (980726).
1481 */
1482 if (rt6_check_expired(rt))
1483 continue;
1484 if (!(rt->rt6i_flags & RTF_GATEWAY))
1485 continue;
1486 if (fl->oif != rt->rt6i_dev->ifindex)
1487 continue;
1488 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1489 continue;
1490 break;
1491 }
1492
1493 if (!rt)
1494 rt = net->ipv6.ip6_null_entry;
1495 BACKTRACK(net, &fl->fl6_src);
1496 out:
1497 dst_hold(&rt->dst);
1498
1499 read_unlock_bh(&table->tb6_lock);
1500
1501 return rt;
1502 };
1503
1504 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1505 struct in6_addr *src,
1506 struct in6_addr *gateway,
1507 struct net_device *dev)
1508 {
1509 int flags = RT6_LOOKUP_F_HAS_SADDR;
1510 struct net *net = dev_net(dev);
1511 struct ip6rd_flowi rdfl = {
1512 .fl = {
1513 .oif = dev->ifindex,
1514 .fl6_dst = *dest,
1515 .fl6_src = *src,
1516 },
1517 };
1518
1519 ipv6_addr_copy(&rdfl.gateway, gateway);
1520
1521 if (rt6_need_strict(dest))
1522 flags |= RT6_LOOKUP_F_IFACE;
1523
1524 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1525 flags, __ip6_route_redirect);
1526 }
1527
1528 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1529 struct in6_addr *saddr,
1530 struct neighbour *neigh, u8 *lladdr, int on_link)
1531 {
1532 struct rt6_info *rt, *nrt = NULL;
1533 struct netevent_redirect netevent;
1534 struct net *net = dev_net(neigh->dev);
1535
1536 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1537
1538 if (rt == net->ipv6.ip6_null_entry) {
1539 if (net_ratelimit())
1540 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1541 "for redirect target\n");
1542 goto out;
1543 }
1544
1545 /*
1546 * We have finally decided to accept it.
1547 */
1548
1549 neigh_update(neigh, lladdr, NUD_STALE,
1550 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1551 NEIGH_UPDATE_F_OVERRIDE|
1552 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1553 NEIGH_UPDATE_F_ISROUTER))
1554 );
1555
1556 /*
1557 * Redirect received -> path was valid.
1558 * Look, redirects are sent only in response to data packets,
1559 * so that this nexthop apparently is reachable. --ANK
1560 */
1561 dst_confirm(&rt->dst);
1562
1563 /* Duplicate redirect: silently ignore. */
1564 if (neigh == rt->dst.neighbour)
1565 goto out;
1566
1567 nrt = ip6_rt_copy(rt);
1568 if (nrt == NULL)
1569 goto out;
1570
1571 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1572 if (on_link)
1573 nrt->rt6i_flags &= ~RTF_GATEWAY;
1574
1575 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1576 nrt->rt6i_dst.plen = 128;
1577 nrt->dst.flags |= DST_HOST;
1578
1579 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1580 nrt->rt6i_nexthop = neigh_clone(neigh);
1581
1582 if (ip6_ins_rt(nrt))
1583 goto out;
1584
1585 netevent.old = &rt->dst;
1586 netevent.new = &nrt->dst;
1587 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1588
1589 if (rt->rt6i_flags&RTF_CACHE) {
1590 ip6_del_rt(rt);
1591 return;
1592 }
1593
1594 out:
1595 dst_release(&rt->dst);
1596 }
1597
1598 /*
1599 * Handle ICMP "packet too big" messages
1600 * i.e. Path MTU discovery
1601 */
1602
1603 static void rt6_do_pmtu_disc(struct in6_addr *daddr, struct in6_addr *saddr,
1604 struct net *net, u32 pmtu, int ifindex)
1605 {
1606 struct rt6_info *rt, *nrt;
1607 int allfrag = 0;
1608 again:
1609 rt = rt6_lookup(net, daddr, saddr, ifindex, 0);
1610 if (rt == NULL)
1611 return;
1612
1613 if (rt6_check_expired(rt)) {
1614 ip6_del_rt(rt);
1615 goto again;
1616 }
1617
1618 if (pmtu >= dst_mtu(&rt->dst))
1619 goto out;
1620
1621 if (pmtu < IPV6_MIN_MTU) {
1622 /*
1623 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1624 * MTU (1280) and a fragment header should always be included
1625 * after a node receiving Too Big message reporting PMTU is
1626 * less than the IPv6 Minimum Link MTU.
1627 */
1628 pmtu = IPV6_MIN_MTU;
1629 allfrag = 1;
1630 }
1631
1632 /* New mtu received -> path was valid.
1633 They are sent only in response to data packets,
1634 so that this nexthop apparently is reachable. --ANK
1635 */
1636 dst_confirm(&rt->dst);
1637
1638 /* Host route. If it is static, it would be better
1639 not to override it, but add new one, so that
1640 when cache entry will expire old pmtu
1641 would return automatically.
1642 */
1643 if (rt->rt6i_flags & RTF_CACHE) {
1644 dst_metric_set(&rt->dst, RTAX_MTU, pmtu);
1645 if (allfrag) {
1646 u32 features = dst_metric(&rt->dst, RTAX_FEATURES);
1647 features |= RTAX_FEATURE_ALLFRAG;
1648 dst_metric_set(&rt->dst, RTAX_FEATURES, features);
1649 }
1650 dst_set_expires(&rt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1651 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1652 goto out;
1653 }
1654
1655 /* Network route.
1656 Two cases are possible:
1657 1. It is connected route. Action: COW
1658 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1659 */
1660 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1661 nrt = rt6_alloc_cow(rt, daddr, saddr);
1662 else
1663 nrt = rt6_alloc_clone(rt, daddr);
1664
1665 if (nrt) {
1666 dst_metric_set(&nrt->dst, RTAX_MTU, pmtu);
1667 if (allfrag) {
1668 u32 features = dst_metric(&nrt->dst, RTAX_FEATURES);
1669 features |= RTAX_FEATURE_ALLFRAG;
1670 dst_metric_set(&nrt->dst, RTAX_FEATURES, features);
1671 }
1672
1673 /* According to RFC 1981, detecting PMTU increase shouldn't be
1674 * happened within 5 mins, the recommended timer is 10 mins.
1675 * Here this route expiration time is set to ip6_rt_mtu_expires
1676 * which is 10 mins. After 10 mins the decreased pmtu is expired
1677 * and detecting PMTU increase will be automatically happened.
1678 */
1679 dst_set_expires(&nrt->dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1680 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1681
1682 ip6_ins_rt(nrt);
1683 }
1684 out:
1685 dst_release(&rt->dst);
1686 }
1687
1688 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1689 struct net_device *dev, u32 pmtu)
1690 {
1691 struct net *net = dev_net(dev);
1692
1693 /*
1694 * RFC 1981 states that a node "MUST reduce the size of the packets it
1695 * is sending along the path" that caused the Packet Too Big message.
1696 * Since it's not possible in the general case to determine which
1697 * interface was used to send the original packet, we update the MTU
1698 * on the interface that will be used to send future packets. We also
1699 * update the MTU on the interface that received the Packet Too Big in
1700 * case the original packet was forced out that interface with
1701 * SO_BINDTODEVICE or similar. This is the next best thing to the
1702 * correct behaviour, which would be to update the MTU on all
1703 * interfaces.
1704 */
1705 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0);
1706 rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex);
1707 }
1708
1709 /*
1710 * Misc support functions
1711 */
1712
1713 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1714 {
1715 struct net *net = dev_net(ort->rt6i_dev);
1716 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1717
1718 if (rt) {
1719 rt->dst.input = ort->dst.input;
1720 rt->dst.output = ort->dst.output;
1721
1722 dst_copy_metrics(&rt->dst, &ort->dst);
1723 rt->dst.error = ort->dst.error;
1724 rt->dst.dev = ort->dst.dev;
1725 if (rt->dst.dev)
1726 dev_hold(rt->dst.dev);
1727 rt->rt6i_idev = ort->rt6i_idev;
1728 if (rt->rt6i_idev)
1729 in6_dev_hold(rt->rt6i_idev);
1730 rt->dst.lastuse = jiffies;
1731 rt->rt6i_expires = 0;
1732
1733 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1734 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1735 rt->rt6i_metric = 0;
1736
1737 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1738 #ifdef CONFIG_IPV6_SUBTREES
1739 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1740 #endif
1741 rt->rt6i_table = ort->rt6i_table;
1742 }
1743 return rt;
1744 }
1745
1746 #ifdef CONFIG_IPV6_ROUTE_INFO
1747 static struct rt6_info *rt6_get_route_info(struct net *net,
1748 struct in6_addr *prefix, int prefixlen,
1749 struct in6_addr *gwaddr, int ifindex)
1750 {
1751 struct fib6_node *fn;
1752 struct rt6_info *rt = NULL;
1753 struct fib6_table *table;
1754
1755 table = fib6_get_table(net, RT6_TABLE_INFO);
1756 if (table == NULL)
1757 return NULL;
1758
1759 write_lock_bh(&table->tb6_lock);
1760 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1761 if (!fn)
1762 goto out;
1763
1764 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1765 if (rt->rt6i_dev->ifindex != ifindex)
1766 continue;
1767 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1768 continue;
1769 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1770 continue;
1771 dst_hold(&rt->dst);
1772 break;
1773 }
1774 out:
1775 write_unlock_bh(&table->tb6_lock);
1776 return rt;
1777 }
1778
1779 static struct rt6_info *rt6_add_route_info(struct net *net,
1780 struct in6_addr *prefix, int prefixlen,
1781 struct in6_addr *gwaddr, int ifindex,
1782 unsigned pref)
1783 {
1784 struct fib6_config cfg = {
1785 .fc_table = RT6_TABLE_INFO,
1786 .fc_metric = IP6_RT_PRIO_USER,
1787 .fc_ifindex = ifindex,
1788 .fc_dst_len = prefixlen,
1789 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1790 RTF_UP | RTF_PREF(pref),
1791 .fc_nlinfo.pid = 0,
1792 .fc_nlinfo.nlh = NULL,
1793 .fc_nlinfo.nl_net = net,
1794 };
1795
1796 ipv6_addr_copy(&cfg.fc_dst, prefix);
1797 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1798
1799 /* We should treat it as a default route if prefix length is 0. */
1800 if (!prefixlen)
1801 cfg.fc_flags |= RTF_DEFAULT;
1802
1803 ip6_route_add(&cfg);
1804
1805 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1806 }
1807 #endif
1808
1809 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1810 {
1811 struct rt6_info *rt;
1812 struct fib6_table *table;
1813
1814 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1815 if (table == NULL)
1816 return NULL;
1817
1818 write_lock_bh(&table->tb6_lock);
1819 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1820 if (dev == rt->rt6i_dev &&
1821 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1822 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1823 break;
1824 }
1825 if (rt)
1826 dst_hold(&rt->dst);
1827 write_unlock_bh(&table->tb6_lock);
1828 return rt;
1829 }
1830
1831 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1832 struct net_device *dev,
1833 unsigned int pref)
1834 {
1835 struct fib6_config cfg = {
1836 .fc_table = RT6_TABLE_DFLT,
1837 .fc_metric = IP6_RT_PRIO_USER,
1838 .fc_ifindex = dev->ifindex,
1839 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1840 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1841 .fc_nlinfo.pid = 0,
1842 .fc_nlinfo.nlh = NULL,
1843 .fc_nlinfo.nl_net = dev_net(dev),
1844 };
1845
1846 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1847
1848 ip6_route_add(&cfg);
1849
1850 return rt6_get_dflt_router(gwaddr, dev);
1851 }
1852
1853 void rt6_purge_dflt_routers(struct net *net)
1854 {
1855 struct rt6_info *rt;
1856 struct fib6_table *table;
1857
1858 /* NOTE: Keep consistent with rt6_get_dflt_router */
1859 table = fib6_get_table(net, RT6_TABLE_DFLT);
1860 if (table == NULL)
1861 return;
1862
1863 restart:
1864 read_lock_bh(&table->tb6_lock);
1865 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1866 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1867 dst_hold(&rt->dst);
1868 read_unlock_bh(&table->tb6_lock);
1869 ip6_del_rt(rt);
1870 goto restart;
1871 }
1872 }
1873 read_unlock_bh(&table->tb6_lock);
1874 }
1875
1876 static void rtmsg_to_fib6_config(struct net *net,
1877 struct in6_rtmsg *rtmsg,
1878 struct fib6_config *cfg)
1879 {
1880 memset(cfg, 0, sizeof(*cfg));
1881
1882 cfg->fc_table = RT6_TABLE_MAIN;
1883 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1884 cfg->fc_metric = rtmsg->rtmsg_metric;
1885 cfg->fc_expires = rtmsg->rtmsg_info;
1886 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1887 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1888 cfg->fc_flags = rtmsg->rtmsg_flags;
1889
1890 cfg->fc_nlinfo.nl_net = net;
1891
1892 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1893 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1894 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
1895 }
1896
1897 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1898 {
1899 struct fib6_config cfg;
1900 struct in6_rtmsg rtmsg;
1901 int err;
1902
1903 switch(cmd) {
1904 case SIOCADDRT: /* Add a route */
1905 case SIOCDELRT: /* Delete a route */
1906 if (!capable(CAP_NET_ADMIN))
1907 return -EPERM;
1908 err = copy_from_user(&rtmsg, arg,
1909 sizeof(struct in6_rtmsg));
1910 if (err)
1911 return -EFAULT;
1912
1913 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1914
1915 rtnl_lock();
1916 switch (cmd) {
1917 case SIOCADDRT:
1918 err = ip6_route_add(&cfg);
1919 break;
1920 case SIOCDELRT:
1921 err = ip6_route_del(&cfg);
1922 break;
1923 default:
1924 err = -EINVAL;
1925 }
1926 rtnl_unlock();
1927
1928 return err;
1929 }
1930
1931 return -EINVAL;
1932 }
1933
1934 /*
1935 * Drop the packet on the floor
1936 */
1937
1938 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1939 {
1940 int type;
1941 struct dst_entry *dst = skb_dst(skb);
1942 switch (ipstats_mib_noroutes) {
1943 case IPSTATS_MIB_INNOROUTES:
1944 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1945 if (type == IPV6_ADDR_ANY) {
1946 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1947 IPSTATS_MIB_INADDRERRORS);
1948 break;
1949 }
1950 /* FALLTHROUGH */
1951 case IPSTATS_MIB_OUTNOROUTES:
1952 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
1953 ipstats_mib_noroutes);
1954 break;
1955 }
1956 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
1957 kfree_skb(skb);
1958 return 0;
1959 }
1960
1961 static int ip6_pkt_discard(struct sk_buff *skb)
1962 {
1963 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
1964 }
1965
1966 static int ip6_pkt_discard_out(struct sk_buff *skb)
1967 {
1968 skb->dev = skb_dst(skb)->dev;
1969 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1970 }
1971
1972 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
1973
1974 static int ip6_pkt_prohibit(struct sk_buff *skb)
1975 {
1976 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
1977 }
1978
1979 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1980 {
1981 skb->dev = skb_dst(skb)->dev;
1982 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1983 }
1984
1985 #endif
1986
1987 /*
1988 * Allocate a dst for local (unicast / anycast) address.
1989 */
1990
1991 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1992 const struct in6_addr *addr,
1993 int anycast)
1994 {
1995 struct net *net = dev_net(idev->dev);
1996 struct rt6_info *rt = ip6_dst_alloc(&net->ipv6.ip6_dst_ops);
1997 struct neighbour *neigh;
1998
1999 if (rt == NULL) {
2000 if (net_ratelimit())
2001 pr_warning("IPv6: Maximum number of routes reached,"
2002 " consider increasing route/max_size.\n");
2003 return ERR_PTR(-ENOMEM);
2004 }
2005
2006 dev_hold(net->loopback_dev);
2007 in6_dev_hold(idev);
2008
2009 rt->dst.flags = DST_HOST;
2010 rt->dst.input = ip6_input;
2011 rt->dst.output = ip6_output;
2012 rt->rt6i_dev = net->loopback_dev;
2013 rt->rt6i_idev = idev;
2014 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1);
2015 rt->dst.obsolete = -1;
2016
2017 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2018 if (anycast)
2019 rt->rt6i_flags |= RTF_ANYCAST;
2020 else
2021 rt->rt6i_flags |= RTF_LOCAL;
2022 neigh = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
2023 if (IS_ERR(neigh)) {
2024 dst_free(&rt->dst);
2025
2026 /* We are casting this because that is the return
2027 * value type. But an errno encoded pointer is the
2028 * same regardless of the underlying pointer type,
2029 * and that's what we are returning. So this is OK.
2030 */
2031 return (struct rt6_info *) neigh;
2032 }
2033 rt->rt6i_nexthop = neigh;
2034
2035 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
2036 rt->rt6i_dst.plen = 128;
2037 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2038
2039 atomic_set(&rt->dst.__refcnt, 1);
2040
2041 return rt;
2042 }
2043
/* Argument bundle handed to fib6_ifdown() by rt6_ifdown(). */
struct arg_dev_net {
	struct net_device	*dev;	/* device going down; NULL matches every device */
	struct net		*net;	/* namespace whose null entry must be spared */
};
2048
2049 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2050 {
2051 const struct arg_dev_net *adn = arg;
2052 const struct net_device *dev = adn->dev;
2053
2054 if ((rt->rt6i_dev == dev || dev == NULL) &&
2055 rt != adn->net->ipv6.ip6_null_entry) {
2056 RT6_TRACE("deleted by ifdown %p\n", rt);
2057 return -1;
2058 }
2059 return 0;
2060 }
2061
2062 void rt6_ifdown(struct net *net, struct net_device *dev)
2063 {
2064 struct arg_dev_net adn = {
2065 .dev = dev,
2066 .net = net,
2067 };
2068
2069 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2070 icmp6_clean_all(fib6_ifdown, &adn);
2071 }
2072
/* Argument bundle handed to rt6_mtu_change_route() by rt6_mtu_change(). */
struct rt6_mtu_change_arg {
	struct net_device	*dev;	/* device whose MTU changed */
	unsigned		mtu;	/* its new MTU */
};
2078
2079 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2080 {
2081 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2082 struct inet6_dev *idev;
2083
2084 /* In IPv6 pmtu discovery is not optional,
2085 so that RTAX_MTU lock cannot disable it.
2086 We still use this lock to block changes
2087 caused by addrconf/ndisc.
2088 */
2089
2090 idev = __in6_dev_get(arg->dev);
2091 if (idev == NULL)
2092 return 0;
2093
2094 /* For administrative MTU increase, there is no way to discover
2095 IPv6 PMTU increase, so PMTU increase should be updated here.
2096 Since RFC 1981 doesn't include administrative MTU increase
2097 update PMTU increase is a MUST. (i.e. jumbo frame)
2098 */
2099 /*
2100 If new MTU is less than route PMTU, this new MTU will be the
2101 lowest MTU in the path, update the route PMTU to reflect PMTU
2102 decreases; if new MTU is greater than route PMTU, and the
2103 old MTU is the lowest MTU in the path, update the route PMTU
2104 to reflect the increase. In this case if the other nodes' MTU
2105 also have the lowest MTU, TOO BIG MESSAGE will be lead to
2106 PMTU discouvery.
2107 */
2108 if (rt->rt6i_dev == arg->dev &&
2109 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2110 (dst_mtu(&rt->dst) >= arg->mtu ||
2111 (dst_mtu(&rt->dst) < arg->mtu &&
2112 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2113 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2114 }
2115 return 0;
2116 }
2117
2118 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
2119 {
2120 struct rt6_mtu_change_arg arg = {
2121 .dev = dev,
2122 .mtu = mtu,
2123 };
2124
2125 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2126 }
2127
2128 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2129 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2130 [RTA_OIF] = { .type = NLA_U32 },
2131 [RTA_IIF] = { .type = NLA_U32 },
2132 [RTA_PRIORITY] = { .type = NLA_U32 },
2133 [RTA_METRICS] = { .type = NLA_NESTED },
2134 };
2135
2136 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2137 struct fib6_config *cfg)
2138 {
2139 struct rtmsg *rtm;
2140 struct nlattr *tb[RTA_MAX+1];
2141 int err;
2142
2143 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2144 if (err < 0)
2145 goto errout;
2146
2147 err = -EINVAL;
2148 rtm = nlmsg_data(nlh);
2149 memset(cfg, 0, sizeof(*cfg));
2150
2151 cfg->fc_table = rtm->rtm_table;
2152 cfg->fc_dst_len = rtm->rtm_dst_len;
2153 cfg->fc_src_len = rtm->rtm_src_len;
2154 cfg->fc_flags = RTF_UP;
2155 cfg->fc_protocol = rtm->rtm_protocol;
2156
2157 if (rtm->rtm_type == RTN_UNREACHABLE)
2158 cfg->fc_flags |= RTF_REJECT;
2159
2160 if (rtm->rtm_type == RTN_LOCAL)
2161 cfg->fc_flags |= RTF_LOCAL;
2162
2163 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2164 cfg->fc_nlinfo.nlh = nlh;
2165 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2166
2167 if (tb[RTA_GATEWAY]) {
2168 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2169 cfg->fc_flags |= RTF_GATEWAY;
2170 }
2171
2172 if (tb[RTA_DST]) {
2173 int plen = (rtm->rtm_dst_len + 7) >> 3;
2174
2175 if (nla_len(tb[RTA_DST]) < plen)
2176 goto errout;
2177
2178 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2179 }
2180
2181 if (tb[RTA_SRC]) {
2182 int plen = (rtm->rtm_src_len + 7) >> 3;
2183
2184 if (nla_len(tb[RTA_SRC]) < plen)
2185 goto errout;
2186
2187 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2188 }
2189
2190 if (tb[RTA_OIF])
2191 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2192
2193 if (tb[RTA_PRIORITY])
2194 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2195
2196 if (tb[RTA_METRICS]) {
2197 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2198 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2199 }
2200
2201 if (tb[RTA_TABLE])
2202 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2203
2204 err = 0;
2205 errout:
2206 return err;
2207 }
2208
2209 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2210 {
2211 struct fib6_config cfg;
2212 int err;
2213
2214 err = rtm_to_fib6_config(skb, nlh, &cfg);
2215 if (err < 0)
2216 return err;
2217
2218 return ip6_route_del(&cfg);
2219 }
2220
2221 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2222 {
2223 struct fib6_config cfg;
2224 int err;
2225
2226 err = rtm_to_fib6_config(skb, nlh, &cfg);
2227 if (err < 0)
2228 return err;
2229
2230 return ip6_route_add(&cfg);
2231 }
2232
2233 static inline size_t rt6_nlmsg_size(void)
2234 {
2235 return NLMSG_ALIGN(sizeof(struct rtmsg))
2236 + nla_total_size(16) /* RTA_SRC */
2237 + nla_total_size(16) /* RTA_DST */
2238 + nla_total_size(16) /* RTA_GATEWAY */
2239 + nla_total_size(16) /* RTA_PREFSRC */
2240 + nla_total_size(4) /* RTA_TABLE */
2241 + nla_total_size(4) /* RTA_IIF */
2242 + nla_total_size(4) /* RTA_OIF */
2243 + nla_total_size(4) /* RTA_PRIORITY */
2244 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2245 + nla_total_size(sizeof(struct rta_cacheinfo));
2246 }
2247
2248 static int rt6_fill_node(struct net *net,
2249 struct sk_buff *skb, struct rt6_info *rt,
2250 struct in6_addr *dst, struct in6_addr *src,
2251 int iif, int type, u32 pid, u32 seq,
2252 int prefix, int nowait, unsigned int flags)
2253 {
2254 struct rtmsg *rtm;
2255 struct nlmsghdr *nlh;
2256 long expires;
2257 u32 table;
2258
2259 if (prefix) { /* user wants prefix routes only */
2260 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2261 /* success since this is not a prefix route */
2262 return 1;
2263 }
2264 }
2265
2266 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2267 if (nlh == NULL)
2268 return -EMSGSIZE;
2269
2270 rtm = nlmsg_data(nlh);
2271 rtm->rtm_family = AF_INET6;
2272 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2273 rtm->rtm_src_len = rt->rt6i_src.plen;
2274 rtm->rtm_tos = 0;
2275 if (rt->rt6i_table)
2276 table = rt->rt6i_table->tb6_id;
2277 else
2278 table = RT6_TABLE_UNSPEC;
2279 rtm->rtm_table = table;
2280 NLA_PUT_U32(skb, RTA_TABLE, table);
2281 if (rt->rt6i_flags&RTF_REJECT)
2282 rtm->rtm_type = RTN_UNREACHABLE;
2283 else if (rt->rt6i_flags&RTF_LOCAL)
2284 rtm->rtm_type = RTN_LOCAL;
2285 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2286 rtm->rtm_type = RTN_LOCAL;
2287 else
2288 rtm->rtm_type = RTN_UNICAST;
2289 rtm->rtm_flags = 0;
2290 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2291 rtm->rtm_protocol = rt->rt6i_protocol;
2292 if (rt->rt6i_flags&RTF_DYNAMIC)
2293 rtm->rtm_protocol = RTPROT_REDIRECT;
2294 else if (rt->rt6i_flags & RTF_ADDRCONF)
2295 rtm->rtm_protocol = RTPROT_KERNEL;
2296 else if (rt->rt6i_flags&RTF_DEFAULT)
2297 rtm->rtm_protocol = RTPROT_RA;
2298
2299 if (rt->rt6i_flags&RTF_CACHE)
2300 rtm->rtm_flags |= RTM_F_CLONED;
2301
2302 if (dst) {
2303 NLA_PUT(skb, RTA_DST, 16, dst);
2304 rtm->rtm_dst_len = 128;
2305 } else if (rtm->rtm_dst_len)
2306 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2307 #ifdef CONFIG_IPV6_SUBTREES
2308 if (src) {
2309 NLA_PUT(skb, RTA_SRC, 16, src);
2310 rtm->rtm_src_len = 128;
2311 } else if (rtm->rtm_src_len)
2312 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2313 #endif
2314 if (iif) {
2315 #ifdef CONFIG_IPV6_MROUTE
2316 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2317 int err = ip6mr_get_route(net, skb, rtm, nowait);
2318 if (err <= 0) {
2319 if (!nowait) {
2320 if (err == 0)
2321 return 0;
2322 goto nla_put_failure;
2323 } else {
2324 if (err == -EMSGSIZE)
2325 goto nla_put_failure;
2326 }
2327 }
2328 } else
2329 #endif
2330 NLA_PUT_U32(skb, RTA_IIF, iif);
2331 } else if (dst) {
2332 struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
2333 struct in6_addr saddr_buf;
2334 if (ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2335 dst, 0, &saddr_buf) == 0)
2336 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2337 }
2338
2339 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2340 goto nla_put_failure;
2341
2342 if (rt->dst.neighbour)
2343 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->dst.neighbour->primary_key);
2344
2345 if (rt->dst.dev)
2346 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2347
2348 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
2349
2350 if (!(rt->rt6i_flags & RTF_EXPIRES))
2351 expires = 0;
2352 else if (rt->rt6i_expires - jiffies < INT_MAX)
2353 expires = rt->rt6i_expires - jiffies;
2354 else
2355 expires = INT_MAX;
2356
2357 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, 0, 0,
2358 expires, rt->dst.error) < 0)
2359 goto nla_put_failure;
2360
2361 return nlmsg_end(skb, nlh);
2362
2363 nla_put_failure:
2364 nlmsg_cancel(skb, nlh);
2365 return -EMSGSIZE;
2366 }
2367
2368 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2369 {
2370 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2371 int prefix;
2372
2373 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2374 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2375 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2376 } else
2377 prefix = 0;
2378
2379 return rt6_fill_node(arg->net,
2380 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2381 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2382 prefix, 0, NLM_F_MULTI);
2383 }
2384
2385 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2386 {
2387 struct net *net = sock_net(in_skb->sk);
2388 struct nlattr *tb[RTA_MAX+1];
2389 struct rt6_info *rt;
2390 struct sk_buff *skb;
2391 struct rtmsg *rtm;
2392 struct flowi fl;
2393 int err, iif = 0;
2394
2395 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2396 if (err < 0)
2397 goto errout;
2398
2399 err = -EINVAL;
2400 memset(&fl, 0, sizeof(fl));
2401
2402 if (tb[RTA_SRC]) {
2403 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2404 goto errout;
2405
2406 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2407 }
2408
2409 if (tb[RTA_DST]) {
2410 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2411 goto errout;
2412
2413 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2414 }
2415
2416 if (tb[RTA_IIF])
2417 iif = nla_get_u32(tb[RTA_IIF]);
2418
2419 if (tb[RTA_OIF])
2420 fl.oif = nla_get_u32(tb[RTA_OIF]);
2421
2422 if (iif) {
2423 struct net_device *dev;
2424 dev = __dev_get_by_index(net, iif);
2425 if (!dev) {
2426 err = -ENODEV;
2427 goto errout;
2428 }
2429 }
2430
2431 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2432 if (skb == NULL) {
2433 err = -ENOBUFS;
2434 goto errout;
2435 }
2436
2437 /* Reserve room for dummy headers, this skb can pass
2438 through good chunk of routing engine.
2439 */
2440 skb_reset_mac_header(skb);
2441 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2442
2443 rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
2444 skb_dst_set(skb, &rt->dst);
2445
2446 err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2447 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2448 nlh->nlmsg_seq, 0, 0, 0);
2449 if (err < 0) {
2450 kfree_skb(skb);
2451 goto errout;
2452 }
2453
2454 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2455 errout:
2456 return err;
2457 }
2458
2459 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2460 {
2461 struct sk_buff *skb;
2462 struct net *net = info->nl_net;
2463 u32 seq;
2464 int err;
2465
2466 err = -ENOBUFS;
2467 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2468
2469 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2470 if (skb == NULL)
2471 goto errout;
2472
2473 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2474 event, info->pid, seq, 0, 0, 0);
2475 if (err < 0) {
2476 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2477 WARN_ON(err == -EMSGSIZE);
2478 kfree_skb(skb);
2479 goto errout;
2480 }
2481 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2482 info->nlh, gfp_any());
2483 return;
2484 errout:
2485 if (err < 0)
2486 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2487 }
2488
2489 static int ip6_route_dev_notify(struct notifier_block *this,
2490 unsigned long event, void *data)
2491 {
2492 struct net_device *dev = (struct net_device *)data;
2493 struct net *net = dev_net(dev);
2494
2495 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2496 net->ipv6.ip6_null_entry->dst.dev = dev;
2497 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2498 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2499 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2500 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2501 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2502 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2503 #endif
2504 }
2505
2506 return NOTIFY_OK;
2507 }
2508
2509 /*
2510 * /proc
2511 */
2512
2513 #ifdef CONFIG_PROC_FS
2514
/*
 * Buffer-cursor bookkeeping for /proc output.
 * NOTE(review): nothing in the visible part of this file references this
 * structure (the /proc handlers below use seq_file) - confirm whether it
 * is still needed.
 */
struct rt6_proc_arg {
	char *buffer;
	int offset;
	int length;
	int skip;
	int len;
};
2523
2524 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2525 {
2526 struct seq_file *m = p_arg;
2527
2528 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2529
2530 #ifdef CONFIG_IPV6_SUBTREES
2531 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2532 #else
2533 seq_puts(m, "00000000000000000000000000000000 00 ");
2534 #endif
2535
2536 if (rt->rt6i_nexthop) {
2537 seq_printf(m, "%pi6", rt->rt6i_nexthop->primary_key);
2538 } else {
2539 seq_puts(m, "00000000000000000000000000000000");
2540 }
2541 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2542 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2543 rt->dst.__use, rt->rt6i_flags,
2544 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2545 return 0;
2546 }
2547
2548 static int ipv6_route_show(struct seq_file *m, void *v)
2549 {
2550 struct net *net = (struct net *)m->private;
2551 fib6_clean_all(net, rt6_info_route, 0, m);
2552 return 0;
2553 }
2554
2555 static int ipv6_route_open(struct inode *inode, struct file *file)
2556 {
2557 return single_open_net(inode, file, ipv6_route_show);
2558 }
2559
2560 static const struct file_operations ipv6_route_proc_fops = {
2561 .owner = THIS_MODULE,
2562 .open = ipv6_route_open,
2563 .read = seq_read,
2564 .llseek = seq_lseek,
2565 .release = single_release_net,
2566 };
2567
2568 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2569 {
2570 struct net *net = (struct net *)seq->private;
2571 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2572 net->ipv6.rt6_stats->fib_nodes,
2573 net->ipv6.rt6_stats->fib_route_nodes,
2574 net->ipv6.rt6_stats->fib_rt_alloc,
2575 net->ipv6.rt6_stats->fib_rt_entries,
2576 net->ipv6.rt6_stats->fib_rt_cache,
2577 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2578 net->ipv6.rt6_stats->fib_discarded_routes);
2579
2580 return 0;
2581 }
2582
2583 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2584 {
2585 return single_open_net(inode, file, rt6_stats_seq_show);
2586 }
2587
2588 static const struct file_operations rt6_stats_seq_fops = {
2589 .owner = THIS_MODULE,
2590 .open = rt6_stats_seq_open,
2591 .read = seq_read,
2592 .llseek = seq_lseek,
2593 .release = single_release_net,
2594 };
2595 #endif /* CONFIG_PROC_FS */
2596
2597 #ifdef CONFIG_SYSCTL
2598
2599 static
2600 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2601 void __user *buffer, size_t *lenp, loff_t *ppos)
2602 {
2603 struct net *net = current->nsproxy->net_ns;
2604 int delay = net->ipv6.sysctl.flush_delay;
2605 if (write) {
2606 proc_dointvec(ctl, write, buffer, lenp, ppos);
2607 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2608 return 0;
2609 } else
2610 return -EINVAL;
2611 }
2612
2613 ctl_table ipv6_route_table_template[] = {
2614 {
2615 .procname = "flush",
2616 .data = &init_net.ipv6.sysctl.flush_delay,
2617 .maxlen = sizeof(int),
2618 .mode = 0200,
2619 .proc_handler = ipv6_sysctl_rtcache_flush
2620 },
2621 {
2622 .procname = "gc_thresh",
2623 .data = &ip6_dst_ops_template.gc_thresh,
2624 .maxlen = sizeof(int),
2625 .mode = 0644,
2626 .proc_handler = proc_dointvec,
2627 },
2628 {
2629 .procname = "max_size",
2630 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2631 .maxlen = sizeof(int),
2632 .mode = 0644,
2633 .proc_handler = proc_dointvec,
2634 },
2635 {
2636 .procname = "gc_min_interval",
2637 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2638 .maxlen = sizeof(int),
2639 .mode = 0644,
2640 .proc_handler = proc_dointvec_jiffies,
2641 },
2642 {
2643 .procname = "gc_timeout",
2644 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2645 .maxlen = sizeof(int),
2646 .mode = 0644,
2647 .proc_handler = proc_dointvec_jiffies,
2648 },
2649 {
2650 .procname = "gc_interval",
2651 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2652 .maxlen = sizeof(int),
2653 .mode = 0644,
2654 .proc_handler = proc_dointvec_jiffies,
2655 },
2656 {
2657 .procname = "gc_elasticity",
2658 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2659 .maxlen = sizeof(int),
2660 .mode = 0644,
2661 .proc_handler = proc_dointvec,
2662 },
2663 {
2664 .procname = "mtu_expires",
2665 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2666 .maxlen = sizeof(int),
2667 .mode = 0644,
2668 .proc_handler = proc_dointvec_jiffies,
2669 },
2670 {
2671 .procname = "min_adv_mss",
2672 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2673 .maxlen = sizeof(int),
2674 .mode = 0644,
2675 .proc_handler = proc_dointvec,
2676 },
2677 {
2678 .procname = "gc_min_interval_ms",
2679 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2680 .maxlen = sizeof(int),
2681 .mode = 0644,
2682 .proc_handler = proc_dointvec_ms_jiffies,
2683 },
2684 { }
2685 };
2686
2687 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2688 {
2689 struct ctl_table *table;
2690
2691 table = kmemdup(ipv6_route_table_template,
2692 sizeof(ipv6_route_table_template),
2693 GFP_KERNEL);
2694
2695 if (table) {
2696 table[0].data = &net->ipv6.sysctl.flush_delay;
2697 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2698 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2699 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2700 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2701 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2702 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2703 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2704 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2705 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2706 }
2707
2708 return table;
2709 }
2710 #endif
2711
2712 static int __net_init ip6_route_net_init(struct net *net)
2713 {
2714 int ret = -ENOMEM;
2715
2716 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2717 sizeof(net->ipv6.ip6_dst_ops));
2718
2719 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2720 goto out_ip6_dst_ops;
2721
2722 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2723 sizeof(*net->ipv6.ip6_null_entry),
2724 GFP_KERNEL);
2725 if (!net->ipv6.ip6_null_entry)
2726 goto out_ip6_dst_entries;
2727 net->ipv6.ip6_null_entry->dst.path =
2728 (struct dst_entry *)net->ipv6.ip6_null_entry;
2729 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2730 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2731 ip6_template_metrics, true);
2732
2733 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2734 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2735 sizeof(*net->ipv6.ip6_prohibit_entry),
2736 GFP_KERNEL);
2737 if (!net->ipv6.ip6_prohibit_entry)
2738 goto out_ip6_null_entry;
2739 net->ipv6.ip6_prohibit_entry->dst.path =
2740 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2741 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2742 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2743 ip6_template_metrics, true);
2744
2745 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2746 sizeof(*net->ipv6.ip6_blk_hole_entry),
2747 GFP_KERNEL);
2748 if (!net->ipv6.ip6_blk_hole_entry)
2749 goto out_ip6_prohibit_entry;
2750 net->ipv6.ip6_blk_hole_entry->dst.path =
2751 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2752 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2753 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2754 ip6_template_metrics, true);
2755 #endif
2756
2757 net->ipv6.sysctl.flush_delay = 0;
2758 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2759 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2760 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2761 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2762 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2763 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2764 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2765
2766 #ifdef CONFIG_PROC_FS
2767 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2768 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2769 #endif
2770 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2771
2772 ret = 0;
2773 out:
2774 return ret;
2775
2776 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2777 out_ip6_prohibit_entry:
2778 kfree(net->ipv6.ip6_prohibit_entry);
2779 out_ip6_null_entry:
2780 kfree(net->ipv6.ip6_null_entry);
2781 #endif
2782 out_ip6_dst_entries:
2783 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2784 out_ip6_dst_ops:
2785 goto out;
2786 }
2787
2788 static void __net_exit ip6_route_net_exit(struct net *net)
2789 {
2790 #ifdef CONFIG_PROC_FS
2791 proc_net_remove(net, "ipv6_route");
2792 proc_net_remove(net, "rt6_stats");
2793 #endif
2794 kfree(net->ipv6.ip6_null_entry);
2795 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2796 kfree(net->ipv6.ip6_prohibit_entry);
2797 kfree(net->ipv6.ip6_blk_hole_entry);
2798 #endif
2799 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2800 }
2801
2802 static struct pernet_operations ip6_route_net_ops = {
2803 .init = ip6_route_net_init,
2804 .exit = ip6_route_net_exit,
2805 };
2806
2807 static struct notifier_block ip6_route_dev_notifier = {
2808 .notifier_call = ip6_route_dev_notify,
2809 .priority = 0,
2810 };
2811
2812 int __init ip6_route_init(void)
2813 {
2814 int ret;
2815
2816 ret = -ENOMEM;
2817 ip6_dst_ops_template.kmem_cachep =
2818 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2819 SLAB_HWCACHE_ALIGN, NULL);
2820 if (!ip6_dst_ops_template.kmem_cachep)
2821 goto out;
2822
2823 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2824 if (ret)
2825 goto out_kmem_cache;
2826
2827 ret = register_pernet_subsys(&ip6_route_net_ops);
2828 if (ret)
2829 goto out_dst_entries;
2830
2831 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2832
2833 /* Registering of the loopback is done before this portion of code,
2834 * the loopback reference in rt6_info will not be taken, do it
2835 * manually for init_net */
2836 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
2837 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2838 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2839 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
2840 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2841 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
2842 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
2843 #endif
2844 ret = fib6_init();
2845 if (ret)
2846 goto out_register_subsys;
2847
2848 ret = xfrm6_init();
2849 if (ret)
2850 goto out_fib6_init;
2851
2852 ret = fib6_rules_init();
2853 if (ret)
2854 goto xfrm6_init;
2855
2856 ret = -ENOBUFS;
2857 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2858 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2859 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2860 goto fib6_rules_init;
2861
2862 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
2863 if (ret)
2864 goto fib6_rules_init;
2865
2866 out:
2867 return ret;
2868
2869 fib6_rules_init:
2870 fib6_rules_cleanup();
2871 xfrm6_init:
2872 xfrm6_fini();
2873 out_fib6_init:
2874 fib6_gc_cleanup();
2875 out_register_subsys:
2876 unregister_pernet_subsys(&ip6_route_net_ops);
2877 out_dst_entries:
2878 dst_entries_destroy(&ip6_dst_blackhole_ops);
2879 out_kmem_cache:
2880 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2881 goto out;
2882 }
2883
2884 void ip6_route_cleanup(void)
2885 {
2886 unregister_netdevice_notifier(&ip6_route_dev_notifier);
2887 fib6_rules_cleanup();
2888 xfrm6_fini();
2889 fib6_gc_cleanup();
2890 unregister_pernet_subsys(&ip6_route_net_ops);
2891 dst_entries_destroy(&ip6_dst_blackhole_ops);
2892 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2893 }