mirror_ubuntu-jammy-kernel.git: net/ipv6/route.c
[IPv6] route: Simplify ip6_del_rt()
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
16 /* Changes:
17 *
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
25 */
26
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/if_arp.h>
39
40 #ifdef CONFIG_PROC_FS
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #endif
44
45 #include <net/snmp.h>
46 #include <net/ipv6.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
51 #include <net/tcp.h>
52 #include <linux/rtnetlink.h>
53 #include <net/dst.h>
54 #include <net/xfrm.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
75 #define CLONE_OFFLINK_ROUTE 0
76
77 #define RT6_SELECT_F_IFACE 0x1
78 #define RT6_SELECT_F_REACHABLE 0x2
79
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void ip6_dst_destroy(struct dst_entry *);
92 static void ip6_dst_ifdown(struct dst_entry *,
93 struct net_device *dev, int how);
94 static int ip6_dst_gc(void);
95
96 static int ip6_pkt_discard(struct sk_buff *skb);
97 static int ip6_pkt_discard_out(struct sk_buff *skb);
98 static void ip6_link_failure(struct sk_buff *skb);
99 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
101 #ifdef CONFIG_IPV6_ROUTE_INFO
102 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
103 struct in6_addr *gwaddr, int ifindex,
104 unsigned pref);
105 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
106 struct in6_addr *gwaddr, int ifindex);
107 #endif
108
109 static struct dst_ops ip6_dst_ops = {
110 .family = AF_INET6,
111 .protocol = __constant_htons(ETH_P_IPV6),
112 .gc = ip6_dst_gc,
113 .gc_thresh = 1024,
114 .check = ip6_dst_check,
115 .destroy = ip6_dst_destroy,
116 .ifdown = ip6_dst_ifdown,
117 .negative_advice = ip6_negative_advice,
118 .link_failure = ip6_link_failure,
119 .update_pmtu = ip6_rt_update_pmtu,
120 .entry_size = sizeof(struct rt6_info),
121 };
122
123 struct rt6_info ip6_null_entry = {
124 .u = {
125 .dst = {
126 .__refcnt = ATOMIC_INIT(1),
127 .__use = 1,
128 .dev = &loopback_dev,
129 .obsolete = -1,
130 .error = -ENETUNREACH,
131 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
132 .input = ip6_pkt_discard,
133 .output = ip6_pkt_discard_out,
134 .ops = &ip6_dst_ops,
135 .path = (struct dst_entry*)&ip6_null_entry,
136 }
137 },
138 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
139 .rt6i_metric = ~(u32) 0,
140 .rt6i_ref = ATOMIC_INIT(1),
141 };
142
143 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
144
145 struct rt6_info ip6_prohibit_entry = {
146 .u = {
147 .dst = {
148 .__refcnt = ATOMIC_INIT(1),
149 .__use = 1,
150 .dev = &loopback_dev,
151 .obsolete = -1,
152 .error = -EACCES,
153 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
154 .input = ip6_pkt_discard,
155 .output = ip6_pkt_discard_out,
156 .ops = &ip6_dst_ops,
157 .path = (struct dst_entry*)&ip6_prohibit_entry,
158 }
159 },
160 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
161 .rt6i_metric = ~(u32) 0,
162 .rt6i_ref = ATOMIC_INIT(1),
163 };
164
165 struct rt6_info ip6_blk_hole_entry = {
166 .u = {
167 .dst = {
168 .__refcnt = ATOMIC_INIT(1),
169 .__use = 1,
170 .dev = &loopback_dev,
171 .obsolete = -1,
172 .error = -EINVAL,
173 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
174 .input = ip6_pkt_discard,
175 .output = ip6_pkt_discard_out,
176 .ops = &ip6_dst_ops,
177 .path = (struct dst_entry*)&ip6_blk_hole_entry,
178 }
179 },
180 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
181 .rt6i_metric = ~(u32) 0,
182 .rt6i_ref = ATOMIC_INIT(1),
183 };
184
185 #endif
186
187 /* allocate dst with ip6_dst_ops */
188 static __inline__ struct rt6_info *ip6_dst_alloc(void)
189 {
190 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
191 }
192
193 static void ip6_dst_destroy(struct dst_entry *dst)
194 {
195 struct rt6_info *rt = (struct rt6_info *)dst;
196 struct inet6_dev *idev = rt->rt6i_idev;
197
198 if (idev != NULL) {
199 rt->rt6i_idev = NULL;
200 in6_dev_put(idev);
201 }
202 }
203
204 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
205 int how)
206 {
207 struct rt6_info *rt = (struct rt6_info *)dst;
208 struct inet6_dev *idev = rt->rt6i_idev;
209
210 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
211 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
212 if (loopback_idev != NULL) {
213 rt->rt6i_idev = loopback_idev;
214 in6_dev_put(idev);
215 }
216 }
217 }
218
219 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
220 {
221 return (rt->rt6i_flags & RTF_EXPIRES &&
222 time_after(jiffies, rt->rt6i_expires));
223 }
224
225 static inline int rt6_need_strict(struct in6_addr *daddr)
226 {
227 return (ipv6_addr_type(daddr) &
228 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
229 }
230
231 /*
232 * Route lookup. The relevant table->tb6_lock is assumed to be held by the caller.
233 */
234
235 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
236 int oif,
237 int strict)
238 {
239 struct rt6_info *local = NULL;
240 struct rt6_info *sprt;
241
242 if (oif) {
243 for (sprt = rt; sprt; sprt = sprt->u.next) {
244 struct net_device *dev = sprt->rt6i_dev;
245 if (dev->ifindex == oif)
246 return sprt;
247 if (dev->flags & IFF_LOOPBACK) {
248 if (sprt->rt6i_idev == NULL ||
249 sprt->rt6i_idev->dev->ifindex != oif) {
250 if (strict && oif)
251 continue;
252 if (local && (!oif ||
253 local->rt6i_idev->dev->ifindex == oif))
254 continue;
255 }
256 local = sprt;
257 }
258 }
259
260 if (local)
261 return local;
262
263 if (strict)
264 return &ip6_null_entry;
265 }
266 return rt;
267 }
268
269 #ifdef CONFIG_IPV6_ROUTER_PREF
270 static void rt6_probe(struct rt6_info *rt)
271 {
272 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
273 /*
274 * Okay, this does not seem to be appropriate
275 * for now, however, we need to check if it
276 * is really so; aka Router Reachability Probing.
277 *
278 * Router Reachability Probe MUST be rate-limited
279 * to no more than one per minute.
280 */
281 if (!neigh || (neigh->nud_state & NUD_VALID))
282 return;
283 read_lock_bh(&neigh->lock);
284 if (!(neigh->nud_state & NUD_VALID) &&
285 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
286 struct in6_addr mcaddr;
287 struct in6_addr *target;
288
289 neigh->updated = jiffies;
290 read_unlock_bh(&neigh->lock);
291
292 target = (struct in6_addr *)&neigh->primary_key;
293 addrconf_addr_solict_mult(target, &mcaddr);
294 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
295 } else
296 read_unlock_bh(&neigh->lock);
297 }
298 #else
299 static inline void rt6_probe(struct rt6_info *rt)
300 {
301 return;
302 }
303 #endif
304
305 /*
306 * Default Router Selection (RFC 2461 6.3.6)
307 */
308 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
309 {
310 struct net_device *dev = rt->rt6i_dev;
311 if (!oif || dev->ifindex == oif)
312 return 2;
313 if ((dev->flags & IFF_LOOPBACK) &&
314 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
315 return 1;
316 return 0;
317 }
318
319 static int inline rt6_check_neigh(struct rt6_info *rt)
320 {
321 struct neighbour *neigh = rt->rt6i_nexthop;
322 int m = 0;
323 if (rt->rt6i_flags & RTF_NONEXTHOP ||
324 !(rt->rt6i_flags & RTF_GATEWAY))
325 m = 1;
326 else if (neigh) {
327 read_lock_bh(&neigh->lock);
328 if (neigh->nud_state & NUD_VALID)
329 m = 2;
330 read_unlock_bh(&neigh->lock);
331 }
332 return m;
333 }
334
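/*
 * Scoring used by rt6_select(): rt6_check_dev() contributes 2 for an
 * exact outgoing-interface match (or no oif constraint) and 1 for a
 * loopback route bound to that interface; with CONFIG_IPV6_ROUTER_PREF
 * the decoded router preference lands in bits 2-3; bit 4 is set when
 * the next hop's neighbour entry is in a NUD_VALID state. A score of
 * -1 marks the route unusable under the given RT6_SELECT_F_* flags.
 */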
335 static int rt6_score_route(struct rt6_info *rt, int oif,
336 int strict)
337 {
338 int m, n;
339
340 m = rt6_check_dev(rt, oif);
341 if (!m && (strict & RT6_SELECT_F_IFACE))
342 return -1;
343 #ifdef CONFIG_IPV6_ROUTER_PREF
344 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
345 #endif
346 n = rt6_check_neigh(rt);
347 if (n > 1)
348 m |= 16;
349 else if (!n && strict & RT6_SELECT_F_REACHABLE)
350 return -1;
351 return m;
352 }
353
354 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
355 int strict)
356 {
357 struct rt6_info *match = NULL, *last = NULL;
358 struct rt6_info *rt, *rt0 = *head;
359 u32 metric;
360 int mpri = -1;
361
362 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
363 __FUNCTION__, head, head ? *head : NULL, oif);
364
365 for (rt = rt0, metric = rt0->rt6i_metric;
366 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
367 rt = rt->u.next) {
368 int m;
369
370 if (rt6_check_expired(rt))
371 continue;
372
373 last = rt;
374
375 m = rt6_score_route(rt, oif, strict);
376 if (m < 0)
377 continue;
378
379 if (m > mpri) {
380 rt6_probe(match);
381 match = rt;
382 mpri = m;
383 } else {
384 rt6_probe(rt);
385 }
386 }
387
388 if (!match &&
389 (strict & RT6_SELECT_F_REACHABLE) &&
390 last && last != rt0) {
391 /* no entries matched; do round-robin */
392 static DEFINE_SPINLOCK(lock);
393 spin_lock(&lock);
394 *head = rt0->u.next;
395 rt0->u.next = last->u.next;
396 last->u.next = rt0;
397 spin_unlock(&lock);
398 }
399
400 RT6_TRACE("%s() => %p, score=%d\n",
401 __FUNCTION__, match, mpri);
402
403 return (match ? match : &ip6_null_entry);
404 }
405
406 #ifdef CONFIG_IPV6_ROUTE_INFO
407 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
408 struct in6_addr *gwaddr)
409 {
410 struct route_info *rinfo = (struct route_info *) opt;
411 struct in6_addr prefix_buf, *prefix;
412 unsigned int pref;
413 u32 lifetime;
414 struct rt6_info *rt;
415
416 if (len < sizeof(struct route_info)) {
417 return -EINVAL;
418 }
419
420 /* Sanity check for prefix_len and length */
421 if (rinfo->length > 3) {
422 return -EINVAL;
423 } else if (rinfo->prefix_len > 128) {
424 return -EINVAL;
425 } else if (rinfo->prefix_len > 64) {
426 if (rinfo->length < 2) {
427 return -EINVAL;
428 }
429 } else if (rinfo->prefix_len > 0) {
430 if (rinfo->length < 1) {
431 return -EINVAL;
432 }
433 }
434
435 pref = rinfo->route_pref;
436 if (pref == ICMPV6_ROUTER_PREF_INVALID)
437 pref = ICMPV6_ROUTER_PREF_MEDIUM;
438
439 lifetime = ntohl(rinfo->lifetime);
440 if (lifetime == 0xffffffff) {
441 /* infinity */
442 } else if (lifetime > 0x7fffffff/HZ) {
443 /* Avoid arithmetic overflow */
444 lifetime = 0x7fffffff/HZ - 1;
445 }
446
447 if (rinfo->length == 3)
448 prefix = (struct in6_addr *)rinfo->prefix;
449 else {
450 /* this function is safe */
451 ipv6_addr_prefix(&prefix_buf,
452 (struct in6_addr *)rinfo->prefix,
453 rinfo->prefix_len);
454 prefix = &prefix_buf;
455 }
456
457 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
458
459 if (rt && !lifetime) {
460 ip6_del_rt(rt);
461 rt = NULL;
462 }
463
464 if (!rt && lifetime)
465 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
466 pref);
467 else if (rt)
468 rt->rt6i_flags = RTF_ROUTEINFO |
469 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
470
471 if (rt) {
472 if (lifetime == 0xffffffff) {
473 rt->rt6i_flags &= ~RTF_EXPIRES;
474 } else {
475 rt->rt6i_expires = jiffies + HZ * lifetime;
476 rt->rt6i_flags |= RTF_EXPIRES;
477 }
478 dst_release(&rt->u.dst);
479 }
480 return 0;
481 }
482 #endif
483
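/*
 * BACKTRACK() is shared by the ip6_pol_route_* lookups below: when a
 * strict lookup resolved to ip6_null_entry, walk back up the parent
 * fib6 nodes and retry the search (goto restart) at the first ancestor
 * carrying route info; once the tree root is reached, give up and
 * return the null entry (goto out).
 */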
484 #define BACKTRACK() \
485 if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \
486 while ((fn = fn->parent) != NULL) { \
487 if (fn->fn_flags & RTN_TL_ROOT) { \
488 dst_hold(&rt->u.dst); \
489 goto out; \
490 } \
491 if (fn->fn_flags & RTN_RTINFO) \
492 goto restart; \
493 } \
494 }
495
496 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
497 struct flowi *fl, int flags)
498 {
499 struct fib6_node *fn;
500 struct rt6_info *rt;
501
502 read_lock_bh(&table->tb6_lock);
503 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
504 restart:
505 rt = fn->leaf;
506 rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT);
507 BACKTRACK();
508 dst_hold(&rt->u.dst);
509 out:
510 read_unlock_bh(&table->tb6_lock);
511
512 rt->u.dst.lastuse = jiffies;
513 rt->u.dst.__use++;
514
515 return rt;
516
517 }
518
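/*
 * rt6_lookup() returns a route with its refcount already taken (or
 * NULL on error); the caller must drop it with dst_release(). A
 * minimal usage sketch (identifiers are illustrative only):
 *
 *	struct rt6_info *rt = rt6_lookup(&daddr, NULL, skb->dev->ifindex, 0);
 *	if (rt) {
 *		... use rt->rt6i_dev, rt->u.dst ...
 *		dst_release(&rt->u.dst);
 *	}
 */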
519 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
520 int oif, int strict)
521 {
522 struct flowi fl = {
523 .oif = oif,
524 .nl_u = {
525 .ip6_u = {
526 .daddr = *daddr,
527 /* TODO: saddr */
528 },
529 },
530 };
531 struct dst_entry *dst;
532 int flags = strict ? RT6_F_STRICT : 0;
533
534 dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
535 if (dst->error == 0)
536 return (struct rt6_info *) dst;
537
538 dst_release(dst);
539
540 return NULL;
541 }
542
543 /* ip6_ins_rt is called with table->tb6_lock NOT held.
544 It takes a new route entry; if the addition fails for any reason
545 the route is freed. In any case, if the caller does not hold a
546 reference of its own, the route may be destroyed.
547 */
548
549 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
550 void *_rtattr, struct netlink_skb_parms *req)
551 {
552 int err;
553 struct fib6_table *table;
554
555 table = rt->rt6i_table;
556 write_lock_bh(&table->tb6_lock);
557 err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req);
558 write_unlock_bh(&table->tb6_lock);
559
560 return err;
561 }
562
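/*
 * Cache-entry factories: rt6_alloc_cow() makes a per-destination /128
 * copy of an on-link route, marks it RTF_CACHE|DST_HOST and resolves a
 * fresh neighbour for the destination itself, while rt6_alloc_clone()
 * copies a gatewayed or RTF_NONEXTHOP route and shares the original's
 * neighbour via neigh_clone().
 */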
563 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
564 struct in6_addr *saddr)
565 {
566 struct rt6_info *rt;
567
568 /*
569 * Clone the route.
570 */
571
572 rt = ip6_rt_copy(ort);
573
574 if (rt) {
575 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
576 if (rt->rt6i_dst.plen != 128 &&
577 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
578 rt->rt6i_flags |= RTF_ANYCAST;
579 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
580 }
581
582 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
583 rt->rt6i_dst.plen = 128;
584 rt->rt6i_flags |= RTF_CACHE;
585 rt->u.dst.flags |= DST_HOST;
586
587 #ifdef CONFIG_IPV6_SUBTREES
588 if (rt->rt6i_src.plen && saddr) {
589 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
590 rt->rt6i_src.plen = 128;
591 }
592 #endif
593
594 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
595
596 }
597
598 return rt;
599 }
600
601 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
602 {
603 struct rt6_info *rt = ip6_rt_copy(ort);
604 if (rt) {
605 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
606 rt->rt6i_dst.plen = 128;
607 rt->rt6i_flags |= RTF_CACHE;
608 if (rt->rt6i_flags & RTF_REJECT)
609 rt->u.dst.error = ort->u.dst.error;
610 rt->u.dst.flags |= DST_HOST;
611 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
612 }
613 return rt;
614 }
615
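/*
 * ip6_pol_route_input() and ip6_pol_route_output() share one slow
 * path: the first pass asks rt6_select() for a (probably) reachable
 * router (RT6_SELECT_F_REACHABLE) and, if that yields only the null
 * entry or an existing RTF_CACHE clone, the lookup is repeated without
 * the reachability requirement. Other results are copied into a
 * per-destination cache entry (rt6_alloc_cow(), or returned uncloned
 * for off-link routes while CLONE_OFFLINK_ROUTE is 0) and inserted
 * with ip6_ins_rt(), relooking up a few times if a concurrent insert
 * wins the race.
 */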
616 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
617 struct flowi *fl, int flags)
618 {
619 struct fib6_node *fn;
620 struct rt6_info *rt, *nrt;
621 int strict = 0;
622 int attempts = 3;
623 int err;
624 int reachable = RT6_SELECT_F_REACHABLE;
625
626 if (flags & RT6_F_STRICT)
627 strict = RT6_SELECT_F_IFACE;
628
629 relookup:
630 read_lock_bh(&table->tb6_lock);
631
632 restart_2:
633 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
634
635 restart:
636 rt = rt6_select(&fn->leaf, fl->iif, strict | reachable);
637 BACKTRACK();
638 if (rt == &ip6_null_entry ||
639 rt->rt6i_flags & RTF_CACHE)
640 goto out;
641
642 dst_hold(&rt->u.dst);
643 read_unlock_bh(&table->tb6_lock);
644
645 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
646 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
647 else {
648 #if CLONE_OFFLINK_ROUTE
649 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
650 #else
651 goto out2;
652 #endif
653 }
654
655 dst_release(&rt->u.dst);
656 rt = nrt ? : &ip6_null_entry;
657
658 dst_hold(&rt->u.dst);
659 if (nrt) {
660 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
661 if (!err)
662 goto out2;
663 }
664
665 if (--attempts <= 0)
666 goto out2;
667
668 /*
669 * Race condition! In the gap, when table->tb6_lock was
670 * released someone could insert this route. Relookup.
671 */
672 dst_release(&rt->u.dst);
673 goto relookup;
674
675 out:
676 if (reachable) {
677 reachable = 0;
678 goto restart_2;
679 }
680 dst_hold(&rt->u.dst);
681 read_unlock_bh(&table->tb6_lock);
682 out2:
683 rt->u.dst.lastuse = jiffies;
684 rt->u.dst.__use++;
685
686 return rt;
687 }
688
689 void ip6_route_input(struct sk_buff *skb)
690 {
691 struct ipv6hdr *iph = skb->nh.ipv6h;
692 struct flowi fl = {
693 .iif = skb->dev->ifindex,
694 .nl_u = {
695 .ip6_u = {
696 .daddr = iph->daddr,
697 .saddr = iph->saddr,
698 .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK,
699 },
700 },
701 .proto = iph->nexthdr,
702 };
703 int flags = 0;
704
705 if (rt6_need_strict(&iph->daddr))
706 flags |= RT6_F_STRICT;
707
708 skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
709 }
710
711 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
712 struct flowi *fl, int flags)
713 {
714 struct fib6_node *fn;
715 struct rt6_info *rt, *nrt;
716 int strict = 0;
717 int attempts = 3;
718 int err;
719 int reachable = RT6_SELECT_F_REACHABLE;
720
721 if (flags & RT6_F_STRICT)
722 strict = RT6_SELECT_F_IFACE;
723
724 relookup:
725 read_lock_bh(&table->tb6_lock);
726
727 restart_2:
728 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
729
730 restart:
731 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
732 BACKTRACK();
733 if (rt == &ip6_null_entry ||
734 rt->rt6i_flags & RTF_CACHE)
735 goto out;
736
737 dst_hold(&rt->u.dst);
738 read_unlock_bh(&table->tb6_lock);
739
740 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
741 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
742 else {
743 #if CLONE_OFFLINK_ROUTE
744 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
745 #else
746 goto out2;
747 #endif
748 }
749
750 dst_release(&rt->u.dst);
751 rt = nrt ? : &ip6_null_entry;
752
753 dst_hold(&rt->u.dst);
754 if (nrt) {
755 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
756 if (!err)
757 goto out2;
758 }
759
760 if (--attempts <= 0)
761 goto out2;
762
763 /*
764 * Race condition! In the gap, when table->tb6_lock was
765 * released someone could insert this route. Relookup.
766 */
767 dst_release(&rt->u.dst);
768 goto relookup;
769
770 out:
771 if (reachable) {
772 reachable = 0;
773 goto restart_2;
774 }
775 dst_hold(&rt->u.dst);
776 read_unlock_bh(&table->tb6_lock);
777 out2:
778 rt->u.dst.lastuse = jiffies;
779 rt->u.dst.__use++;
780 return rt;
781 }
782
783 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
784 {
785 int flags = 0;
786
787 if (rt6_need_strict(&fl->fl6_dst))
788 flags |= RT6_F_STRICT;
789
790 return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
791 }
792
793
794 /*
795 * Destination cache support functions
796 */
797
798 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
799 {
800 struct rt6_info *rt;
801
802 rt = (struct rt6_info *) dst;
803
804 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
805 return dst;
806
807 return NULL;
808 }
809
810 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
811 {
812 struct rt6_info *rt = (struct rt6_info *) dst;
813
814 if (rt) {
815 if (rt->rt6i_flags & RTF_CACHE)
816 ip6_del_rt(rt);
817 else
818 dst_release(dst);
819 }
820 return NULL;
821 }
822
823 static void ip6_link_failure(struct sk_buff *skb)
824 {
825 struct rt6_info *rt;
826
827 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
828
829 rt = (struct rt6_info *) skb->dst;
830 if (rt) {
831 if (rt->rt6i_flags&RTF_CACHE) {
832 dst_set_expires(&rt->u.dst, 0);
833 rt->rt6i_flags |= RTF_EXPIRES;
834 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
835 rt->rt6i_node->fn_sernum = -1;
836 }
837 }
838
839 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
840 {
841 struct rt6_info *rt6 = (struct rt6_info*)dst;
842
843 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
844 rt6->rt6i_flags |= RTF_MODIFIED;
845 if (mtu < IPV6_MIN_MTU) {
846 mtu = IPV6_MIN_MTU;
847 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
848 }
849 dst->metrics[RTAX_MTU-1] = mtu;
850 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
851 }
852 }
853
854 static int ipv6_get_mtu(struct net_device *dev);
855
856 static inline unsigned int ipv6_advmss(unsigned int mtu)
857 {
858 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
859
860 if (mtu < ip6_rt_min_advmss)
861 mtu = ip6_rt_min_advmss;
862
863 /*
864 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
865 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
866 * IPV6_MAXPLEN is also valid and means: "any MSS,
867 * rely only on pmtu discovery"
868 */
869 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
870 mtu = IPV6_MAXPLEN;
871 return mtu;
872 }
873
874 static struct dst_entry *ndisc_dst_gc_list;
875 static DEFINE_SPINLOCK(ndisc_lock);
876
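/*
 * dst entries handed out by ndisc_dst_alloc() (used when transmitting
 * neighbour discovery and redirect packets) are never inserted into
 * the FIB; they are chained on ndisc_dst_gc_list under ndisc_lock and
 * reaped by ndisc_dst_gc() once their refcount drops to zero, with
 * fib6 garbage collection kicked to make sure that happens.
 */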
877 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
878 struct neighbour *neigh,
879 struct in6_addr *addr,
880 int (*output)(struct sk_buff *))
881 {
882 struct rt6_info *rt;
883 struct inet6_dev *idev = in6_dev_get(dev);
884
885 if (unlikely(idev == NULL))
886 return NULL;
887
888 rt = ip6_dst_alloc();
889 if (unlikely(rt == NULL)) {
890 in6_dev_put(idev);
891 goto out;
892 }
893
894 dev_hold(dev);
895 if (neigh)
896 neigh_hold(neigh);
897 else
898 neigh = ndisc_get_neigh(dev, addr);
899
900 rt->rt6i_dev = dev;
901 rt->rt6i_idev = idev;
902 rt->rt6i_nexthop = neigh;
903 atomic_set(&rt->u.dst.__refcnt, 1);
904 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
905 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
906 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
907 rt->u.dst.output = output;
908
909 #if 0 /* there's no chance to use these for ndisc */
910 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
911 ? DST_HOST
912 : 0;
913 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
914 rt->rt6i_dst.plen = 128;
915 #endif
916
917 spin_lock_bh(&ndisc_lock);
918 rt->u.dst.next = ndisc_dst_gc_list;
919 ndisc_dst_gc_list = &rt->u.dst;
920 spin_unlock_bh(&ndisc_lock);
921
922 fib6_force_start_gc();
923
924 out:
925 return (struct dst_entry *)rt;
926 }
927
928 int ndisc_dst_gc(int *more)
929 {
930 struct dst_entry *dst, *next, **pprev;
931 int freed;
932
933 next = NULL;
934 freed = 0;
935
936 spin_lock_bh(&ndisc_lock);
937 pprev = &ndisc_dst_gc_list;
938
939 while ((dst = *pprev) != NULL) {
940 if (!atomic_read(&dst->__refcnt)) {
941 *pprev = dst->next;
942 dst_free(dst);
943 freed++;
944 } else {
945 pprev = &dst->next;
946 (*more)++;
947 }
948 }
949
950 spin_unlock_bh(&ndisc_lock);
951
952 return freed;
953 }
954
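/*
 * The garbage collector's "expire" horizon adapts to load: it is
 * bumped by one on every full run, reset to half of ip6_rt_gc_timeout
 * while the number of cached entries stays below gc_thresh, and
 * decays by expire >> ip6_rt_gc_elasticity on every call. Runs are
 * rate-limited to one per ip6_rt_gc_min_interval unless the table has
 * outgrown ip6_rt_max_size.
 */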
955 static int ip6_dst_gc(void)
956 {
957 static unsigned expire = 30*HZ;
958 static unsigned long last_gc;
959 unsigned long now = jiffies;
960
961 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
962 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
963 goto out;
964
965 expire++;
966 fib6_run_gc(expire);
967 last_gc = now;
968 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
969 expire = ip6_rt_gc_timeout>>1;
970
971 out:
972 expire -= expire>>ip6_rt_gc_elasticity;
973 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
974 }
975
976 /* Clean the host part of a prefix. Not strictly necessary in a radix
977 tree, but it results in cleaner routing tables.
978
979 Remove this only once everything else works!
980 */
981
982 static int ipv6_get_mtu(struct net_device *dev)
983 {
984 int mtu = IPV6_MIN_MTU;
985 struct inet6_dev *idev;
986
987 idev = in6_dev_get(dev);
988 if (idev) {
989 mtu = idev->cnf.mtu6;
990 in6_dev_put(idev);
991 }
992 return mtu;
993 }
994
995 int ipv6_get_hoplimit(struct net_device *dev)
996 {
997 int hoplimit = ipv6_devconf.hop_limit;
998 struct inet6_dev *idev;
999
1000 idev = in6_dev_get(dev);
1001 if (idev) {
1002 hoplimit = idev->cnf.hop_limit;
1003 in6_dev_put(idev);
1004 }
1005 return hoplimit;
1006 }
1007
1008 /*
1009 * Add a route described by an in6_rtmsg (RTM_NEWROUTE / SIOCADDRT) to the given table.
1010 */
1011
1012 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
1013 void *_rtattr, struct netlink_skb_parms *req,
1014 u32 table_id)
1015 {
1016 int err;
1017 struct rtmsg *r;
1018 struct rtattr **rta;
1019 struct rt6_info *rt = NULL;
1020 struct net_device *dev = NULL;
1021 struct inet6_dev *idev = NULL;
1022 struct fib6_table *table;
1023 int addr_type;
1024
1025 rta = (struct rtattr **) _rtattr;
1026
1027 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
1028 return -EINVAL;
1029 #ifndef CONFIG_IPV6_SUBTREES
1030 if (rtmsg->rtmsg_src_len)
1031 return -EINVAL;
1032 #endif
1033 if (rtmsg->rtmsg_ifindex) {
1034 err = -ENODEV;
1035 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
1036 if (!dev)
1037 goto out;
1038 idev = in6_dev_get(dev);
1039 if (!idev)
1040 goto out;
1041 }
1042
1043 if (rtmsg->rtmsg_metric == 0)
1044 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
1045
1046 table = fib6_new_table(table_id);
1047 if (table == NULL) {
1048 err = -ENOBUFS;
1049 goto out;
1050 }
1051
1052 rt = ip6_dst_alloc();
1053
1054 if (rt == NULL) {
1055 err = -ENOMEM;
1056 goto out;
1057 }
1058
1059 rt->u.dst.obsolete = -1;
1060 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
1061 if (nlh && (r = NLMSG_DATA(nlh))) {
1062 rt->rt6i_protocol = r->rtm_protocol;
1063 } else {
1064 rt->rt6i_protocol = RTPROT_BOOT;
1065 }
1066
1067 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
1068
1069 if (addr_type & IPV6_ADDR_MULTICAST)
1070 rt->u.dst.input = ip6_mc_input;
1071 else
1072 rt->u.dst.input = ip6_forward;
1073
1074 rt->u.dst.output = ip6_output;
1075
1076 ipv6_addr_prefix(&rt->rt6i_dst.addr,
1077 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
1078 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
1079 if (rt->rt6i_dst.plen == 128)
1080 rt->u.dst.flags = DST_HOST;
1081
1082 #ifdef CONFIG_IPV6_SUBTREES
1083 ipv6_addr_prefix(&rt->rt6i_src.addr,
1084 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1085 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
1086 #endif
1087
1088 rt->rt6i_metric = rtmsg->rtmsg_metric;
1089
1090 /* We cannot add true routes via loopback here,
1091 they would result in kernel looping; promote them to reject routes
1092 */
1093 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
1094 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1095 /* hold loopback dev/idev if we haven't done so. */
1096 if (dev != &loopback_dev) {
1097 if (dev) {
1098 dev_put(dev);
1099 in6_dev_put(idev);
1100 }
1101 dev = &loopback_dev;
1102 dev_hold(dev);
1103 idev = in6_dev_get(dev);
1104 if (!idev) {
1105 err = -ENODEV;
1106 goto out;
1107 }
1108 }
1109 rt->u.dst.output = ip6_pkt_discard_out;
1110 rt->u.dst.input = ip6_pkt_discard;
1111 rt->u.dst.error = -ENETUNREACH;
1112 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1113 goto install_route;
1114 }
1115
1116 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
1117 struct in6_addr *gw_addr;
1118 int gwa_type;
1119
1120 gw_addr = &rtmsg->rtmsg_gateway;
1121 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1122 gwa_type = ipv6_addr_type(gw_addr);
1123
1124 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1125 struct rt6_info *grt;
1126
1127 /* IPv6 strictly prohibits using non-link-local
1128 addresses as the nexthop address.
1129 Otherwise, a router would not be able to send redirects.
1130 That is a good rule, but in some (rare!) circumstances
1131 (SIT, PtP, NBMA NOARP links) it is handy to allow
1132 some exceptions. --ANK
1133 */
1134 err = -EINVAL;
1135 if (!(gwa_type&IPV6_ADDR_UNICAST))
1136 goto out;
1137
1138 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1139
1140 err = -EHOSTUNREACH;
1141 if (grt == NULL)
1142 goto out;
1143 if (dev) {
1144 if (dev != grt->rt6i_dev) {
1145 dst_release(&grt->u.dst);
1146 goto out;
1147 }
1148 } else {
1149 dev = grt->rt6i_dev;
1150 idev = grt->rt6i_idev;
1151 dev_hold(dev);
1152 in6_dev_hold(grt->rt6i_idev);
1153 }
1154 if (!(grt->rt6i_flags&RTF_GATEWAY))
1155 err = 0;
1156 dst_release(&grt->u.dst);
1157
1158 if (err)
1159 goto out;
1160 }
1161 err = -EINVAL;
1162 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1163 goto out;
1164 }
1165
1166 err = -ENODEV;
1167 if (dev == NULL)
1168 goto out;
1169
1170 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1171 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1172 if (IS_ERR(rt->rt6i_nexthop)) {
1173 err = PTR_ERR(rt->rt6i_nexthop);
1174 rt->rt6i_nexthop = NULL;
1175 goto out;
1176 }
1177 }
1178
1179 rt->rt6i_flags = rtmsg->rtmsg_flags;
1180
1181 install_route:
1182 if (rta && rta[RTA_METRICS-1]) {
1183 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1184 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1185
1186 while (RTA_OK(attr, attrlen)) {
1187 unsigned flavor = attr->rta_type;
1188 if (flavor) {
1189 if (flavor > RTAX_MAX) {
1190 err = -EINVAL;
1191 goto out;
1192 }
1193 rt->u.dst.metrics[flavor-1] =
1194 *(u32 *)RTA_DATA(attr);
1195 }
1196 attr = RTA_NEXT(attr, attrlen);
1197 }
1198 }
1199
1200 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1201 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1202 if (!rt->u.dst.metrics[RTAX_MTU-1])
1203 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1204 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1205 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1206 rt->u.dst.dev = dev;
1207 rt->rt6i_idev = idev;
1208 rt->rt6i_table = table;
1209 return ip6_ins_rt(rt, nlh, _rtattr, req);
1210
1211 out:
1212 if (dev)
1213 dev_put(dev);
1214 if (idev)
1215 in6_dev_put(idev);
1216 if (rt)
1217 dst_free((struct dst_entry *) rt);
1218 return err;
1219 }
1220
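/*
 * __ip6_del_rt() expects the caller to hold a reference on rt; it
 * unlinks the route from its table under tb6_lock and drops that
 * reference. ip6_del_rt() is the bare wrapper for callers that have
 * no netlink request context to pass along.
 */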
1221 static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
1222 void *_rtattr, struct netlink_skb_parms *req)
1223 {
1224 int err;
1225 struct fib6_table *table;
1226
1227 if (rt == &ip6_null_entry)
1228 return -ENOENT;
1229
1230 table = rt->rt6i_table;
1231 write_lock_bh(&table->tb6_lock);
1232
1233 err = fib6_del(rt, nlh, _rtattr, req);
1234 dst_release(&rt->u.dst);
1235
1236 write_unlock_bh(&table->tb6_lock);
1237
1238 return err;
1239 }
1240
1241 int ip6_del_rt(struct rt6_info *rt)
1242 {
1243 return __ip6_del_rt(rt, NULL, NULL, NULL);
1244 }
1245
1246 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
1247 void *_rtattr, struct netlink_skb_parms *req,
1248 u32 table_id)
1249 {
1250 struct fib6_table *table;
1251 struct fib6_node *fn;
1252 struct rt6_info *rt;
1253 int err = -ESRCH;
1254
1255 table = fib6_get_table(table_id);
1256 if (table == NULL)
1257 return err;
1258
1259 read_lock_bh(&table->tb6_lock);
1260
1261 fn = fib6_locate(&table->tb6_root,
1262 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1263 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1264
1265 if (fn) {
1266 for (rt = fn->leaf; rt; rt = rt->u.next) {
1267 if (rtmsg->rtmsg_ifindex &&
1268 (rt->rt6i_dev == NULL ||
1269 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1270 continue;
1271 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1272 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1273 continue;
1274 if (rtmsg->rtmsg_metric &&
1275 rtmsg->rtmsg_metric != rt->rt6i_metric)
1276 continue;
1277 dst_hold(&rt->u.dst);
1278 read_unlock_bh(&table->tb6_lock);
1279
1280 return __ip6_del_rt(rt, nlh, _rtattr, req);
1281 }
1282 }
1283 read_unlock_bh(&table->tb6_lock);
1284
1285 return err;
1286 }
1287
1288 /*
1289 * Handle redirects
1290 */
1291 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1292 struct neighbour *neigh, u8 *lladdr, int on_link)
1293 {
1294 struct rt6_info *rt, *nrt = NULL;
1295 struct fib6_node *fn;
1296 struct fib6_table *table;
1297 struct netevent_redirect netevent;
1298
1299 /* TODO: Very lazy, might need to check all tables */
1300 table = fib6_get_table(RT6_TABLE_MAIN);
1301 if (table == NULL)
1302 return;
1303
1304 /*
1305 * Get the "current" route for this destination and
1306 * check if the redirect has come from the appropriate router.
1307 *
1308 * RFC 2461 specifies that redirects should only be
1309 * accepted if they come from the nexthop to the target.
1310 * Due to the way the routes are chosen, this notion
1311 * is a bit fuzzy and one might need to check all possible
1312 * routes.
1313 */
1314
1315 read_lock_bh(&table->tb6_lock);
1316 fn = fib6_lookup(&table->tb6_root, dest, NULL);
1317 restart:
1318 for (rt = fn->leaf; rt; rt = rt->u.next) {
1319 /*
1320 * Current route is on-link; redirect is always invalid.
1321 *
1322 * Seems the previous statement is not true. It could
1323 * be a node which regards us as on-link (e.g. proxy ndisc),
1324 * but then the router serving it might decide that we should
1325 * know the truth 8)8) --ANK (980726).
1326 */
1327 if (rt6_check_expired(rt))
1328 continue;
1329 if (!(rt->rt6i_flags & RTF_GATEWAY))
1330 continue;
1331 if (neigh->dev != rt->rt6i_dev)
1332 continue;
1333 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1334 continue;
1335 break;
1336 }
1337 if (rt)
1338 dst_hold(&rt->u.dst);
1339 else if (rt6_need_strict(dest)) {
1340 while ((fn = fn->parent) != NULL) {
1341 if (fn->fn_flags & RTN_ROOT)
1342 break;
1343 if (fn->fn_flags & RTN_RTINFO)
1344 goto restart;
1345 }
1346 }
1347 read_unlock_bh(&table->tb6_lock);
1348
1349 if (!rt) {
1350 if (net_ratelimit())
1351 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1352 "for redirect target\n");
1353 return;
1354 }
1355
1356 /*
1357 * We have finally decided to accept it.
1358 */
1359
1360 neigh_update(neigh, lladdr, NUD_STALE,
1361 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1362 NEIGH_UPDATE_F_OVERRIDE|
1363 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1364 NEIGH_UPDATE_F_ISROUTER))
1365 );
1366
1367 /*
1368 * Redirect received -> path was valid.
1369 * Look, redirects are sent only in response to data packets,
1370 * so that this nexthop apparently is reachable. --ANK
1371 */
1372 dst_confirm(&rt->u.dst);
1373
1374 /* Duplicate redirect: silently ignore. */
1375 if (neigh == rt->u.dst.neighbour)
1376 goto out;
1377
1378 nrt = ip6_rt_copy(rt);
1379 if (nrt == NULL)
1380 goto out;
1381
1382 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1383 if (on_link)
1384 nrt->rt6i_flags &= ~RTF_GATEWAY;
1385
1386 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1387 nrt->rt6i_dst.plen = 128;
1388 nrt->u.dst.flags |= DST_HOST;
1389
1390 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1391 nrt->rt6i_nexthop = neigh_clone(neigh);
1392 /* Reset pmtu, it may be better */
1393 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1394 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1395
1396 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1397 goto out;
1398
1399 netevent.old = &rt->u.dst;
1400 netevent.new = &nrt->u.dst;
1401 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1402
1403 if (rt->rt6i_flags&RTF_CACHE) {
1404 ip6_del_rt(rt);
1405 return;
1406 }
1407
1408 out:
1409 dst_release(&rt->u.dst);
1410 return;
1411 }
1412
1413 /*
1414 * Handle ICMP "packet too big" messages
1415 * i.e. Path MTU discovery
1416 */
1417
1418 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1419 struct net_device *dev, u32 pmtu)
1420 {
1421 struct rt6_info *rt, *nrt;
1422 int allfrag = 0;
1423
1424 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1425 if (rt == NULL)
1426 return;
1427
1428 if (pmtu >= dst_mtu(&rt->u.dst))
1429 goto out;
1430
1431 if (pmtu < IPV6_MIN_MTU) {
1432 /*
1433 * According to RFC 2460, once a node receives a Too Big message
1434 * reporting a PMTU below the IPv6 Minimum Link MTU (1280), the
1435 * PMTU is set to that minimum and a fragment header should
1436 * always be included from then on.
1437 */
1438 pmtu = IPV6_MIN_MTU;
1439 allfrag = 1;
1440 }
1441
1442 /* New mtu received -> path was valid.
1443 They are sent only in response to data packets,
1444 so that this nexthop apparently is reachable. --ANK
1445 */
1446 dst_confirm(&rt->u.dst);
1447
1448 /* Host route. If it is static, it is better not to
1449 override it but to add a new one, so that when the
1450 cache entry expires the old pmtu comes back
1451 automatically.
1452 */
1453 if (rt->rt6i_flags & RTF_CACHE) {
1454 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1455 if (allfrag)
1456 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1457 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1458 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1459 goto out;
1460 }
1461
1462 /* Network route.
1463 Two cases are possible:
1464 1. It is connected route. Action: COW
1465 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1466 */
1467 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1468 nrt = rt6_alloc_cow(rt, daddr, saddr);
1469 else
1470 nrt = rt6_alloc_clone(rt, daddr);
1471
1472 if (nrt) {
1473 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1474 if (allfrag)
1475 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1476
1477 /* According to RFC 1981, a PMTU increase should not be probed
1478 * for within 5 minutes; the recommended timer is 10 minutes.
1479 * Here the route expiration time is set to ip6_rt_mtu_expires,
1480 * which is 10 minutes. After that the decreased pmtu expires
1481 * and PMTU-increase detection happens automatically.
1482 */
1483 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1484 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1485
1486 ip6_ins_rt(nrt, NULL, NULL, NULL);
1487 }
1488 out:
1489 dst_release(&rt->u.dst);
1490 }
1491
1492 /*
1493 * Misc support functions
1494 */
1495
1496 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1497 {
1498 struct rt6_info *rt = ip6_dst_alloc();
1499
1500 if (rt) {
1501 rt->u.dst.input = ort->u.dst.input;
1502 rt->u.dst.output = ort->u.dst.output;
1503
1504 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1505 rt->u.dst.dev = ort->u.dst.dev;
1506 if (rt->u.dst.dev)
1507 dev_hold(rt->u.dst.dev);
1508 rt->rt6i_idev = ort->rt6i_idev;
1509 if (rt->rt6i_idev)
1510 in6_dev_hold(rt->rt6i_idev);
1511 rt->u.dst.lastuse = jiffies;
1512 rt->rt6i_expires = 0;
1513
1514 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1515 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1516 rt->rt6i_metric = 0;
1517
1518 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1519 #ifdef CONFIG_IPV6_SUBTREES
1520 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1521 #endif
1522 rt->rt6i_table = ort->rt6i_table;
1523 }
1524 return rt;
1525 }
1526
1527 #ifdef CONFIG_IPV6_ROUTE_INFO
1528 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1529 struct in6_addr *gwaddr, int ifindex)
1530 {
1531 struct fib6_node *fn;
1532 struct rt6_info *rt = NULL;
1533 struct fib6_table *table;
1534
1535 table = fib6_get_table(RT6_TABLE_INFO);
1536 if (table == NULL)
1537 return NULL;
1538
1539 write_lock_bh(&table->tb6_lock);
1540 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1541 if (!fn)
1542 goto out;
1543
1544 for (rt = fn->leaf; rt; rt = rt->u.next) {
1545 if (rt->rt6i_dev->ifindex != ifindex)
1546 continue;
1547 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1548 continue;
1549 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1550 continue;
1551 dst_hold(&rt->u.dst);
1552 break;
1553 }
1554 out:
1555 write_unlock_bh(&table->tb6_lock);
1556 return rt;
1557 }
1558
1559 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1560 struct in6_addr *gwaddr, int ifindex,
1561 unsigned pref)
1562 {
1563 struct in6_rtmsg rtmsg;
1564
1565 memset(&rtmsg, 0, sizeof(rtmsg));
1566 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1567 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1568 rtmsg.rtmsg_dst_len = prefixlen;
1569 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1570 rtmsg.rtmsg_metric = 1024;
1571 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
1572 /* We should treat it as a default route if prefix length is 0. */
1573 if (!prefixlen)
1574 rtmsg.rtmsg_flags |= RTF_DEFAULT;
1575 rtmsg.rtmsg_ifindex = ifindex;
1576
1577 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO);
1578
1579 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1580 }
1581 #endif
1582
1583 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1584 {
1585 struct rt6_info *rt;
1586 struct fib6_table *table;
1587
1588 table = fib6_get_table(RT6_TABLE_DFLT);
1589 if (table == NULL)
1590 return NULL;
1591
1592 write_lock_bh(&table->tb6_lock);
1593 for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) {
1594 if (dev == rt->rt6i_dev &&
1595 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1596 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1597 break;
1598 }
1599 if (rt)
1600 dst_hold(&rt->u.dst);
1601 write_unlock_bh(&table->tb6_lock);
1602 return rt;
1603 }
1604
1605 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1606 struct net_device *dev,
1607 unsigned int pref)
1608 {
1609 struct in6_rtmsg rtmsg;
1610
1611 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1612 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1613 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1614 rtmsg.rtmsg_metric = 1024;
1615 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1616 RTF_PREF(pref);
1617
1618 rtmsg.rtmsg_ifindex = dev->ifindex;
1619
1620 ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT);
1621 return rt6_get_dflt_router(gwaddr, dev);
1622 }
1623
1624 void rt6_purge_dflt_routers(void)
1625 {
1626 struct rt6_info *rt;
1627 struct fib6_table *table;
1628
1629 /* NOTE: Keep consistent with rt6_get_dflt_router */
1630 table = fib6_get_table(RT6_TABLE_DFLT);
1631 if (table == NULL)
1632 return;
1633
1634 restart:
1635 read_lock_bh(&table->tb6_lock);
1636 for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) {
1637 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1638 dst_hold(&rt->u.dst);
1639 read_unlock_bh(&table->tb6_lock);
1640 ip6_del_rt(rt);
1641 goto restart;
1642 }
1643 }
1644 read_unlock_bh(&table->tb6_lock);
1645 }
1646
1647 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1648 {
1649 struct in6_rtmsg rtmsg;
1650 int err;
1651
1652 switch(cmd) {
1653 case SIOCADDRT: /* Add a route */
1654 case SIOCDELRT: /* Delete a route */
1655 if (!capable(CAP_NET_ADMIN))
1656 return -EPERM;
1657 err = copy_from_user(&rtmsg, arg,
1658 sizeof(struct in6_rtmsg));
1659 if (err)
1660 return -EFAULT;
1661
1662 rtnl_lock();
1663 switch (cmd) {
1664 case SIOCADDRT:
1665 err = ip6_route_add(&rtmsg, NULL, NULL, NULL,
1666 RT6_TABLE_MAIN);
1667 break;
1668 case SIOCDELRT:
1669 err = ip6_route_del(&rtmsg, NULL, NULL, NULL,
1670 RT6_TABLE_MAIN);
1671 break;
1672 default:
1673 err = -EINVAL;
1674 }
1675 rtnl_unlock();
1676
1677 return err;
1678 };
1679
1680 return -EINVAL;
1681 }
1682
1683 /*
1684 * Drop the packet on the floor
1685 */
1686
1687 static int ip6_pkt_discard(struct sk_buff *skb)
1688 {
1689 int type = ipv6_addr_type(&skb->nh.ipv6h->daddr);
1690 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED)
1691 IP6_INC_STATS(IPSTATS_MIB_INADDRERRORS);
1692
1693 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1694 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1695 kfree_skb(skb);
1696 return 0;
1697 }
1698
1699 static int ip6_pkt_discard_out(struct sk_buff *skb)
1700 {
1701 skb->dev = skb->dst->dev;
1702 return ip6_pkt_discard(skb);
1703 }
1704
1705 /*
1706 * Allocate a dst for local (unicast / anycast) address.
1707 */
1708
1709 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1710 const struct in6_addr *addr,
1711 int anycast)
1712 {
1713 struct rt6_info *rt = ip6_dst_alloc();
1714
1715 if (rt == NULL)
1716 return ERR_PTR(-ENOMEM);
1717
1718 dev_hold(&loopback_dev);
1719 in6_dev_hold(idev);
1720
1721 rt->u.dst.flags = DST_HOST;
1722 rt->u.dst.input = ip6_input;
1723 rt->u.dst.output = ip6_output;
1724 rt->rt6i_dev = &loopback_dev;
1725 rt->rt6i_idev = idev;
1726 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1727 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1728 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1729 rt->u.dst.obsolete = -1;
1730
1731 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1732 if (anycast)
1733 rt->rt6i_flags |= RTF_ANYCAST;
1734 else
1735 rt->rt6i_flags |= RTF_LOCAL;
1736 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1737 if (rt->rt6i_nexthop == NULL) {
1738 dst_free((struct dst_entry *) rt);
1739 return ERR_PTR(-ENOMEM);
1740 }
1741
1742 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1743 rt->rt6i_dst.plen = 128;
1744 rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
1745
1746 atomic_set(&rt->u.dst.__refcnt, 1);
1747
1748 return rt;
1749 }
1750
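/*
 * fib6_ifdown() is the fib6_clean_all() callback behind rt6_ifdown():
 * returning -1 tells the tree walker to delete the route. It matches
 * every route on the departing device, or every route except
 * ip6_null_entry when called with a NULL device at cleanup time.
 */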
1751 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1752 {
1753 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1754 rt != &ip6_null_entry) {
1755 RT6_TRACE("deleted by ifdown %p\n", rt);
1756 return -1;
1757 }
1758 return 0;
1759 }
1760
1761 void rt6_ifdown(struct net_device *dev)
1762 {
1763 fib6_clean_all(fib6_ifdown, 0, dev);
1764 }
1765
1766 struct rt6_mtu_change_arg
1767 {
1768 struct net_device *dev;
1769 unsigned mtu;
1770 };
1771
1772 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1773 {
1774 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1775 struct inet6_dev *idev;
1776
1777 /* In IPv6 pmtu discovery is not optional,
1778 so the RTAX_MTU lock cannot disable it.
1779 We still use this lock to block changes
1780 caused by addrconf/ndisc.
1781 */
1782
1783 idev = __in6_dev_get(arg->dev);
1784 if (idev == NULL)
1785 return 0;
1786
1787 /* For an administrative MTU increase there is no way to discover
1788 an IPv6 PMTU increase, so the PMTU must be updated here.
1789 Since RFC 1981 doesn't cover administrative MTU increases,
1790 updating the PMTU on increase is a MUST (e.g. jumbo frames).
1791 */
1792 /*
1793 If the new MTU is less than the route PMTU, the new MTU is the
1794 lowest MTU on the path, so update the route PMTU to reflect the
1795 decrease; if the new MTU is greater than the route PMTU and the
1796 old MTU was the lowest MTU on the path, update the route PMTU
1797 to reflect the increase. In that case, if another node on the
1798 path still has the lowest MTU, a Too Big message will trigger
1799 PMTU discovery again.
1800 */
1801 if (rt->rt6i_dev == arg->dev &&
1802 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1803 (dst_mtu(&rt->u.dst) > arg->mtu ||
1804 (dst_mtu(&rt->u.dst) < arg->mtu &&
1805 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1806 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1807 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1808 return 0;
1809 }
1810
1811 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1812 {
1813 struct rt6_mtu_change_arg arg = {
1814 .dev = dev,
1815 .mtu = mtu,
1816 };
1817
1818 fib6_clean_all(rt6_mtu_change_route, 0, &arg);
1819 }
1820
1821 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1822 struct in6_rtmsg *rtmsg)
1823 {
1824 memset(rtmsg, 0, sizeof(*rtmsg));
1825
1826 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1827 rtmsg->rtmsg_src_len = r->rtm_src_len;
1828 rtmsg->rtmsg_flags = RTF_UP;
1829 if (r->rtm_type == RTN_UNREACHABLE)
1830 rtmsg->rtmsg_flags |= RTF_REJECT;
1831
1832 if (rta[RTA_GATEWAY-1]) {
1833 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1834 return -EINVAL;
1835 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1836 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1837 }
1838 if (rta[RTA_DST-1]) {
1839 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1840 return -EINVAL;
1841 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1842 }
1843 if (rta[RTA_SRC-1]) {
1844 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1845 return -EINVAL;
1846 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1847 }
1848 if (rta[RTA_OIF-1]) {
1849 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1850 return -EINVAL;
1851 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1852 }
1853 if (rta[RTA_PRIORITY-1]) {
1854 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1855 return -EINVAL;
1856 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1857 }
1858 return 0;
1859 }
1860
1861 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1862 {
1863 struct rtmsg *r = NLMSG_DATA(nlh);
1864 struct in6_rtmsg rtmsg;
1865
1866 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1867 return -EINVAL;
1868 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb),
1869 rtm_get_table(arg, r->rtm_table));
1870 }
1871
1872 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1873 {
1874 struct rtmsg *r = NLMSG_DATA(nlh);
1875 struct in6_rtmsg rtmsg;
1876
1877 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1878 return -EINVAL;
1879 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb),
1880 rtm_get_table(arg, r->rtm_table));
1881 }
1882
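/*
 * rt6_fill_node() serializes one route into a netlink message: the
 * rtmsg header plus RTA_TABLE, RTA_DST/RTA_SRC, RTA_IIF or
 * RTA_PREFSRC, the metrics, RTA_GATEWAY, RTA_OIF, RTA_PRIORITY and
 * RTA_CACHEINFO. It returns 1 ("skipped, but fine") when only prefix
 * routes were requested and this is not one, and -1 when the skb runs
 * out of room.
 */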
1883 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1884 struct in6_addr *dst, struct in6_addr *src,
1885 int iif, int type, u32 pid, u32 seq,
1886 int prefix, unsigned int flags)
1887 {
1888 struct rtmsg *rtm;
1889 struct nlmsghdr *nlh;
1890 unsigned char *b = skb->tail;
1891 struct rta_cacheinfo ci;
1892 u32 table;
1893
1894 if (prefix) { /* user wants prefix routes only */
1895 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1896 /* success since this is not a prefix route */
1897 return 1;
1898 }
1899 }
1900
1901 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1902 rtm = NLMSG_DATA(nlh);
1903 rtm->rtm_family = AF_INET6;
1904 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1905 rtm->rtm_src_len = rt->rt6i_src.plen;
1906 rtm->rtm_tos = 0;
1907 if (rt->rt6i_table)
1908 table = rt->rt6i_table->tb6_id;
1909 else
1910 table = RT6_TABLE_UNSPEC;
1911 rtm->rtm_table = table;
1912 RTA_PUT_U32(skb, RTA_TABLE, table);
1913 if (rt->rt6i_flags&RTF_REJECT)
1914 rtm->rtm_type = RTN_UNREACHABLE;
1915 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1916 rtm->rtm_type = RTN_LOCAL;
1917 else
1918 rtm->rtm_type = RTN_UNICAST;
1919 rtm->rtm_flags = 0;
1920 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1921 rtm->rtm_protocol = rt->rt6i_protocol;
1922 if (rt->rt6i_flags&RTF_DYNAMIC)
1923 rtm->rtm_protocol = RTPROT_REDIRECT;
1924 else if (rt->rt6i_flags & RTF_ADDRCONF)
1925 rtm->rtm_protocol = RTPROT_KERNEL;
1926 else if (rt->rt6i_flags&RTF_DEFAULT)
1927 rtm->rtm_protocol = RTPROT_RA;
1928
1929 if (rt->rt6i_flags&RTF_CACHE)
1930 rtm->rtm_flags |= RTM_F_CLONED;
1931
1932 if (dst) {
1933 RTA_PUT(skb, RTA_DST, 16, dst);
1934 rtm->rtm_dst_len = 128;
1935 } else if (rtm->rtm_dst_len)
1936 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1937 #ifdef CONFIG_IPV6_SUBTREES
1938 if (src) {
1939 RTA_PUT(skb, RTA_SRC, 16, src);
1940 rtm->rtm_src_len = 128;
1941 } else if (rtm->rtm_src_len)
1942 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1943 #endif
1944 if (iif)
1945 RTA_PUT(skb, RTA_IIF, 4, &iif);
1946 else if (dst) {
1947 struct in6_addr saddr_buf;
1948 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1949 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1950 }
1951 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1952 goto rtattr_failure;
1953 if (rt->u.dst.neighbour)
1954 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1955 if (rt->u.dst.dev)
1956 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1957 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1958 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1959 if (rt->rt6i_expires)
1960 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1961 else
1962 ci.rta_expires = 0;
1963 ci.rta_used = rt->u.dst.__use;
1964 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1965 ci.rta_error = rt->u.dst.error;
1966 ci.rta_id = 0;
1967 ci.rta_ts = 0;
1968 ci.rta_tsage = 0;
1969 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1970 nlh->nlmsg_len = skb->tail - b;
1971 return skb->len;
1972
1973 nlmsg_failure:
1974 rtattr_failure:
1975 skb_trim(skb, b - skb->data);
1976 return -1;
1977 }
1978
1979 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1980 {
1981 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1982 int prefix;
1983
1984 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1985 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1986 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1987 } else
1988 prefix = 0;
1989
1990 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1991 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1992 prefix, NLM_F_MULTI);
1993 }
1994
1995 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1996 {
1997 struct rtattr **rta = arg;
1998 int iif = 0;
1999 int err = -ENOBUFS;
2000 struct sk_buff *skb;
2001 struct flowi fl;
2002 struct rt6_info *rt;
2003
2004 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2005 if (skb == NULL)
2006 goto out;
2007
2008 /* Reserve room for dummy headers, this skb can pass
2009 through good chunk of routing engine.
2010 */
2011 skb->mac.raw = skb->data;
2012 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2013
2014 memset(&fl, 0, sizeof(fl));
2015 if (rta[RTA_SRC-1])
2016 ipv6_addr_copy(&fl.fl6_src,
2017 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
2018 if (rta[RTA_DST-1])
2019 ipv6_addr_copy(&fl.fl6_dst,
2020 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
2021
2022 if (rta[RTA_IIF-1])
2023 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
2024
2025 if (iif) {
2026 struct net_device *dev;
2027 dev = __dev_get_by_index(iif);
2028 if (!dev) {
2029 err = -ENODEV;
2030 goto out_free;
2031 }
2032 }
2033
2034 fl.oif = 0;
2035 if (rta[RTA_OIF-1])
2036 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
2037
2038 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
2039
2040 skb->dst = &rt->u.dst;
2041
2042 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
2043 err = rt6_fill_node(skb, rt,
2044 &fl.fl6_dst, &fl.fl6_src,
2045 iif,
2046 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2047 nlh->nlmsg_seq, 0, 0);
2048 if (err < 0) {
2049 err = -EMSGSIZE;
2050 goto out_free;
2051 }
2052
2053 err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
2054 out:
2055 return err;
2056 out_free:
2057 kfree_skb(skb);
2058 goto out;
2059 }
2060
2061 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
2062 struct netlink_skb_parms *req)
2063 {
2064 struct sk_buff *skb;
2065 u32 pid = req ? req->pid : 0;
2066 u32 seq = nlh ? nlh->nlmsg_seq : 0;
2067 int payload = sizeof(struct rtmsg) + 256;
2068 int err = -ENOBUFS;
2069
2070 skb = nlmsg_new(nlmsg_total_size(payload), gfp_any());
2071 if (skb == NULL)
2072 goto errout;
2073
2074 err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
2075 if (err < 0) {
2076 kfree_skb(skb);
2077 goto errout;
2078 }
2079
2080 err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
2081 errout:
2082 if (err < 0)
2083 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
2084 }
2085
2086 /*
2087 * /proc
2088 */
2089
2090 #ifdef CONFIG_PROC_FS
2091
2092 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2093
2094 struct rt6_proc_arg
2095 {
2096 char *buffer;
2097 int offset;
2098 int length;
2099 int skip;
2100 int len;
2101 };
2102
2103 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2104 {
2105 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2106 int i;
2107
2108 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2109 arg->skip++;
2110 return 0;
2111 }
2112
2113 if (arg->len >= arg->length)
2114 return 0;
2115
2116 for (i=0; i<16; i++) {
2117 sprintf(arg->buffer + arg->len, "%02x",
2118 rt->rt6i_dst.addr.s6_addr[i]);
2119 arg->len += 2;
2120 }
2121 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2122 rt->rt6i_dst.plen);
2123
2124 #ifdef CONFIG_IPV6_SUBTREES
2125 for (i=0; i<16; i++) {
2126 sprintf(arg->buffer + arg->len, "%02x",
2127 rt->rt6i_src.addr.s6_addr[i]);
2128 arg->len += 2;
2129 }
2130 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2131 rt->rt6i_src.plen);
2132 #else
2133 sprintf(arg->buffer + arg->len,
2134 "00000000000000000000000000000000 00 ");
2135 arg->len += 36;
2136 #endif
2137
2138 if (rt->rt6i_nexthop) {
2139 for (i=0; i<16; i++) {
2140 sprintf(arg->buffer + arg->len, "%02x",
2141 rt->rt6i_nexthop->primary_key[i]);
2142 arg->len += 2;
2143 }
2144 } else {
2145 sprintf(arg->buffer + arg->len,
2146 "00000000000000000000000000000000");
2147 arg->len += 32;
2148 }
2149 arg->len += sprintf(arg->buffer + arg->len,
2150 " %08x %08x %08x %08x %8s\n",
2151 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2152 rt->u.dst.__use, rt->rt6i_flags,
2153 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2154 return 0;
2155 }
2156
2157 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2158 {
2159 struct rt6_proc_arg arg = {
2160 .buffer = buffer,
2161 .offset = offset,
2162 .length = length,
2163 };
2164
2165 fib6_clean_all(rt6_info_route, 0, &arg);
2166
2167 *start = buffer;
2168 if (offset)
2169 *start += offset % RT6_INFO_LEN;
2170
2171 arg.len -= offset % RT6_INFO_LEN;
2172
2173 if (arg.len > length)
2174 arg.len = length;
2175 if (arg.len < 0)
2176 arg.len = 0;
2177
2178 return arg.len;
2179 }
2180
2181 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2182 {
2183 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2184 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2185 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2186 rt6_stats.fib_rt_cache,
2187 atomic_read(&ip6_dst_ops.entries),
2188 rt6_stats.fib_discarded_routes);
2189
2190 return 0;
2191 }
2192
2193 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2194 {
2195 return single_open(file, rt6_stats_seq_show, NULL);
2196 }
2197
2198 static struct file_operations rt6_stats_seq_fops = {
2199 .owner = THIS_MODULE,
2200 .open = rt6_stats_seq_open,
2201 .read = seq_read,
2202 .llseek = seq_lseek,
2203 .release = single_release,
2204 };
2205 #endif /* CONFIG_PROC_FS */
2206
2207 #ifdef CONFIG_SYSCTL
2208
2209 static int flush_delay;
2210
2211 static
2212 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2213 void __user *buffer, size_t *lenp, loff_t *ppos)
2214 {
2215 if (write) {
2216 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2217 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2218 return 0;
2219 } else
2220 return -EINVAL;
2221 }
2222
2223 ctl_table ipv6_route_table[] = {
2224 {
2225 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2226 .procname = "flush",
2227 .data = &flush_delay,
2228 .maxlen = sizeof(int),
2229 .mode = 0200,
2230 .proc_handler = &ipv6_sysctl_rtcache_flush
2231 },
2232 {
2233 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2234 .procname = "gc_thresh",
2235 .data = &ip6_dst_ops.gc_thresh,
2236 .maxlen = sizeof(int),
2237 .mode = 0644,
2238 .proc_handler = &proc_dointvec,
2239 },
2240 {
2241 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2242 .procname = "max_size",
2243 .data = &ip6_rt_max_size,
2244 .maxlen = sizeof(int),
2245 .mode = 0644,
2246 .proc_handler = &proc_dointvec,
2247 },
2248 {
2249 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2250 .procname = "gc_min_interval",
2251 .data = &ip6_rt_gc_min_interval,
2252 .maxlen = sizeof(int),
2253 .mode = 0644,
2254 .proc_handler = &proc_dointvec_jiffies,
2255 .strategy = &sysctl_jiffies,
2256 },
2257 {
2258 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2259 .procname = "gc_timeout",
2260 .data = &ip6_rt_gc_timeout,
2261 .maxlen = sizeof(int),
2262 .mode = 0644,
2263 .proc_handler = &proc_dointvec_jiffies,
2264 .strategy = &sysctl_jiffies,
2265 },
2266 {
2267 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2268 .procname = "gc_interval",
2269 .data = &ip6_rt_gc_interval,
2270 .maxlen = sizeof(int),
2271 .mode = 0644,
2272 .proc_handler = &proc_dointvec_jiffies,
2273 .strategy = &sysctl_jiffies,
2274 },
2275 {
2276 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2277 .procname = "gc_elasticity",
2278 .data = &ip6_rt_gc_elasticity,
2279 .maxlen = sizeof(int),
2280 .mode = 0644,
2281 .proc_handler = &proc_dointvec_jiffies,
2282 .strategy = &sysctl_jiffies,
2283 },
2284 {
2285 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2286 .procname = "mtu_expires",
2287 .data = &ip6_rt_mtu_expires,
2288 .maxlen = sizeof(int),
2289 .mode = 0644,
2290 .proc_handler = &proc_dointvec_jiffies,
2291 .strategy = &sysctl_jiffies,
2292 },
2293 {
2294 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2295 .procname = "min_adv_mss",
2296 .data = &ip6_rt_min_advmss,
2297 .maxlen = sizeof(int),
2298 .mode = 0644,
2299 .proc_handler = &proc_dointvec_jiffies,
2300 .strategy = &sysctl_jiffies,
2301 },
2302 {
2303 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2304 .procname = "gc_min_interval_ms",
2305 .data = &ip6_rt_gc_min_interval,
2306 .maxlen = sizeof(int),
2307 .mode = 0644,
2308 .proc_handler = &proc_dointvec_ms_jiffies,
2309 .strategy = &sysctl_ms_jiffies,
2310 },
2311 { .ctl_name = 0 }
2312 };
2313
2314 #endif
2315
2316 void __init ip6_route_init(void)
2317 {
2318 struct proc_dir_entry *p;
2319
2320 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2321 sizeof(struct rt6_info),
2322 0, SLAB_HWCACHE_ALIGN,
2323 NULL, NULL);
2324 if (!ip6_dst_ops.kmem_cachep)
2325 panic("cannot create ip6_dst_cache");
2326
2327 fib6_init();
2328 #ifdef CONFIG_PROC_FS
2329 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2330 if (p)
2331 p->owner = THIS_MODULE;
2332
2333 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2334 #endif
2335 #ifdef CONFIG_XFRM
2336 xfrm6_init();
2337 #endif
2338 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2339 fib6_rules_init();
2340 #endif
2341 }
2342
2343 void ip6_route_cleanup(void)
2344 {
2345 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2346 fib6_rules_cleanup();
2347 #endif
2348 #ifdef CONFIG_PROC_FS
2349 proc_net_remove("ipv6_route");
2350 proc_net_remove("rt6_stats");
2351 #endif
2352 #ifdef CONFIG_XFRM
2353 xfrm6_fini();
2354 #endif
2355 rt6_ifdown(NULL);
2356 fib6_gc_cleanup();
2357 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2358 }