net/ipv6/route.c
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
77
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
82
83 #ifdef CONFIG_IPV6_ROUTE_INFO
84 static struct rt6_info *rt6_add_route_info(struct net *net,
85 const struct in6_addr *prefix, int prefixlen,
86 const struct in6_addr *gwaddr, int ifindex,
87 unsigned int pref);
88 static struct rt6_info *rt6_get_route_info(struct net *net,
89 const struct in6_addr *prefix, int prefixlen,
90 const struct in6_addr *gwaddr, int ifindex);
91 #endif
92
93 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
94 {
95 struct rt6_info *rt = (struct rt6_info *) dst;
96 struct inet_peer *peer;
97 u32 *p = NULL;
98
99 if (!(rt->dst.flags & DST_HOST))
100 return NULL;
101
102 peer = rt6_get_peer_create(rt);
103 if (peer) {
104 u32 *old_p = __DST_METRICS_PTR(old);
105 unsigned long prev, new;
106
107 p = peer->metrics;
108 if (inet_metrics_new(peer))
109 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
110
111 new = (unsigned long) p;
112 prev = cmpxchg(&dst->_metrics, old, new);
113
114 if (prev != old) {
115 p = __DST_METRICS_PTR(prev);
116 if (prev & DST_METRICS_READ_ONLY)
117 p = NULL;
118 }
119 }
120 return p;
121 }
122
123 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
124 struct sk_buff *skb,
125 const void *daddr)
126 {
127 struct in6_addr *p = &rt->rt6i_gateway;
128
129 if (!ipv6_addr_any(p))
130 return (const void *) p;
131 else if (skb)
132 return &ipv6_hdr(skb)->daddr;
133 return daddr;
134 }
135
136 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
137 struct sk_buff *skb,
138 const void *daddr)
139 {
140 struct rt6_info *rt = (struct rt6_info *) dst;
141 struct neighbour *n;
142
143 daddr = choose_neigh_daddr(rt, skb, daddr);
144 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
145 if (n)
146 return n;
147 return neigh_create(&nd_tbl, daddr, dst->dev);
148 }
149
150 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
151 {
152 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
153 if (!n) {
154 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
155 if (IS_ERR(n))
156 return PTR_ERR(n);
157 }
158 rt->n = n;
159
160 return 0;
161 }
162
163 static struct dst_ops ip6_dst_ops_template = {
164 .family = AF_INET6,
165 .protocol = cpu_to_be16(ETH_P_IPV6),
166 .gc = ip6_dst_gc,
167 .gc_thresh = 1024,
168 .check = ip6_dst_check,
169 .default_advmss = ip6_default_advmss,
170 .mtu = ip6_mtu,
171 .cow_metrics = ipv6_cow_metrics,
172 .destroy = ip6_dst_destroy,
173 .ifdown = ip6_dst_ifdown,
174 .negative_advice = ip6_negative_advice,
175 .link_failure = ip6_link_failure,
176 .update_pmtu = ip6_rt_update_pmtu,
177 .local_out = __ip6_local_out,
178 .neigh_lookup = ip6_neigh_lookup,
179 };
180
181 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
182 {
183 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
184
185 return mtu ? : dst->dev->mtu;
186 }
187
188 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
189 {
190 }
191
192 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
193 unsigned long old)
194 {
195 return NULL;
196 }
197
198 static struct dst_ops ip6_dst_blackhole_ops = {
199 .family = AF_INET6,
200 .protocol = cpu_to_be16(ETH_P_IPV6),
201 .destroy = ip6_dst_destroy,
202 .check = ip6_dst_check,
203 .mtu = ip6_blackhole_mtu,
204 .default_advmss = ip6_default_advmss,
205 .update_pmtu = ip6_rt_blackhole_update_pmtu,
206 .cow_metrics = ip6_rt_blackhole_cow_metrics,
207 .neigh_lookup = ip6_neigh_lookup,
208 };
209
210 static const u32 ip6_template_metrics[RTAX_MAX] = {
211 [RTAX_HOPLIMIT - 1] = 255,
212 };
213
214 static struct rt6_info ip6_null_entry_template = {
215 .dst = {
216 .__refcnt = ATOMIC_INIT(1),
217 .__use = 1,
218 .obsolete = -1,
219 .error = -ENETUNREACH,
220 .input = ip6_pkt_discard,
221 .output = ip6_pkt_discard_out,
222 },
223 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
224 .rt6i_protocol = RTPROT_KERNEL,
225 .rt6i_metric = ~(u32) 0,
226 .rt6i_ref = ATOMIC_INIT(1),
227 };
228
229 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
230
231 static int ip6_pkt_prohibit(struct sk_buff *skb);
232 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
233
234 static struct rt6_info ip6_prohibit_entry_template = {
235 .dst = {
236 .__refcnt = ATOMIC_INIT(1),
237 .__use = 1,
238 .obsolete = -1,
239 .error = -EACCES,
240 .input = ip6_pkt_prohibit,
241 .output = ip6_pkt_prohibit_out,
242 },
243 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
244 .rt6i_protocol = RTPROT_KERNEL,
245 .rt6i_metric = ~(u32) 0,
246 .rt6i_ref = ATOMIC_INIT(1),
247 };
248
249 static struct rt6_info ip6_blk_hole_entry_template = {
250 .dst = {
251 .__refcnt = ATOMIC_INIT(1),
252 .__use = 1,
253 .obsolete = -1,
254 .error = -EINVAL,
255 .input = dst_discard,
256 .output = dst_discard,
257 },
258 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
259 .rt6i_protocol = RTPROT_KERNEL,
260 .rt6i_metric = ~(u32) 0,
261 .rt6i_ref = ATOMIC_INIT(1),
262 };
263
264 #endif
265
266 /* allocate dst with ip6_dst_ops */
267 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
268 struct net_device *dev,
269 int flags,
270 struct fib6_table *table)
271 {
272 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
273 0, 0, flags);
274
275 if (rt) {
276 memset(&rt->n, 0,
277 sizeof(*rt) - sizeof(struct dst_entry));
278 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
279 }
280 return rt;
281 }
282
283 static void ip6_dst_destroy(struct dst_entry *dst)
284 {
285 struct rt6_info *rt = (struct rt6_info *)dst;
286 struct inet6_dev *idev = rt->rt6i_idev;
287
288 if (rt->n)
289 neigh_release(rt->n);
290
291 if (!(rt->dst.flags & DST_HOST))
292 dst_destroy_metrics_generic(dst);
293
294 if (idev) {
295 rt->rt6i_idev = NULL;
296 in6_dev_put(idev);
297 }
298
299 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
300 dst_release(dst->from);
301
302 if (rt6_has_peer(rt)) {
303 struct inet_peer *peer = rt6_peer_ptr(rt);
304 inet_putpeer(peer);
305 }
306 }
307
308 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
309
310 static u32 rt6_peer_genid(void)
311 {
312 return atomic_read(&__rt6_peer_genid);
313 }
314
315 void rt6_bind_peer(struct rt6_info *rt, int create)
316 {
317 struct inet_peer_base *base;
318 struct inet_peer *peer;
319
320 base = inetpeer_base_ptr(rt->_rt6i_peer);
321 if (!base)
322 return;
323
324 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
325 if (peer) {
326 if (!rt6_set_peer(rt, peer))
327 inet_putpeer(peer);
328 else
329 rt->rt6i_peer_genid = rt6_peer_genid();
330 }
331 }
332
333 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
334 int how)
335 {
336 struct rt6_info *rt = (struct rt6_info *)dst;
337 struct inet6_dev *idev = rt->rt6i_idev;
338 struct net_device *loopback_dev =
339 dev_net(dev)->loopback_dev;
340
341 if (dev != loopback_dev) {
342 if (idev && idev->dev == dev) {
343 struct inet6_dev *loopback_idev =
344 in6_dev_get(loopback_dev);
345 if (loopback_idev) {
346 rt->rt6i_idev = loopback_idev;
347 in6_dev_put(idev);
348 }
349 }
350 if (rt->n && rt->n->dev == dev) {
351 rt->n->dev = loopback_dev;
352 dev_hold(loopback_dev);
353 dev_put(dev);
354 }
355 }
356 }
357
358 static bool rt6_check_expired(const struct rt6_info *rt)
359 {
360 struct rt6_info *ort = NULL;
361
362 if (rt->rt6i_flags & RTF_EXPIRES) {
363 if (time_after(jiffies, rt->dst.expires))
364 return true;
365 } else if (rt->dst.from) {
366 ort = (struct rt6_info *) rt->dst.from;
367 return (ort->rt6i_flags & RTF_EXPIRES) &&
368 time_after(jiffies, ort->dst.expires);
369 }
370 return false;
371 }
372
373 static bool rt6_need_strict(const struct in6_addr *daddr)
374 {
375 return ipv6_addr_type(daddr) &
376 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
377 }
378
379 /*
380  *	Route lookup. The relevant table->tb6_lock is assumed to be held.
381 */
382
383 static inline struct rt6_info *rt6_device_match(struct net *net,
384 struct rt6_info *rt,
385 const struct in6_addr *saddr,
386 int oif,
387 int flags)
388 {
389 struct rt6_info *local = NULL;
390 struct rt6_info *sprt;
391
392 if (!oif && ipv6_addr_any(saddr))
393 goto out;
394
395 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
396 struct net_device *dev = sprt->dst.dev;
397
398 if (oif) {
399 if (dev->ifindex == oif)
400 return sprt;
401 if (dev->flags & IFF_LOOPBACK) {
402 if (!sprt->rt6i_idev ||
403 sprt->rt6i_idev->dev->ifindex != oif) {
404 if (flags & RT6_LOOKUP_F_IFACE && oif)
405 continue;
406 if (local && (!oif ||
407 local->rt6i_idev->dev->ifindex == oif))
408 continue;
409 }
410 local = sprt;
411 }
412 } else {
413 if (ipv6_chk_addr(net, saddr, dev,
414 flags & RT6_LOOKUP_F_IFACE))
415 return sprt;
416 }
417 }
418
419 if (oif) {
420 if (local)
421 return local;
422
423 if (flags & RT6_LOOKUP_F_IFACE)
424 return net->ipv6.ip6_null_entry;
425 }
426 out:
427 return rt;
428 }
429
430 #ifdef CONFIG_IPV6_ROUTER_PREF
431 static void rt6_probe(struct rt6_info *rt)
432 {
433 struct neighbour *neigh;
434 /*
435 * Okay, this does not seem to be appropriate
436 * for now, however, we need to check if it
437 * is really so; aka Router Reachability Probing.
438 *
439 * Router Reachability Probe MUST be rate-limited
440 * to no more than one per minute.
441 */
442 rcu_read_lock();
443 neigh = rt ? rt->n : NULL;
444 if (!neigh || (neigh->nud_state & NUD_VALID))
445 goto out;
446 read_lock_bh(&neigh->lock);
447 if (!(neigh->nud_state & NUD_VALID) &&
448 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
449 struct in6_addr mcaddr;
450 struct in6_addr *target;
451
452 neigh->updated = jiffies;
453 read_unlock_bh(&neigh->lock);
454
455 target = (struct in6_addr *)&neigh->primary_key;
456 addrconf_addr_solict_mult(target, &mcaddr);
457 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
458 } else {
459 read_unlock_bh(&neigh->lock);
460 }
461 out:
462 rcu_read_unlock();
463 }
464 #else
465 static inline void rt6_probe(struct rt6_info *rt)
466 {
467 }
468 #endif
469
470 /*
471 * Default Router Selection (RFC 2461 6.3.6)
472 */
473 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
474 {
475 struct net_device *dev = rt->dst.dev;
476 if (!oif || dev->ifindex == oif)
477 return 2;
478 if ((dev->flags & IFF_LOOPBACK) &&
479 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
480 return 1;
481 return 0;
482 }
483
484 static inline int rt6_check_neigh(struct rt6_info *rt)
485 {
486 struct neighbour *neigh;
487 int m;
488
489 rcu_read_lock();
490 neigh = rt->n;
491 if (rt->rt6i_flags & RTF_NONEXTHOP ||
492 !(rt->rt6i_flags & RTF_GATEWAY))
493 m = 1;
494 else if (neigh) {
495 read_lock_bh(&neigh->lock);
496 if (neigh->nud_state & NUD_VALID)
497 m = 2;
498 #ifdef CONFIG_IPV6_ROUTER_PREF
499 else if (neigh->nud_state & NUD_FAILED)
500 m = 0;
501 #endif
502 else
503 m = 1;
504 read_unlock_bh(&neigh->lock);
505 } else
506 m = 0;
507 rcu_read_unlock();
508 return m;
509 }
510
511 static int rt6_score_route(struct rt6_info *rt, int oif,
512 int strict)
513 {
514 int m, n;
515
516 m = rt6_check_dev(rt, oif);
517 if (!m && (strict & RT6_LOOKUP_F_IFACE))
518 return -1;
519 #ifdef CONFIG_IPV6_ROUTER_PREF
520 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
521 #endif
522 n = rt6_check_neigh(rt);
523 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
524 return -1;
525 return m;
526 }
527
528 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
529 int *mpri, struct rt6_info *match)
530 {
531 int m;
532
533 if (rt6_check_expired(rt))
534 goto out;
535
536 m = rt6_score_route(rt, oif, strict);
537 if (m < 0)
538 goto out;
539
540 if (m > *mpri) {
541 if (strict & RT6_LOOKUP_F_REACHABLE)
542 rt6_probe(match);
543 *mpri = m;
544 match = rt;
545 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
546 rt6_probe(rt);
547 }
548
549 out:
550 return match;
551 }
552
553 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
554 struct rt6_info *rr_head,
555 u32 metric, int oif, int strict)
556 {
557 struct rt6_info *rt, *match;
558 int mpri = -1;
559
560 match = NULL;
561 for (rt = rr_head; rt && rt->rt6i_metric == metric;
562 rt = rt->dst.rt6_next)
563 match = find_match(rt, oif, strict, &mpri, match);
564 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
565 rt = rt->dst.rt6_next)
566 match = find_match(rt, oif, strict, &mpri, match);
567
568 return match;
569 }
570
571 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
572 {
573 struct rt6_info *match, *rt0;
574 struct net *net;
575
576 rt0 = fn->rr_ptr;
577 if (!rt0)
578 fn->rr_ptr = rt0 = fn->leaf;
579
580 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
581
582 if (!match &&
583 (strict & RT6_LOOKUP_F_REACHABLE)) {
584 struct rt6_info *next = rt0->dst.rt6_next;
585
586 /* no entries matched; do round-robin */
587 if (!next || next->rt6i_metric != rt0->rt6i_metric)
588 next = fn->leaf;
589
590 if (next != rt0)
591 fn->rr_ptr = next;
592 }
593
594 net = dev_net(rt0->dst.dev);
595 return match ? match : net->ipv6.ip6_null_entry;
596 }
597
598 #ifdef CONFIG_IPV6_ROUTE_INFO
599 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
600 const struct in6_addr *gwaddr)
601 {
602 struct net *net = dev_net(dev);
603 struct route_info *rinfo = (struct route_info *) opt;
604 struct in6_addr prefix_buf, *prefix;
605 unsigned int pref;
606 unsigned long lifetime;
607 struct rt6_info *rt;
608
609 if (len < sizeof(struct route_info)) {
610 return -EINVAL;
611 }
612
613 /* Sanity check for prefix_len and length */
614 if (rinfo->length > 3) {
615 return -EINVAL;
616 } else if (rinfo->prefix_len > 128) {
617 return -EINVAL;
618 } else if (rinfo->prefix_len > 64) {
619 if (rinfo->length < 2) {
620 return -EINVAL;
621 }
622 } else if (rinfo->prefix_len > 0) {
623 if (rinfo->length < 1) {
624 return -EINVAL;
625 }
626 }
627
628 pref = rinfo->route_pref;
629 if (pref == ICMPV6_ROUTER_PREF_INVALID)
630 return -EINVAL;
631
632 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
633
634 if (rinfo->length == 3)
635 prefix = (struct in6_addr *)rinfo->prefix;
636 else {
637 /* this function is safe */
638 ipv6_addr_prefix(&prefix_buf,
639 (struct in6_addr *)rinfo->prefix,
640 rinfo->prefix_len);
641 prefix = &prefix_buf;
642 }
643
644 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
645 dev->ifindex);
646
647 if (rt && !lifetime) {
648 ip6_del_rt(rt);
649 rt = NULL;
650 }
651
652 if (!rt && lifetime)
653 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
654 pref);
655 else if (rt)
656 rt->rt6i_flags = RTF_ROUTEINFO |
657 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
658
659 if (rt) {
660 if (!addrconf_finite_timeout(lifetime))
661 rt6_clean_expires(rt);
662 else
663 rt6_set_expires(rt, jiffies + HZ * lifetime);
664
665 dst_release(&rt->dst);
666 }
667 return 0;
668 }
669 #endif
670
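/*
 * Added commentary (not in the original file): BACKTRACK() is shared by the
 * lookup paths below.  After rt6_device_match()/rt6_select() it checks
 * whether the chosen route is the per-netns null entry; if so, it walks back
 * up the fib6 tree, descending into a parent's source-address subtree where
 * one exists, until it reaches a node carrying route info (RTN_RTINFO) and
 * jumps to the caller's "restart" label.  Reaching the tree root
 * (RTN_TL_ROOT) gives up and jumps to the caller's "out" label.
 */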
671 #define BACKTRACK(__net, saddr) \
672 do { \
673 if (rt == __net->ipv6.ip6_null_entry) { \
674 struct fib6_node *pn; \
675 while (1) { \
676 if (fn->fn_flags & RTN_TL_ROOT) \
677 goto out; \
678 pn = fn->parent; \
679 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
680 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
681 else \
682 fn = pn; \
683 if (fn->fn_flags & RTN_RTINFO) \
684 goto restart; \
685 } \
686 } \
687 } while (0)
688
689 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
690 struct fib6_table *table,
691 struct flowi6 *fl6, int flags)
692 {
693 struct fib6_node *fn;
694 struct rt6_info *rt;
695
696 read_lock_bh(&table->tb6_lock);
697 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
698 restart:
699 rt = fn->leaf;
700 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
701 BACKTRACK(net, &fl6->saddr);
702 out:
703 dst_use(&rt->dst, jiffies);
704 read_unlock_bh(&table->tb6_lock);
705 return rt;
706
707 }
708
709 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
710 int flags)
711 {
712 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
713 }
714 EXPORT_SYMBOL_GPL(ip6_route_lookup);
715
716 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
717 const struct in6_addr *saddr, int oif, int strict)
718 {
719 struct flowi6 fl6 = {
720 .flowi6_oif = oif,
721 .daddr = *daddr,
722 };
723 struct dst_entry *dst;
724 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
725
726 if (saddr) {
727 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
728 flags |= RT6_LOOKUP_F_HAS_SADDR;
729 }
730
731 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
732 if (dst->error == 0)
733 return (struct rt6_info *) dst;
734
735 dst_release(dst);
736
737 return NULL;
738 }
739
740 EXPORT_SYMBOL(rt6_lookup);
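
/*
 * Illustrative sketch (not part of the original file): a minimal caller of
 * rt6_lookup().  The helper name example_daddr_has_route() is hypothetical;
 * it only demonstrates the lookup/release pattern with a non-strict lookup.
 */
static bool example_daddr_has_route(struct net *net,
				    const struct in6_addr *daddr)
{
	/* saddr == NULL, oif == 0, strict == 0: any source, any interface */
	struct rt6_info *rt = rt6_lookup(net, daddr, NULL, 0, 0);

	if (!rt)
		return false;
	/* rt6_lookup() returns a held route; the caller must release it */
	dst_release(&rt->dst);
	return true;
}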
741
742 /* ip6_ins_rt is called with table->tb6_lock free (not held).
743    It takes ownership of the new route entry; if the addition fails
744    for any reason, the route is freed. In any case, if the caller
745    does not hold a reference on it, it may be destroyed.
746  */
747
748 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
749 {
750 int err;
751 struct fib6_table *table;
752
753 table = rt->rt6i_table;
754 write_lock_bh(&table->tb6_lock);
755 err = fib6_add(&table->tb6_root, rt, info);
756 write_unlock_bh(&table->tb6_lock);
757
758 return err;
759 }
760
761 int ip6_ins_rt(struct rt6_info *rt)
762 {
763 struct nl_info info = {
764 .nl_net = dev_net(rt->dst.dev),
765 };
766 return __ip6_ins_rt(rt, &info);
767 }
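
/*
 * Illustrative sketch (not part of the original file): the ownership rule
 * described in the comment above __ip6_ins_rt().  example_install_route()
 * is hypothetical; it assumes rt was freshly allocated by the caller.
 */
static int example_install_route(struct rt6_info *rt)
{
	int err = ip6_ins_rt(rt);

	/*
	 * If insertion failed, the route has already been freed by the
	 * fib code, so the caller must not free or release it again.
	 */
	return err;
}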
768
769 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
770 const struct in6_addr *daddr,
771 const struct in6_addr *saddr)
772 {
773 struct rt6_info *rt;
774
775 /*
776 * Clone the route.
777 */
778
779 rt = ip6_rt_copy(ort, daddr);
780
781 if (rt) {
782 int attempts = !in_softirq();
783
784 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
785 if (ort->rt6i_dst.plen != 128 &&
786 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
787 rt->rt6i_flags |= RTF_ANYCAST;
788 rt->rt6i_gateway = *daddr;
789 }
790
791 rt->rt6i_flags |= RTF_CACHE;
792
793 #ifdef CONFIG_IPV6_SUBTREES
794 if (rt->rt6i_src.plen && saddr) {
795 rt->rt6i_src.addr = *saddr;
796 rt->rt6i_src.plen = 128;
797 }
798 #endif
799
800 retry:
801 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
802 struct net *net = dev_net(rt->dst.dev);
803 int saved_rt_min_interval =
804 net->ipv6.sysctl.ip6_rt_gc_min_interval;
805 int saved_rt_elasticity =
806 net->ipv6.sysctl.ip6_rt_gc_elasticity;
807
808 if (attempts-- > 0) {
809 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
810 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
811
812 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
813
814 net->ipv6.sysctl.ip6_rt_gc_elasticity =
815 saved_rt_elasticity;
816 net->ipv6.sysctl.ip6_rt_gc_min_interval =
817 saved_rt_min_interval;
818 goto retry;
819 }
820
821 net_warn_ratelimited("Neighbour table overflow\n");
822 dst_free(&rt->dst);
823 return NULL;
824 }
825 }
826
827 return rt;
828 }
829
830 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
831 const struct in6_addr *daddr)
832 {
833 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
834
835 if (rt) {
836 rt->rt6i_flags |= RTF_CACHE;
837 rt->n = neigh_clone(ort->n);
838 }
839 return rt;
840 }
841
842 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
843 struct flowi6 *fl6, int flags)
844 {
845 struct fib6_node *fn;
846 struct rt6_info *rt, *nrt;
847 int strict = 0;
848 int attempts = 3;
849 int err;
850 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
851
852 strict |= flags & RT6_LOOKUP_F_IFACE;
853
854 relookup:
855 read_lock_bh(&table->tb6_lock);
856
857 restart_2:
858 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
859
860 restart:
861 rt = rt6_select(fn, oif, strict | reachable);
862
863 BACKTRACK(net, &fl6->saddr);
864 if (rt == net->ipv6.ip6_null_entry ||
865 rt->rt6i_flags & RTF_CACHE)
866 goto out;
867
868 dst_hold(&rt->dst);
869 read_unlock_bh(&table->tb6_lock);
870
871 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
872 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
873 else if (!(rt->dst.flags & DST_HOST))
874 nrt = rt6_alloc_clone(rt, &fl6->daddr);
875 else
876 goto out2;
877
878 dst_release(&rt->dst);
879 rt = nrt ? : net->ipv6.ip6_null_entry;
880
881 dst_hold(&rt->dst);
882 if (nrt) {
883 err = ip6_ins_rt(nrt);
884 if (!err)
885 goto out2;
886 }
887
888 if (--attempts <= 0)
889 goto out2;
890
891 /*
892 * Race condition! In the gap, when table->tb6_lock was
893 * released someone could insert this route. Relookup.
894 */
895 dst_release(&rt->dst);
896 goto relookup;
897
898 out:
899 if (reachable) {
900 reachable = 0;
901 goto restart_2;
902 }
903 dst_hold(&rt->dst);
904 read_unlock_bh(&table->tb6_lock);
905 out2:
906 rt->dst.lastuse = jiffies;
907 rt->dst.__use++;
908
909 return rt;
910 }
911
912 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
913 struct flowi6 *fl6, int flags)
914 {
915 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
916 }
917
918 static struct dst_entry *ip6_route_input_lookup(struct net *net,
919 struct net_device *dev,
920 struct flowi6 *fl6, int flags)
921 {
922 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
923 flags |= RT6_LOOKUP_F_IFACE;
924
925 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
926 }
927
928 void ip6_route_input(struct sk_buff *skb)
929 {
930 const struct ipv6hdr *iph = ipv6_hdr(skb);
931 struct net *net = dev_net(skb->dev);
932 int flags = RT6_LOOKUP_F_HAS_SADDR;
933 struct flowi6 fl6 = {
934 .flowi6_iif = skb->dev->ifindex,
935 .daddr = iph->daddr,
936 .saddr = iph->saddr,
937 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
938 .flowi6_mark = skb->mark,
939 .flowi6_proto = iph->nexthdr,
940 };
941
942 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
943 }
944
945 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
946 struct flowi6 *fl6, int flags)
947 {
948 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
949 }
950
951 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
952 struct flowi6 *fl6)
953 {
954 int flags = 0;
955
956 fl6->flowi6_iif = net->loopback_dev->ifindex;
957
958 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
959 flags |= RT6_LOOKUP_F_IFACE;
960
961 if (!ipv6_addr_any(&fl6->saddr))
962 flags |= RT6_LOOKUP_F_HAS_SADDR;
963 else if (sk)
964 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
965
966 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
967 }
968
969 EXPORT_SYMBOL(ip6_route_output);
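
/*
 * Illustrative sketch (not part of the original file): a typical caller of
 * ip6_route_output().  example_output_route() is hypothetical; it mirrors
 * the flowi6 set-up used by ip6_update_pmtu() further below.
 */
static struct dst_entry *example_output_route(struct net *net,
					      const struct in6_addr *daddr,
					      const struct in6_addr *saddr,
					      int oif)
{
	struct flowi6 fl6;
	struct dst_entry *dst;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_oif = oif;
	fl6.daddr = *daddr;
	fl6.saddr = *saddr;

	dst = ip6_route_output(net, NULL, &fl6);
	if (dst->error) {
		/* even a reject route is returned with a reference held */
		dst_release(dst);
		return NULL;
	}
	return dst;
}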
970
971 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
972 {
973 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
974 struct dst_entry *new = NULL;
975
976 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, 0, 0);
977 if (rt) {
978 memset(&rt->rt6i_table, 0, sizeof(*rt) - sizeof(struct dst_entry));
979 rt6_init_peer(rt, net->ipv6.peers);
980
981 new = &rt->dst;
982
983 new->__use = 1;
984 new->input = dst_discard;
985 new->output = dst_discard;
986
987 if (dst_metrics_read_only(&ort->dst))
988 new->_metrics = ort->dst._metrics;
989 else
990 dst_copy_metrics(new, &ort->dst);
991 rt->rt6i_idev = ort->rt6i_idev;
992 if (rt->rt6i_idev)
993 in6_dev_hold(rt->rt6i_idev);
994
995 rt->rt6i_gateway = ort->rt6i_gateway;
996 rt->rt6i_flags = ort->rt6i_flags;
997 rt6_clean_expires(rt);
998 rt->rt6i_metric = 0;
999
1000 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1001 #ifdef CONFIG_IPV6_SUBTREES
1002 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1003 #endif
1004
1005 dst_free(new);
1006 }
1007
1008 dst_release(dst_orig);
1009 return new ? new : ERR_PTR(-ENOMEM);
1010 }
1011
1012 /*
1013 * Destination cache support functions
1014 */
1015
1016 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1017 {
1018 struct rt6_info *rt;
1019
1020 rt = (struct rt6_info *) dst;
1021
1022 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1023 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1024 if (!rt6_has_peer(rt))
1025 rt6_bind_peer(rt, 0);
1026 rt->rt6i_peer_genid = rt6_peer_genid();
1027 }
1028 return dst;
1029 }
1030 return NULL;
1031 }
1032
1033 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1034 {
1035 struct rt6_info *rt = (struct rt6_info *) dst;
1036
1037 if (rt) {
1038 if (rt->rt6i_flags & RTF_CACHE) {
1039 if (rt6_check_expired(rt)) {
1040 ip6_del_rt(rt);
1041 dst = NULL;
1042 }
1043 } else {
1044 dst_release(dst);
1045 dst = NULL;
1046 }
1047 }
1048 return dst;
1049 }
1050
1051 static void ip6_link_failure(struct sk_buff *skb)
1052 {
1053 struct rt6_info *rt;
1054
1055 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1056
1057 rt = (struct rt6_info *) skb_dst(skb);
1058 if (rt) {
1059 if (rt->rt6i_flags & RTF_CACHE)
1060 rt6_update_expires(rt, 0);
1061 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1062 rt->rt6i_node->fn_sernum = -1;
1063 }
1064 }
1065
1066 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
1067 {
1068 struct rt6_info *rt6 = (struct rt6_info*)dst;
1069
1070 dst_confirm(dst);
1071 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1072 struct net *net = dev_net(dst->dev);
1073
1074 rt6->rt6i_flags |= RTF_MODIFIED;
1075 if (mtu < IPV6_MIN_MTU) {
1076 u32 features = dst_metric(dst, RTAX_FEATURES);
1077 mtu = IPV6_MIN_MTU;
1078 features |= RTAX_FEATURE_ALLFRAG;
1079 dst_metric_set(dst, RTAX_FEATURES, features);
1080 }
1081 dst_metric_set(dst, RTAX_MTU, mtu);
1082 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1083 }
1084 }
1085
1086 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1087 int oif, u32 mark)
1088 {
1089 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1090 struct dst_entry *dst;
1091 struct flowi6 fl6;
1092
1093 memset(&fl6, 0, sizeof(fl6));
1094 fl6.flowi6_oif = oif;
1095 fl6.flowi6_mark = mark;
1096 fl6.flowi6_flags = 0;
1097 fl6.daddr = iph->daddr;
1098 fl6.saddr = iph->saddr;
1099 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1100
1101 dst = ip6_route_output(net, NULL, &fl6);
1102 if (!dst->error)
1103 ip6_rt_update_pmtu(dst, ntohl(mtu));
1104 dst_release(dst);
1105 }
1106 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1107
1108 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1109 {
1110 ip6_update_pmtu(skb, sock_net(sk), mtu,
1111 sk->sk_bound_dev_if, sk->sk_mark);
1112 }
1113 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
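
/*
 * Illustrative sketch (not part of the original file): feeding the MTU from
 * a received Packet Too Big error into the routing cache.  The helper name
 * example_handle_pkt_too_big() and its arguments are hypothetical.
 */
static void example_handle_pkt_too_big(struct sk_buff *skb, struct net *net,
				       __be32 mtu, int oif)
{
	/* mtu is in network byte order; ip6_update_pmtu() does the ntohl() */
	ip6_update_pmtu(skb, net, mtu, oif, skb->mark);
}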
1114
1115 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1116 {
1117 struct net_device *dev = dst->dev;
1118 unsigned int mtu = dst_mtu(dst);
1119 struct net *net = dev_net(dev);
1120
1121 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1122
1123 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1124 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1125
1126 /*
1127 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1128 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1129 * IPV6_MAXPLEN is also valid and means: "any MSS,
1130 * rely only on pmtu discovery"
1131 */
1132 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1133 mtu = IPV6_MAXPLEN;
1134 return mtu;
1135 }
1136
1137 static unsigned int ip6_mtu(const struct dst_entry *dst)
1138 {
1139 struct inet6_dev *idev;
1140 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1141
1142 if (mtu)
1143 return mtu;
1144
1145 mtu = IPV6_MIN_MTU;
1146
1147 rcu_read_lock();
1148 idev = __in6_dev_get(dst->dev);
1149 if (idev)
1150 mtu = idev->cnf.mtu6;
1151 rcu_read_unlock();
1152
1153 return mtu;
1154 }
1155
1156 static struct dst_entry *icmp6_dst_gc_list;
1157 static DEFINE_SPINLOCK(icmp6_dst_lock);
1158
1159 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1160 struct neighbour *neigh,
1161 struct flowi6 *fl6)
1162 {
1163 struct dst_entry *dst;
1164 struct rt6_info *rt;
1165 struct inet6_dev *idev = in6_dev_get(dev);
1166 struct net *net = dev_net(dev);
1167
1168 if (unlikely(!idev))
1169 return ERR_PTR(-ENODEV);
1170
1171 rt = ip6_dst_alloc(net, dev, 0, NULL);
1172 if (unlikely(!rt)) {
1173 in6_dev_put(idev);
1174 dst = ERR_PTR(-ENOMEM);
1175 goto out;
1176 }
1177
1178 if (neigh)
1179 neigh_hold(neigh);
1180 else {
1181 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1182 if (IS_ERR(neigh)) {
1183 in6_dev_put(idev);
1184 dst_free(&rt->dst);
1185 return ERR_CAST(neigh);
1186 }
1187 }
1188
1189 rt->dst.flags |= DST_HOST;
1190 rt->dst.output = ip6_output;
1191 rt->n = neigh;
1192 atomic_set(&rt->dst.__refcnt, 1);
1193 rt->rt6i_dst.addr = fl6->daddr;
1194 rt->rt6i_dst.plen = 128;
1195 rt->rt6i_idev = idev;
1196 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1197
1198 spin_lock_bh(&icmp6_dst_lock);
1199 rt->dst.next = icmp6_dst_gc_list;
1200 icmp6_dst_gc_list = &rt->dst;
1201 spin_unlock_bh(&icmp6_dst_lock);
1202
1203 fib6_force_start_gc(net);
1204
1205 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1206
1207 out:
1208 return dst;
1209 }
1210
1211 int icmp6_dst_gc(void)
1212 {
1213 struct dst_entry *dst, **pprev;
1214 int more = 0;
1215
1216 spin_lock_bh(&icmp6_dst_lock);
1217 pprev = &icmp6_dst_gc_list;
1218
1219 while ((dst = *pprev) != NULL) {
1220 if (!atomic_read(&dst->__refcnt)) {
1221 *pprev = dst->next;
1222 dst_free(dst);
1223 } else {
1224 pprev = &dst->next;
1225 ++more;
1226 }
1227 }
1228
1229 spin_unlock_bh(&icmp6_dst_lock);
1230
1231 return more;
1232 }
1233
1234 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1235 void *arg)
1236 {
1237 struct dst_entry *dst, **pprev;
1238
1239 spin_lock_bh(&icmp6_dst_lock);
1240 pprev = &icmp6_dst_gc_list;
1241 while ((dst = *pprev) != NULL) {
1242 struct rt6_info *rt = (struct rt6_info *) dst;
1243 if (func(rt, arg)) {
1244 *pprev = dst->next;
1245 dst_free(dst);
1246 } else {
1247 pprev = &dst->next;
1248 }
1249 }
1250 spin_unlock_bh(&icmp6_dst_lock);
1251 }
1252
1253 static int ip6_dst_gc(struct dst_ops *ops)
1254 {
1255 unsigned long now = jiffies;
1256 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1257 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1258 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1259 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1260 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1261 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1262 int entries;
1263
1264 entries = dst_entries_get_fast(ops);
1265 if (time_after(rt_last_gc + rt_min_interval, now) &&
1266 entries <= rt_max_size)
1267 goto out;
1268
1269 net->ipv6.ip6_rt_gc_expire++;
1270 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1271 net->ipv6.ip6_rt_last_gc = now;
1272 entries = dst_entries_get_slow(ops);
1273 if (entries < ops->gc_thresh)
1274 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1275 out:
1276 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1277 return entries > rt_max_size;
1278 }
1279
1280 /* Clean the host part of a prefix. Not necessary in a radix tree,
1281    but it results in cleaner routing tables.
1282
1283    Remove this only once everything else works!
1284  */
1285
1286 int ip6_dst_hoplimit(struct dst_entry *dst)
1287 {
1288 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1289 if (hoplimit == 0) {
1290 struct net_device *dev = dst->dev;
1291 struct inet6_dev *idev;
1292
1293 rcu_read_lock();
1294 idev = __in6_dev_get(dev);
1295 if (idev)
1296 hoplimit = idev->cnf.hop_limit;
1297 else
1298 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1299 rcu_read_unlock();
1300 }
1301 return hoplimit;
1302 }
1303 EXPORT_SYMBOL(ip6_dst_hoplimit);
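
/*
 * Illustrative sketch (not part of the original file): using
 * ip6_dst_hoplimit() when filling in an outgoing IPv6 header.
 * example_set_hop_limit() is hypothetical.
 */
static void example_set_hop_limit(struct ipv6hdr *hdr, struct dst_entry *dst)
{
	/* falls back to the interface / all-device default when unset */
	hdr->hop_limit = ip6_dst_hoplimit(dst);
}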
1304
1305 /*
1306 *
1307 */
1308
1309 int ip6_route_add(struct fib6_config *cfg)
1310 {
1311 int err;
1312 struct net *net = cfg->fc_nlinfo.nl_net;
1313 struct rt6_info *rt = NULL;
1314 struct net_device *dev = NULL;
1315 struct inet6_dev *idev = NULL;
1316 struct fib6_table *table;
1317 int addr_type;
1318
1319 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1320 return -EINVAL;
1321 #ifndef CONFIG_IPV6_SUBTREES
1322 if (cfg->fc_src_len)
1323 return -EINVAL;
1324 #endif
1325 if (cfg->fc_ifindex) {
1326 err = -ENODEV;
1327 dev = dev_get_by_index(net, cfg->fc_ifindex);
1328 if (!dev)
1329 goto out;
1330 idev = in6_dev_get(dev);
1331 if (!idev)
1332 goto out;
1333 }
1334
1335 if (cfg->fc_metric == 0)
1336 cfg->fc_metric = IP6_RT_PRIO_USER;
1337
1338 err = -ENOBUFS;
1339 if (cfg->fc_nlinfo.nlh &&
1340 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1341 table = fib6_get_table(net, cfg->fc_table);
1342 if (!table) {
1343 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1344 table = fib6_new_table(net, cfg->fc_table);
1345 }
1346 } else {
1347 table = fib6_new_table(net, cfg->fc_table);
1348 }
1349
1350 if (!table)
1351 goto out;
1352
1353 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1354
1355 if (!rt) {
1356 err = -ENOMEM;
1357 goto out;
1358 }
1359
1360 rt->dst.obsolete = -1;
1361
1362 if (cfg->fc_flags & RTF_EXPIRES)
1363 rt6_set_expires(rt, jiffies +
1364 clock_t_to_jiffies(cfg->fc_expires));
1365 else
1366 rt6_clean_expires(rt);
1367
1368 if (cfg->fc_protocol == RTPROT_UNSPEC)
1369 cfg->fc_protocol = RTPROT_BOOT;
1370 rt->rt6i_protocol = cfg->fc_protocol;
1371
1372 addr_type = ipv6_addr_type(&cfg->fc_dst);
1373
1374 if (addr_type & IPV6_ADDR_MULTICAST)
1375 rt->dst.input = ip6_mc_input;
1376 else if (cfg->fc_flags & RTF_LOCAL)
1377 rt->dst.input = ip6_input;
1378 else
1379 rt->dst.input = ip6_forward;
1380
1381 rt->dst.output = ip6_output;
1382
1383 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1384 rt->rt6i_dst.plen = cfg->fc_dst_len;
1385 if (rt->rt6i_dst.plen == 128)
1386 rt->dst.flags |= DST_HOST;
1387
1388 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1389 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1390 if (!metrics) {
1391 err = -ENOMEM;
1392 goto out;
1393 }
1394 dst_init_metrics(&rt->dst, metrics, 0);
1395 }
1396 #ifdef CONFIG_IPV6_SUBTREES
1397 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1398 rt->rt6i_src.plen = cfg->fc_src_len;
1399 #endif
1400
1401 rt->rt6i_metric = cfg->fc_metric;
1402
1403 /* We cannot add true routes via loopback here,
1404 they would result in kernel looping; promote them to reject routes
1405 */
1406 if ((cfg->fc_flags & RTF_REJECT) ||
1407 (dev && (dev->flags & IFF_LOOPBACK) &&
1408 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1409 !(cfg->fc_flags & RTF_LOCAL))) {
1410 /* hold loopback dev/idev if we haven't done so. */
1411 if (dev != net->loopback_dev) {
1412 if (dev) {
1413 dev_put(dev);
1414 in6_dev_put(idev);
1415 }
1416 dev = net->loopback_dev;
1417 dev_hold(dev);
1418 idev = in6_dev_get(dev);
1419 if (!idev) {
1420 err = -ENODEV;
1421 goto out;
1422 }
1423 }
1424 rt->dst.output = ip6_pkt_discard_out;
1425 rt->dst.input = ip6_pkt_discard;
1426 rt->dst.error = -ENETUNREACH;
1427 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1428 goto install_route;
1429 }
1430
1431 if (cfg->fc_flags & RTF_GATEWAY) {
1432 const struct in6_addr *gw_addr;
1433 int gwa_type;
1434
1435 gw_addr = &cfg->fc_gateway;
1436 rt->rt6i_gateway = *gw_addr;
1437 gwa_type = ipv6_addr_type(gw_addr);
1438
1439 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1440 struct rt6_info *grt;
1441
1442 /* IPv6 strictly forbids using non-link-local
1443    addresses as a nexthop address.
1444    Otherwise, a router would not be able to send redirects.
1445    That is generally good, but in some (rare!) circumstances
1446    (SIT, PtP, NBMA NOARP links) it is handy to allow
1447    some exceptions. --ANK
1448 */
1449 err = -EINVAL;
1450 if (!(gwa_type & IPV6_ADDR_UNICAST))
1451 goto out;
1452
1453 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1454
1455 err = -EHOSTUNREACH;
1456 if (!grt)
1457 goto out;
1458 if (dev) {
1459 if (dev != grt->dst.dev) {
1460 dst_release(&grt->dst);
1461 goto out;
1462 }
1463 } else {
1464 dev = grt->dst.dev;
1465 idev = grt->rt6i_idev;
1466 dev_hold(dev);
1467 in6_dev_hold(grt->rt6i_idev);
1468 }
1469 if (!(grt->rt6i_flags & RTF_GATEWAY))
1470 err = 0;
1471 dst_release(&grt->dst);
1472
1473 if (err)
1474 goto out;
1475 }
1476 err = -EINVAL;
1477 if (!dev || (dev->flags & IFF_LOOPBACK))
1478 goto out;
1479 }
1480
1481 err = -ENODEV;
1482 if (!dev)
1483 goto out;
1484
1485 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1486 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1487 err = -EINVAL;
1488 goto out;
1489 }
1490 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1491 rt->rt6i_prefsrc.plen = 128;
1492 } else
1493 rt->rt6i_prefsrc.plen = 0;
1494
1495 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1496 err = rt6_bind_neighbour(rt, dev);
1497 if (err)
1498 goto out;
1499 }
1500
1501 rt->rt6i_flags = cfg->fc_flags;
1502
1503 install_route:
1504 if (cfg->fc_mx) {
1505 struct nlattr *nla;
1506 int remaining;
1507
1508 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1509 int type = nla_type(nla);
1510
1511 if (type) {
1512 if (type > RTAX_MAX) {
1513 err = -EINVAL;
1514 goto out;
1515 }
1516
1517 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1518 }
1519 }
1520 }
1521
1522 rt->dst.dev = dev;
1523 rt->rt6i_idev = idev;
1524 rt->rt6i_table = table;
1525
1526 cfg->fc_nlinfo.nl_net = dev_net(dev);
1527
1528 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1529
1530 out:
1531 if (dev)
1532 dev_put(dev);
1533 if (idev)
1534 in6_dev_put(idev);
1535 if (rt)
1536 dst_free(&rt->dst);
1537 return err;
1538 }
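
/*
 * Illustrative sketch (not part of the original file): building a
 * fib6_config by hand and handing it to ip6_route_add(), mirroring what
 * rt6_add_route_info()/rt6_add_dflt_router() do further below.
 * example_add_gateway_route() and its arguments are hypothetical.
 */
static int example_add_gateway_route(struct net *net,
				     const struct in6_addr *prefix,
				     int prefixlen,
				     const struct in6_addr *gwaddr,
				     int ifindex)
{
	struct fib6_config cfg = {
		.fc_table	= RT6_TABLE_MAIN,
		.fc_metric	= IP6_RT_PRIO_USER,
		.fc_ifindex	= ifindex,
		.fc_dst_len	= prefixlen,
		.fc_flags	= RTF_UP | RTF_GATEWAY,
		.fc_nlinfo.nl_net = net,
	};

	cfg.fc_dst = *prefix;
	cfg.fc_gateway = *gwaddr;

	return ip6_route_add(&cfg);
}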
1539
1540 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1541 {
1542 int err;
1543 struct fib6_table *table;
1544 struct net *net = dev_net(rt->dst.dev);
1545
1546 if (rt == net->ipv6.ip6_null_entry)
1547 return -ENOENT;
1548
1549 table = rt->rt6i_table;
1550 write_lock_bh(&table->tb6_lock);
1551
1552 err = fib6_del(rt, info);
1553 dst_release(&rt->dst);
1554
1555 write_unlock_bh(&table->tb6_lock);
1556
1557 return err;
1558 }
1559
1560 int ip6_del_rt(struct rt6_info *rt)
1561 {
1562 struct nl_info info = {
1563 .nl_net = dev_net(rt->dst.dev),
1564 };
1565 return __ip6_del_rt(rt, &info);
1566 }
1567
1568 static int ip6_route_del(struct fib6_config *cfg)
1569 {
1570 struct fib6_table *table;
1571 struct fib6_node *fn;
1572 struct rt6_info *rt;
1573 int err = -ESRCH;
1574
1575 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1576 if (!table)
1577 return err;
1578
1579 read_lock_bh(&table->tb6_lock);
1580
1581 fn = fib6_locate(&table->tb6_root,
1582 &cfg->fc_dst, cfg->fc_dst_len,
1583 &cfg->fc_src, cfg->fc_src_len);
1584
1585 if (fn) {
1586 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1587 if (cfg->fc_ifindex &&
1588 (!rt->dst.dev ||
1589 rt->dst.dev->ifindex != cfg->fc_ifindex))
1590 continue;
1591 if (cfg->fc_flags & RTF_GATEWAY &&
1592 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1593 continue;
1594 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1595 continue;
1596 dst_hold(&rt->dst);
1597 read_unlock_bh(&table->tb6_lock);
1598
1599 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1600 }
1601 }
1602 read_unlock_bh(&table->tb6_lock);
1603
1604 return err;
1605 }
1606
1607 /*
1608 * Handle redirects
1609 */
1610 struct ip6rd_flowi {
1611 struct flowi6 fl6;
1612 struct in6_addr gateway;
1613 };
1614
1615 static struct rt6_info *__ip6_route_redirect(struct net *net,
1616 struct fib6_table *table,
1617 struct flowi6 *fl6,
1618 int flags)
1619 {
1620 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
1621 struct rt6_info *rt;
1622 struct fib6_node *fn;
1623
1624 /*
1625 * Get the "current" route for this destination and
1626  * check if the redirect has come from the appropriate router.
1627 *
1628 * RFC 2461 specifies that redirects should only be
1629 * accepted if they come from the nexthop to the target.
1630 * Due to the way the routes are chosen, this notion
1631 * is a bit fuzzy and one might need to check all possible
1632 * routes.
1633 */
1634
1635 read_lock_bh(&table->tb6_lock);
1636 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
1637 restart:
1638 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1639 /*
1640 * Current route is on-link; redirect is always invalid.
1641 *
1642  * It seems the previous statement is not true: the target could
1643  * be a node that regards us as on-link (e.g. proxy ndisc).
1644  * But then the router serving it might decide that we should
1645  * know the truth 8)8) --ANK (980726).
1646 */
1647 if (rt6_check_expired(rt))
1648 continue;
1649 if (!(rt->rt6i_flags & RTF_GATEWAY))
1650 continue;
1651 if (fl6->flowi6_oif != rt->dst.dev->ifindex)
1652 continue;
1653 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1654 continue;
1655 break;
1656 }
1657
1658 if (!rt)
1659 rt = net->ipv6.ip6_null_entry;
1660 BACKTRACK(net, &fl6->saddr);
1661 out:
1662 dst_hold(&rt->dst);
1663
1664 read_unlock_bh(&table->tb6_lock);
1665
1666 return rt;
1667 };
1668
1669 static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
1670 const struct in6_addr *src,
1671 const struct in6_addr *gateway,
1672 struct net_device *dev)
1673 {
1674 int flags = RT6_LOOKUP_F_HAS_SADDR;
1675 struct net *net = dev_net(dev);
1676 struct ip6rd_flowi rdfl = {
1677 .fl6 = {
1678 .flowi6_oif = dev->ifindex,
1679 .daddr = *dest,
1680 .saddr = *src,
1681 },
1682 };
1683
1684 rdfl.gateway = *gateway;
1685
1686 if (rt6_need_strict(dest))
1687 flags |= RT6_LOOKUP_F_IFACE;
1688
1689 return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
1690 flags, __ip6_route_redirect);
1691 }
1692
1693 void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
1694 const struct in6_addr *saddr,
1695 struct neighbour *neigh, u8 *lladdr, int on_link)
1696 {
1697 struct rt6_info *rt, *nrt = NULL;
1698 struct netevent_redirect netevent;
1699 struct net *net = dev_net(neigh->dev);
1700 struct neighbour *old_neigh;
1701
1702 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1703
1704 if (rt == net->ipv6.ip6_null_entry) {
1705 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1706 goto out;
1707 }
1708
1709 /*
1710 * We have finally decided to accept it.
1711 */
1712
1713 neigh_update(neigh, lladdr, NUD_STALE,
1714 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1715 NEIGH_UPDATE_F_OVERRIDE|
1716 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1717 NEIGH_UPDATE_F_ISROUTER))
1718 );
1719
1720 /*
1721 * Redirect received -> path was valid.
1722 * Look, redirects are sent only in response to data packets,
1723 * so that this nexthop apparently is reachable. --ANK
1724 */
1725 dst_confirm(&rt->dst);
1726
1727 /* Duplicate redirect: silently ignore. */
1728 old_neigh = rt->n;
1729 if (neigh == old_neigh)
1730 goto out;
1731
1732 nrt = ip6_rt_copy(rt, dest);
1733 if (!nrt)
1734 goto out;
1735
1736 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1737 if (on_link)
1738 nrt->rt6i_flags &= ~RTF_GATEWAY;
1739
1740 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1741 nrt->n = neigh_clone(neigh);
1742
1743 if (ip6_ins_rt(nrt))
1744 goto out;
1745
1746 netevent.old = &rt->dst;
1747 netevent.old_neigh = old_neigh;
1748 netevent.new = &nrt->dst;
1749 netevent.new_neigh = neigh;
1750 netevent.daddr = dest;
1751 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1752
1753 if (rt->rt6i_flags & RTF_CACHE) {
1754 ip6_del_rt(rt);
1755 return;
1756 }
1757
1758 out:
1759 dst_release(&rt->dst);
1760 }
1761
1762 /*
1763 * Misc support functions
1764 */
1765
1766 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1767 const struct in6_addr *dest)
1768 {
1769 struct net *net = dev_net(ort->dst.dev);
1770 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1771 ort->rt6i_table);
1772
1773 if (rt) {
1774 rt->dst.input = ort->dst.input;
1775 rt->dst.output = ort->dst.output;
1776 rt->dst.flags |= DST_HOST;
1777
1778 rt->rt6i_dst.addr = *dest;
1779 rt->rt6i_dst.plen = 128;
1780 dst_copy_metrics(&rt->dst, &ort->dst);
1781 rt->dst.error = ort->dst.error;
1782 rt->rt6i_idev = ort->rt6i_idev;
1783 if (rt->rt6i_idev)
1784 in6_dev_hold(rt->rt6i_idev);
1785 rt->dst.lastuse = jiffies;
1786
1787 rt->rt6i_gateway = ort->rt6i_gateway;
1788 rt->rt6i_flags = ort->rt6i_flags;
1789 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1790 (RTF_DEFAULT | RTF_ADDRCONF))
1791 rt6_set_from(rt, ort);
1792 else
1793 rt6_clean_expires(rt);
1794 rt->rt6i_metric = 0;
1795
1796 #ifdef CONFIG_IPV6_SUBTREES
1797 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1798 #endif
1799 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1800 rt->rt6i_table = ort->rt6i_table;
1801 }
1802 return rt;
1803 }
1804
1805 #ifdef CONFIG_IPV6_ROUTE_INFO
1806 static struct rt6_info *rt6_get_route_info(struct net *net,
1807 const struct in6_addr *prefix, int prefixlen,
1808 const struct in6_addr *gwaddr, int ifindex)
1809 {
1810 struct fib6_node *fn;
1811 struct rt6_info *rt = NULL;
1812 struct fib6_table *table;
1813
1814 table = fib6_get_table(net, RT6_TABLE_INFO);
1815 if (!table)
1816 return NULL;
1817
1818 write_lock_bh(&table->tb6_lock);
1819 fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
1820 if (!fn)
1821 goto out;
1822
1823 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1824 if (rt->dst.dev->ifindex != ifindex)
1825 continue;
1826 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1827 continue;
1828 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1829 continue;
1830 dst_hold(&rt->dst);
1831 break;
1832 }
1833 out:
1834 write_unlock_bh(&table->tb6_lock);
1835 return rt;
1836 }
1837
1838 static struct rt6_info *rt6_add_route_info(struct net *net,
1839 const struct in6_addr *prefix, int prefixlen,
1840 const struct in6_addr *gwaddr, int ifindex,
1841 unsigned int pref)
1842 {
1843 struct fib6_config cfg = {
1844 .fc_table = RT6_TABLE_INFO,
1845 .fc_metric = IP6_RT_PRIO_USER,
1846 .fc_ifindex = ifindex,
1847 .fc_dst_len = prefixlen,
1848 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1849 RTF_UP | RTF_PREF(pref),
1850 .fc_nlinfo.pid = 0,
1851 .fc_nlinfo.nlh = NULL,
1852 .fc_nlinfo.nl_net = net,
1853 };
1854
1855 cfg.fc_dst = *prefix;
1856 cfg.fc_gateway = *gwaddr;
1857
1858 /* We should treat it as a default route if prefix length is 0. */
1859 if (!prefixlen)
1860 cfg.fc_flags |= RTF_DEFAULT;
1861
1862 ip6_route_add(&cfg);
1863
1864 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1865 }
1866 #endif
1867
1868 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1869 {
1870 struct rt6_info *rt;
1871 struct fib6_table *table;
1872
1873 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1874 if (!table)
1875 return NULL;
1876
1877 write_lock_bh(&table->tb6_lock);
1878 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1879 if (dev == rt->dst.dev &&
1880 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1881 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1882 break;
1883 }
1884 if (rt)
1885 dst_hold(&rt->dst);
1886 write_unlock_bh(&table->tb6_lock);
1887 return rt;
1888 }
1889
1890 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1891 struct net_device *dev,
1892 unsigned int pref)
1893 {
1894 struct fib6_config cfg = {
1895 .fc_table = RT6_TABLE_DFLT,
1896 .fc_metric = IP6_RT_PRIO_USER,
1897 .fc_ifindex = dev->ifindex,
1898 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1899 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1900 .fc_nlinfo.pid = 0,
1901 .fc_nlinfo.nlh = NULL,
1902 .fc_nlinfo.nl_net = dev_net(dev),
1903 };
1904
1905 cfg.fc_gateway = *gwaddr;
1906
1907 ip6_route_add(&cfg);
1908
1909 return rt6_get_dflt_router(gwaddr, dev);
1910 }
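
/*
 * Illustrative sketch (not part of the original file): how a Router
 * Advertisement handler might use the two helpers above once it has parsed
 * the router lifetime.  example_process_ra_router() is hypothetical and
 * mirrors the pattern of rt6_route_rcv() earlier in this file.
 */
static void example_process_ra_router(const struct in6_addr *gwaddr,
				      struct net_device *dev,
				      unsigned long lifetime,
				      unsigned int pref)
{
	struct rt6_info *rt = rt6_get_dflt_router(gwaddr, dev);

	if (rt && !lifetime) {
		/* ip6_del_rt() drops the reference taken by the lookup */
		ip6_del_rt(rt);
		return;
	}

	if (!rt && lifetime)
		rt = rt6_add_dflt_router(gwaddr, dev, pref);

	if (rt) {
		rt6_set_expires(rt, jiffies + HZ * lifetime);
		/* both helpers return a held route reference */
		dst_release(&rt->dst);
	}
}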
1911
1912 void rt6_purge_dflt_routers(struct net *net)
1913 {
1914 struct rt6_info *rt;
1915 struct fib6_table *table;
1916
1917 /* NOTE: Keep consistent with rt6_get_dflt_router */
1918 table = fib6_get_table(net, RT6_TABLE_DFLT);
1919 if (!table)
1920 return;
1921
1922 restart:
1923 read_lock_bh(&table->tb6_lock);
1924 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1925 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1926 dst_hold(&rt->dst);
1927 read_unlock_bh(&table->tb6_lock);
1928 ip6_del_rt(rt);
1929 goto restart;
1930 }
1931 }
1932 read_unlock_bh(&table->tb6_lock);
1933 }
1934
1935 static void rtmsg_to_fib6_config(struct net *net,
1936 struct in6_rtmsg *rtmsg,
1937 struct fib6_config *cfg)
1938 {
1939 memset(cfg, 0, sizeof(*cfg));
1940
1941 cfg->fc_table = RT6_TABLE_MAIN;
1942 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1943 cfg->fc_metric = rtmsg->rtmsg_metric;
1944 cfg->fc_expires = rtmsg->rtmsg_info;
1945 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1946 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1947 cfg->fc_flags = rtmsg->rtmsg_flags;
1948
1949 cfg->fc_nlinfo.nl_net = net;
1950
1951 cfg->fc_dst = rtmsg->rtmsg_dst;
1952 cfg->fc_src = rtmsg->rtmsg_src;
1953 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1954 }
1955
1956 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1957 {
1958 struct fib6_config cfg;
1959 struct in6_rtmsg rtmsg;
1960 int err;
1961
1962 switch (cmd) {
1963 case SIOCADDRT: /* Add a route */
1964 case SIOCDELRT: /* Delete a route */
1965 if (!capable(CAP_NET_ADMIN))
1966 return -EPERM;
1967 err = copy_from_user(&rtmsg, arg,
1968 sizeof(struct in6_rtmsg));
1969 if (err)
1970 return -EFAULT;
1971
1972 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1973
1974 rtnl_lock();
1975 switch (cmd) {
1976 case SIOCADDRT:
1977 err = ip6_route_add(&cfg);
1978 break;
1979 case SIOCDELRT:
1980 err = ip6_route_del(&cfg);
1981 break;
1982 default:
1983 err = -EINVAL;
1984 }
1985 rtnl_unlock();
1986
1987 return err;
1988 }
1989
1990 return -EINVAL;
1991 }
1992
1993 /*
1994 * Drop the packet on the floor
1995 */
1996
1997 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
1998 {
1999 int type;
2000 struct dst_entry *dst = skb_dst(skb);
2001 switch (ipstats_mib_noroutes) {
2002 case IPSTATS_MIB_INNOROUTES:
2003 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2004 if (type == IPV6_ADDR_ANY) {
2005 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2006 IPSTATS_MIB_INADDRERRORS);
2007 break;
2008 }
2009 /* FALLTHROUGH */
2010 case IPSTATS_MIB_OUTNOROUTES:
2011 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2012 ipstats_mib_noroutes);
2013 break;
2014 }
2015 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2016 kfree_skb(skb);
2017 return 0;
2018 }
2019
2020 static int ip6_pkt_discard(struct sk_buff *skb)
2021 {
2022 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2023 }
2024
2025 static int ip6_pkt_discard_out(struct sk_buff *skb)
2026 {
2027 skb->dev = skb_dst(skb)->dev;
2028 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2029 }
2030
2031 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2032
2033 static int ip6_pkt_prohibit(struct sk_buff *skb)
2034 {
2035 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2036 }
2037
2038 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2039 {
2040 skb->dev = skb_dst(skb)->dev;
2041 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2042 }
2043
2044 #endif
2045
2046 /*
2047 * Allocate a dst for local (unicast / anycast) address.
2048 */
2049
2050 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2051 const struct in6_addr *addr,
2052 bool anycast)
2053 {
2054 struct net *net = dev_net(idev->dev);
2055 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2056 int err;
2057
2058 if (!rt) {
2059 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2060 return ERR_PTR(-ENOMEM);
2061 }
2062
2063 in6_dev_hold(idev);
2064
2065 rt->dst.flags |= DST_HOST;
2066 rt->dst.input = ip6_input;
2067 rt->dst.output = ip6_output;
2068 rt->rt6i_idev = idev;
2069 rt->dst.obsolete = -1;
2070
2071 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2072 if (anycast)
2073 rt->rt6i_flags |= RTF_ANYCAST;
2074 else
2075 rt->rt6i_flags |= RTF_LOCAL;
2076 err = rt6_bind_neighbour(rt, rt->dst.dev);
2077 if (err) {
2078 dst_free(&rt->dst);
2079 return ERR_PTR(err);
2080 }
2081
2082 rt->rt6i_dst.addr = *addr;
2083 rt->rt6i_dst.plen = 128;
2084 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2085
2086 atomic_set(&rt->dst.__refcnt, 1);
2087
2088 return rt;
2089 }
2090
2091 int ip6_route_get_saddr(struct net *net,
2092 struct rt6_info *rt,
2093 const struct in6_addr *daddr,
2094 unsigned int prefs,
2095 struct in6_addr *saddr)
2096 {
2097 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2098 int err = 0;
2099 if (rt->rt6i_prefsrc.plen)
2100 *saddr = rt->rt6i_prefsrc.addr;
2101 else
2102 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2103 daddr, prefs, saddr);
2104 return err;
2105 }
2106
2107 /* remove deleted ip from prefsrc entries */
2108 struct arg_dev_net_ip {
2109 struct net_device *dev;
2110 struct net *net;
2111 struct in6_addr *addr;
2112 };
2113
2114 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2115 {
2116 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2117 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2118 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2119
2120 if (((void *)rt->dst.dev == dev || !dev) &&
2121 rt != net->ipv6.ip6_null_entry &&
2122 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2123 /* remove prefsrc entry */
2124 rt->rt6i_prefsrc.plen = 0;
2125 }
2126 return 0;
2127 }
2128
2129 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2130 {
2131 struct net *net = dev_net(ifp->idev->dev);
2132 struct arg_dev_net_ip adni = {
2133 .dev = ifp->idev->dev,
2134 .net = net,
2135 .addr = &ifp->addr,
2136 };
2137 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2138 }
2139
2140 struct arg_dev_net {
2141 struct net_device *dev;
2142 struct net *net;
2143 };
2144
2145 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2146 {
2147 const struct arg_dev_net *adn = arg;
2148 const struct net_device *dev = adn->dev;
2149
2150 if ((rt->dst.dev == dev || !dev) &&
2151 rt != adn->net->ipv6.ip6_null_entry)
2152 return -1;
2153
2154 return 0;
2155 }
2156
2157 void rt6_ifdown(struct net *net, struct net_device *dev)
2158 {
2159 struct arg_dev_net adn = {
2160 .dev = dev,
2161 .net = net,
2162 };
2163
2164 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2165 icmp6_clean_all(fib6_ifdown, &adn);
2166 }
2167
2168 struct rt6_mtu_change_arg {
2169 struct net_device *dev;
2170 unsigned int mtu;
2171 };
2172
2173 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2174 {
2175 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2176 struct inet6_dev *idev;
2177
2178 /* In IPv6, PMTU discovery is not optional,
2179 so the RTAX_MTU lock cannot disable it.
2180 We still use this lock to block changes
2181 caused by addrconf/ndisc.
2182 */
2183
2184 idev = __in6_dev_get(arg->dev);
2185 if (!idev)
2186 return 0;
2187
2188 /* For an administrative MTU increase, there is no way to discover
2189 an IPv6 PMTU increase, so the PMTU increase should be updated here.
2190 Since RFC 1981 doesn't cover administrative MTU increases,
2191 updating the PMTU here is a MUST. (i.e. for jumbo frames)
2192 */
2193 /*
2194 If the new MTU is less than the route PMTU, this new MTU will be the
2195 lowest MTU in the path; update the route PMTU to reflect the
2196 decrease. If the new MTU is greater than the route PMTU, and the
2197 old MTU was the lowest MTU in the path, update the route PMTU
2198 to reflect the increase. In that case, if another node on the path
2199 still has the lowest MTU, a PACKET TOO BIG message will lead to
2200 PMTU discovery.
2201 */
2202 if (rt->dst.dev == arg->dev &&
2203 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2204 (dst_mtu(&rt->dst) >= arg->mtu ||
2205 (dst_mtu(&rt->dst) < arg->mtu &&
2206 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2207 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2208 }
2209 return 0;
2210 }
2211
2212 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2213 {
2214 struct rt6_mtu_change_arg arg = {
2215 .dev = dev,
2216 .mtu = mtu,
2217 };
2218
2219 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2220 }
2221
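/* Netlink attribute policy used when parsing RTM_NEWROUTE, RTM_DELROUTE
 * and RTM_GETROUTE requests.
 */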
2222 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2223 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2224 [RTA_OIF] = { .type = NLA_U32 },
2225 [RTA_IIF] = { .type = NLA_U32 },
2226 [RTA_PRIORITY] = { .type = NLA_U32 },
2227 [RTA_METRICS] = { .type = NLA_NESTED },
2228 };
2229
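/* Translate an rtnetlink route request into the struct fib6_config
 * consumed by ip6_route_add()/ip6_route_del().
 */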
2230 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2231 struct fib6_config *cfg)
2232 {
2233 struct rtmsg *rtm;
2234 struct nlattr *tb[RTA_MAX+1];
2235 int err;
2236
2237 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2238 if (err < 0)
2239 goto errout;
2240
2241 err = -EINVAL;
2242 rtm = nlmsg_data(nlh);
2243 memset(cfg, 0, sizeof(*cfg));
2244
2245 cfg->fc_table = rtm->rtm_table;
2246 cfg->fc_dst_len = rtm->rtm_dst_len;
2247 cfg->fc_src_len = rtm->rtm_src_len;
2248 cfg->fc_flags = RTF_UP;
2249 cfg->fc_protocol = rtm->rtm_protocol;
2250
2251 if (rtm->rtm_type == RTN_UNREACHABLE)
2252 cfg->fc_flags |= RTF_REJECT;
2253
2254 if (rtm->rtm_type == RTN_LOCAL)
2255 cfg->fc_flags |= RTF_LOCAL;
2256
2257 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2258 cfg->fc_nlinfo.nlh = nlh;
2259 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2260
2261 if (tb[RTA_GATEWAY]) {
2262 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2263 cfg->fc_flags |= RTF_GATEWAY;
2264 }
2265
2266 if (tb[RTA_DST]) {
2267 int plen = (rtm->rtm_dst_len + 7) >> 3;
2268
2269 if (nla_len(tb[RTA_DST]) < plen)
2270 goto errout;
2271
2272 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2273 }
2274
2275 if (tb[RTA_SRC]) {
2276 int plen = (rtm->rtm_src_len + 7) >> 3;
2277
2278 if (nla_len(tb[RTA_SRC]) < plen)
2279 goto errout;
2280
2281 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2282 }
2283
2284 if (tb[RTA_PREFSRC])
2285 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2286
2287 if (tb[RTA_OIF])
2288 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2289
2290 if (tb[RTA_PRIORITY])
2291 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2292
2293 if (tb[RTA_METRICS]) {
2294 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2295 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2296 }
2297
2298 if (tb[RTA_TABLE])
2299 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2300
2301 err = 0;
2302 errout:
2303 return err;
2304 }
2305
2306 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2307 {
2308 struct fib6_config cfg;
2309 int err;
2310
2311 err = rtm_to_fib6_config(skb, nlh, &cfg);
2312 if (err < 0)
2313 return err;
2314
2315 return ip6_route_del(&cfg);
2316 }
2317
2318 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2319 {
2320 struct fib6_config cfg;
2321 int err;
2322
2323 err = rtm_to_fib6_config(skb, nlh, &cfg);
2324 if (err < 0)
2325 return err;
2326
2327 return ip6_route_add(&cfg);
2328 }
2329
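/* Upper bound on the netlink message size needed for a single route,
 * used to size the skb allocated in inet6_rt_notify().
 */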
2330 static inline size_t rt6_nlmsg_size(void)
2331 {
2332 return NLMSG_ALIGN(sizeof(struct rtmsg))
2333 + nla_total_size(16) /* RTA_SRC */
2334 + nla_total_size(16) /* RTA_DST */
2335 + nla_total_size(16) /* RTA_GATEWAY */
2336 + nla_total_size(16) /* RTA_PREFSRC */
2337 + nla_total_size(4) /* RTA_TABLE */
2338 + nla_total_size(4) /* RTA_IIF */
2339 + nla_total_size(4) /* RTA_OIF */
2340 + nla_total_size(4) /* RTA_PRIORITY */
2341 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2342 + nla_total_size(sizeof(struct rta_cacheinfo));
2343 }
2344
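/* Build one rtmsg describing @rt in @skb.  Returns 1 when the route is
 * skipped by the prefix-only filter and -EMSGSIZE when the message does
 * not fit in @skb.
 */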
2345 static int rt6_fill_node(struct net *net,
2346 struct sk_buff *skb, struct rt6_info *rt,
2347 struct in6_addr *dst, struct in6_addr *src,
2348 int iif, int type, u32 pid, u32 seq,
2349 int prefix, int nowait, unsigned int flags)
2350 {
2351 struct rtmsg *rtm;
2352 struct nlmsghdr *nlh;
2353 long expires;
2354 u32 table;
2355 struct neighbour *n;
2356
2357 if (prefix) { /* user wants prefix routes only */
2358 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2359 /* success since this is not a prefix route */
2360 return 1;
2361 }
2362 }
2363
2364 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2365 if (!nlh)
2366 return -EMSGSIZE;
2367
2368 rtm = nlmsg_data(nlh);
2369 rtm->rtm_family = AF_INET6;
2370 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2371 rtm->rtm_src_len = rt->rt6i_src.plen;
2372 rtm->rtm_tos = 0;
2373 if (rt->rt6i_table)
2374 table = rt->rt6i_table->tb6_id;
2375 else
2376 table = RT6_TABLE_UNSPEC;
2377 rtm->rtm_table = table;
2378 if (nla_put_u32(skb, RTA_TABLE, table))
2379 goto nla_put_failure;
2380 if (rt->rt6i_flags & RTF_REJECT)
2381 rtm->rtm_type = RTN_UNREACHABLE;
2382 else if (rt->rt6i_flags & RTF_LOCAL)
2383 rtm->rtm_type = RTN_LOCAL;
2384 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2385 rtm->rtm_type = RTN_LOCAL;
2386 else
2387 rtm->rtm_type = RTN_UNICAST;
2388 rtm->rtm_flags = 0;
2389 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2390 rtm->rtm_protocol = rt->rt6i_protocol;
2391 if (rt->rt6i_flags & RTF_DYNAMIC)
2392 rtm->rtm_protocol = RTPROT_REDIRECT;
2393 else if (rt->rt6i_flags & RTF_ADDRCONF)
2394 rtm->rtm_protocol = RTPROT_KERNEL;
2395 else if (rt->rt6i_flags & RTF_DEFAULT)
2396 rtm->rtm_protocol = RTPROT_RA;
2397
2398 if (rt->rt6i_flags & RTF_CACHE)
2399 rtm->rtm_flags |= RTM_F_CLONED;
2400
2401 if (dst) {
2402 if (nla_put(skb, RTA_DST, 16, dst))
2403 goto nla_put_failure;
2404 rtm->rtm_dst_len = 128;
2405 } else if (rtm->rtm_dst_len)
2406 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2407 goto nla_put_failure;
2408 #ifdef CONFIG_IPV6_SUBTREES
2409 if (src) {
2410 if (nla_put(skb, RTA_SRC, 16, src))
2411 goto nla_put_failure;
2412 rtm->rtm_src_len = 128;
2413 } else if (rtm->rtm_src_len &&
2414 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2415 goto nla_put_failure;
2416 #endif
2417 if (iif) {
2418 #ifdef CONFIG_IPV6_MROUTE
2419 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2420 int err = ip6mr_get_route(net, skb, rtm, nowait);
2421 if (err <= 0) {
2422 if (!nowait) {
2423 if (err == 0)
2424 return 0;
2425 goto nla_put_failure;
2426 } else {
2427 if (err == -EMSGSIZE)
2428 goto nla_put_failure;
2429 }
2430 }
2431 } else
2432 #endif
2433 if (nla_put_u32(skb, RTA_IIF, iif))
2434 goto nla_put_failure;
2435 } else if (dst) {
2436 struct in6_addr saddr_buf;
2437 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2438 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2439 goto nla_put_failure;
2440 }
2441
2442 if (rt->rt6i_prefsrc.plen) {
2443 struct in6_addr saddr_buf;
2444 saddr_buf = rt->rt6i_prefsrc.addr;
2445 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2446 goto nla_put_failure;
2447 }
2448
2449 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2450 goto nla_put_failure;
2451
2452 rcu_read_lock();
2453 n = rt->n;
2454 if (n) {
2455 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
2456 rcu_read_unlock();
2457 goto nla_put_failure;
2458 }
2459 }
2460 rcu_read_unlock();
2461
2462 if (rt->dst.dev &&
2463 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2464 goto nla_put_failure;
2465 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2466 goto nla_put_failure;
2467 if (!(rt->rt6i_flags & RTF_EXPIRES))
2468 expires = 0;
2469 else if (rt->dst.expires - jiffies < INT_MAX)
2470 expires = rt->dst.expires - jiffies;
2471 else
2472 expires = INT_MAX;
2473
2474 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2475 goto nla_put_failure;
2476
2477 return nlmsg_end(skb, nlh);
2478
2479 nla_put_failure:
2480 nlmsg_cancel(skb, nlh);
2481 return -EMSGSIZE;
2482 }
2483
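/* Per-route dump callback: emit an RTM_NEWROUTE message for @rt,
 * honoring the RTM_F_PREFIX filter from the dump request.
 */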
2484 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2485 {
2486 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2487 int prefix;
2488
2489 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2490 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2491 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2492 } else
2493 prefix = 0;
2494
2495 return rt6_fill_node(arg->net,
2496 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2497 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2498 prefix, 0, NLM_F_MULTI);
2499 }
2500
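/* RTM_GETROUTE handler: look up the route for the addresses given in
 * the request and unicast the resulting route back to the requester.
 */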
2501 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2502 {
2503 struct net *net = sock_net(in_skb->sk);
2504 struct nlattr *tb[RTA_MAX+1];
2505 struct rt6_info *rt;
2506 struct sk_buff *skb;
2507 struct rtmsg *rtm;
2508 struct flowi6 fl6;
2509 int err, iif = 0, oif = 0;
2510
2511 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2512 if (err < 0)
2513 goto errout;
2514
2515 err = -EINVAL;
2516 memset(&fl6, 0, sizeof(fl6));
2517
2518 if (tb[RTA_SRC]) {
2519 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2520 goto errout;
2521
2522 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2523 }
2524
2525 if (tb[RTA_DST]) {
2526 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2527 goto errout;
2528
2529 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2530 }
2531
2532 if (tb[RTA_IIF])
2533 iif = nla_get_u32(tb[RTA_IIF]);
2534
2535 if (tb[RTA_OIF])
2536 oif = nla_get_u32(tb[RTA_OIF]);
2537
2538 if (iif) {
2539 struct net_device *dev;
2540 int flags = 0;
2541
2542 dev = __dev_get_by_index(net, iif);
2543 if (!dev) {
2544 err = -ENODEV;
2545 goto errout;
2546 }
2547
2548 fl6.flowi6_iif = iif;
2549
2550 if (!ipv6_addr_any(&fl6.saddr))
2551 flags |= RT6_LOOKUP_F_HAS_SADDR;
2552
2553 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2554 flags);
2555 } else {
2556 fl6.flowi6_oif = oif;
2557
2558 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2559 }
2560
2561 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2562 if (!skb) {
2563 dst_release(&rt->dst);
2564 err = -ENOBUFS;
2565 goto errout;
2566 }
2567
2568 /* Reserve room for dummy headers; this skb can pass
2569 through a good chunk of the routing engine.
2570 */
2571 skb_reset_mac_header(skb);
2572 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2573
2574 skb_dst_set(skb, &rt->dst);
2575
2576 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2577 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2578 nlh->nlmsg_seq, 0, 0, 0);
2579 if (err < 0) {
2580 kfree_skb(skb);
2581 goto errout;
2582 }
2583
2584 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
2585 errout:
2586 return err;
2587 }
2588
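/* Notify RTNLGRP_IPV6_ROUTE listeners about a route change; on failure
 * report the error on the group via rtnl_set_sk_err().
 */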
2589 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2590 {
2591 struct sk_buff *skb;
2592 struct net *net = info->nl_net;
2593 u32 seq;
2594 int err;
2595
2596 err = -ENOBUFS;
2597 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2598
2599 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2600 if (!skb)
2601 goto errout;
2602
2603 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2604 event, info->pid, seq, 0, 0, 0);
2605 if (err < 0) {
2606 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2607 WARN_ON(err == -EMSGSIZE);
2608 kfree_skb(skb);
2609 goto errout;
2610 }
2611 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2612 info->nlh, gfp_any());
2613 return;
2614 errout:
2615 if (err < 0)
2616 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2617 }
2618
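/* When the loopback device is registered, point the special null (and,
 * with CONFIG_IPV6_MULTIPLE_TABLES, prohibit and blackhole) routes at it.
 */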
2619 static int ip6_route_dev_notify(struct notifier_block *this,
2620 unsigned long event, void *data)
2621 {
2622 struct net_device *dev = (struct net_device *)data;
2623 struct net *net = dev_net(dev);
2624
2625 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2626 net->ipv6.ip6_null_entry->dst.dev = dev;
2627 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2628 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2629 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2630 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2631 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2632 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2633 #endif
2634 }
2635
2636 return NOTIFY_OK;
2637 }
2638
2639 /*
2640 * /proc
2641 */
2642
2643 #ifdef CONFIG_PROC_FS
2644
2645 struct rt6_proc_arg
2646 {
2647 char *buffer;
2648 int offset;
2649 int length;
2650 int skip;
2651 int len;
2652 };
2653
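/* Print one /proc/net/ipv6_route line: destination, source (zeros
 * without subtrees), gateway, metric, refcount, use count, flags and
 * device name.
 */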
2654 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2655 {
2656 struct seq_file *m = p_arg;
2657 struct neighbour *n;
2658
2659 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2660
2661 #ifdef CONFIG_IPV6_SUBTREES
2662 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2663 #else
2664 seq_puts(m, "00000000000000000000000000000000 00 ");
2665 #endif
2666 rcu_read_lock();
2667 n = rt->n;
2668 if (n) {
2669 seq_printf(m, "%pi6", n->primary_key);
2670 } else {
2671 seq_puts(m, "00000000000000000000000000000000");
2672 }
2673 rcu_read_unlock();
2674 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2675 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2676 rt->dst.__use, rt->rt6i_flags,
2677 rt->dst.dev ? rt->dst.dev->name : "");
2678 return 0;
2679 }
2680
2681 static int ipv6_route_show(struct seq_file *m, void *v)
2682 {
2683 struct net *net = (struct net *)m->private;
2684 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2685 return 0;
2686 }
2687
2688 static int ipv6_route_open(struct inode *inode, struct file *file)
2689 {
2690 return single_open_net(inode, file, ipv6_route_show);
2691 }
2692
2693 static const struct file_operations ipv6_route_proc_fops = {
2694 .owner = THIS_MODULE,
2695 .open = ipv6_route_open,
2696 .read = seq_read,
2697 .llseek = seq_lseek,
2698 .release = single_release_net,
2699 };
2700
2701 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2702 {
2703 struct net *net = (struct net *)seq->private;
2704 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2705 net->ipv6.rt6_stats->fib_nodes,
2706 net->ipv6.rt6_stats->fib_route_nodes,
2707 net->ipv6.rt6_stats->fib_rt_alloc,
2708 net->ipv6.rt6_stats->fib_rt_entries,
2709 net->ipv6.rt6_stats->fib_rt_cache,
2710 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2711 net->ipv6.rt6_stats->fib_discarded_routes);
2712
2713 return 0;
2714 }
2715
2716 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2717 {
2718 return single_open_net(inode, file, rt6_stats_seq_show);
2719 }
2720
2721 static const struct file_operations rt6_stats_seq_fops = {
2722 .owner = THIS_MODULE,
2723 .open = rt6_stats_seq_open,
2724 .read = seq_read,
2725 .llseek = seq_lseek,
2726 .release = single_release_net,
2727 };
2728 #endif /* CONFIG_PROC_FS */
2729
2730 #ifdef CONFIG_SYSCTL
2731
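/* Writing to the "flush" sysctl triggers an immediate fib6 garbage
 * collection run; reads are rejected with -EINVAL.
 */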
2732 static
2733 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2734 void __user *buffer, size_t *lenp, loff_t *ppos)
2735 {
2736 struct net *net;
2737 int delay;
2738 if (!write)
2739 return -EINVAL;
2740
2741 net = (struct net *)ctl->extra1;
2742 delay = net->ipv6.sysctl.flush_delay;
2743 proc_dointvec(ctl, write, buffer, lenp, ppos);
2744 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2745 return 0;
2746 }
2747
2748 ctl_table ipv6_route_table_template[] = {
2749 {
2750 .procname = "flush",
2751 .data = &init_net.ipv6.sysctl.flush_delay,
2752 .maxlen = sizeof(int),
2753 .mode = 0200,
2754 .proc_handler = ipv6_sysctl_rtcache_flush
2755 },
2756 {
2757 .procname = "gc_thresh",
2758 .data = &ip6_dst_ops_template.gc_thresh,
2759 .maxlen = sizeof(int),
2760 .mode = 0644,
2761 .proc_handler = proc_dointvec,
2762 },
2763 {
2764 .procname = "max_size",
2765 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2766 .maxlen = sizeof(int),
2767 .mode = 0644,
2768 .proc_handler = proc_dointvec,
2769 },
2770 {
2771 .procname = "gc_min_interval",
2772 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2773 .maxlen = sizeof(int),
2774 .mode = 0644,
2775 .proc_handler = proc_dointvec_jiffies,
2776 },
2777 {
2778 .procname = "gc_timeout",
2779 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2780 .maxlen = sizeof(int),
2781 .mode = 0644,
2782 .proc_handler = proc_dointvec_jiffies,
2783 },
2784 {
2785 .procname = "gc_interval",
2786 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2787 .maxlen = sizeof(int),
2788 .mode = 0644,
2789 .proc_handler = proc_dointvec_jiffies,
2790 },
2791 {
2792 .procname = "gc_elasticity",
2793 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2794 .maxlen = sizeof(int),
2795 .mode = 0644,
2796 .proc_handler = proc_dointvec,
2797 },
2798 {
2799 .procname = "mtu_expires",
2800 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2801 .maxlen = sizeof(int),
2802 .mode = 0644,
2803 .proc_handler = proc_dointvec_jiffies,
2804 },
2805 {
2806 .procname = "min_adv_mss",
2807 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2808 .maxlen = sizeof(int),
2809 .mode = 0644,
2810 .proc_handler = proc_dointvec,
2811 },
2812 {
2813 .procname = "gc_min_interval_ms",
2814 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2815 .maxlen = sizeof(int),
2816 .mode = 0644,
2817 .proc_handler = proc_dointvec_ms_jiffies,
2818 },
2819 { }
2820 };
2821
2822 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2823 {
2824 struct ctl_table *table;
2825
2826 table = kmemdup(ipv6_route_table_template,
2827 sizeof(ipv6_route_table_template),
2828 GFP_KERNEL);
2829
2830 if (table) {
2831 table[0].data = &net->ipv6.sysctl.flush_delay;
2832 table[0].extra1 = net;
2833 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2834 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2835 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2836 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2837 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2838 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2839 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2840 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2841 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2842 }
2843
2844 return table;
2845 }
2846 #endif
2847
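/* Per-namespace initialization: copy the dst_ops template, allocate the
 * null (and, with multiple tables, prohibit and blackhole) route entries
 * and set the sysctl defaults.
 */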
2848 static int __net_init ip6_route_net_init(struct net *net)
2849 {
2850 int ret = -ENOMEM;
2851
2852 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2853 sizeof(net->ipv6.ip6_dst_ops));
2854
2855 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2856 goto out_ip6_dst_ops;
2857
2858 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2859 sizeof(*net->ipv6.ip6_null_entry),
2860 GFP_KERNEL);
2861 if (!net->ipv6.ip6_null_entry)
2862 goto out_ip6_dst_entries;
2863 net->ipv6.ip6_null_entry->dst.path =
2864 (struct dst_entry *)net->ipv6.ip6_null_entry;
2865 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2866 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2867 ip6_template_metrics, true);
2868
2869 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2870 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2871 sizeof(*net->ipv6.ip6_prohibit_entry),
2872 GFP_KERNEL);
2873 if (!net->ipv6.ip6_prohibit_entry)
2874 goto out_ip6_null_entry;
2875 net->ipv6.ip6_prohibit_entry->dst.path =
2876 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2877 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2878 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2879 ip6_template_metrics, true);
2880
2881 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2882 sizeof(*net->ipv6.ip6_blk_hole_entry),
2883 GFP_KERNEL);
2884 if (!net->ipv6.ip6_blk_hole_entry)
2885 goto out_ip6_prohibit_entry;
2886 net->ipv6.ip6_blk_hole_entry->dst.path =
2887 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2888 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2889 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2890 ip6_template_metrics, true);
2891 #endif
2892
2893 net->ipv6.sysctl.flush_delay = 0;
2894 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2895 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2896 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2897 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2898 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2899 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2900 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2901
2902 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2903
2904 ret = 0;
2905 out:
2906 return ret;
2907
2908 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2909 out_ip6_prohibit_entry:
2910 kfree(net->ipv6.ip6_prohibit_entry);
2911 out_ip6_null_entry:
2912 kfree(net->ipv6.ip6_null_entry);
2913 #endif
2914 out_ip6_dst_entries:
2915 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2916 out_ip6_dst_ops:
2917 goto out;
2918 }
2919
2920 static void __net_exit ip6_route_net_exit(struct net *net)
2921 {
2922 kfree(net->ipv6.ip6_null_entry);
2923 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2924 kfree(net->ipv6.ip6_prohibit_entry);
2925 kfree(net->ipv6.ip6_blk_hole_entry);
2926 #endif
2927 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2928 }
2929
2930 static int __net_init ip6_route_net_init_late(struct net *net)
2931 {
2932 #ifdef CONFIG_PROC_FS
2933 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2934 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2935 #endif
2936 return 0;
2937 }
2938
2939 static void __net_exit ip6_route_net_exit_late(struct net *net)
2940 {
2941 #ifdef CONFIG_PROC_FS
2942 proc_net_remove(net, "ipv6_route");
2943 proc_net_remove(net, "rt6_stats");
2944 #endif
2945 }
2946
2947 static struct pernet_operations ip6_route_net_ops = {
2948 .init = ip6_route_net_init,
2949 .exit = ip6_route_net_exit,
2950 };
2951
2952 static int __net_init ipv6_inetpeer_init(struct net *net)
2953 {
2954 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2955
2956 if (!bp)
2957 return -ENOMEM;
2958 inet_peer_base_init(bp);
2959 net->ipv6.peers = bp;
2960 return 0;
2961 }
2962
2963 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2964 {
2965 struct inet_peer_base *bp = net->ipv6.peers;
2966
2967 net->ipv6.peers = NULL;
2968 inetpeer_invalidate_tree(bp);
2969 kfree(bp);
2970 }
2971
2972 static struct pernet_operations ipv6_inetpeer_ops = {
2973 .init = ipv6_inetpeer_init,
2974 .exit = ipv6_inetpeer_exit,
2975 };
2976
2977 static struct pernet_operations ip6_route_net_late_ops = {
2978 .init = ip6_route_net_init_late,
2979 .exit = ip6_route_net_exit_late,
2980 };
2981
2982 static struct notifier_block ip6_route_dev_notifier = {
2983 .notifier_call = ip6_route_dev_notify,
2984 .priority = 0,
2985 };
2986
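/* Subsystem init: create the rt6_info slab cache, register the per-net
 * operations and rtnetlink handlers, and hook the netdevice notifier;
 * each failure path unwinds the steps taken before it.
 */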
2987 int __init ip6_route_init(void)
2988 {
2989 int ret;
2990
2991 ret = -ENOMEM;
2992 ip6_dst_ops_template.kmem_cachep =
2993 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2994 SLAB_HWCACHE_ALIGN, NULL);
2995 if (!ip6_dst_ops_template.kmem_cachep)
2996 goto out;
2997
2998 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2999 if (ret)
3000 goto out_kmem_cache;
3001
3002 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3003 if (ret)
3004 goto out_dst_entries;
3005
3006 ret = register_pernet_subsys(&ip6_route_net_ops);
3007 if (ret)
3008 goto out_register_inetpeer;
3009
3010 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3011
3012 /* Registration of the loopback device is done before this portion
3013 * of code runs, so the loopback reference in rt6_info will not be
3014 * taken; do it manually for init_net */
3015 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3016 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3017 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3018 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3019 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3020 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3021 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3022 #endif
3023 ret = fib6_init();
3024 if (ret)
3025 goto out_register_subsys;
3026
3027 ret = xfrm6_init();
3028 if (ret)
3029 goto out_fib6_init;
3030
3031 ret = fib6_rules_init();
3032 if (ret)
3033 goto xfrm6_init;
3034
3035 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3036 if (ret)
3037 goto fib6_rules_init;
3038
3039 ret = -ENOBUFS;
3040 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3041 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3042 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3043 goto out_register_late_subsys;
3044
3045 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3046 if (ret)
3047 goto out_register_late_subsys;
3048
3049 out:
3050 return ret;
3051
3052 out_register_late_subsys:
3053 unregister_pernet_subsys(&ip6_route_net_late_ops);
3054 fib6_rules_init:
3055 fib6_rules_cleanup();
3056 xfrm6_init:
3057 xfrm6_fini();
3058 out_fib6_init:
3059 fib6_gc_cleanup();
3060 out_register_subsys:
3061 unregister_pernet_subsys(&ip6_route_net_ops);
3062 out_register_inetpeer:
3063 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3064 out_dst_entries:
3065 dst_entries_destroy(&ip6_dst_blackhole_ops);
3066 out_kmem_cache:
3067 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3068 goto out;
3069 }
3070
3071 void ip6_route_cleanup(void)
3072 {
3073 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3074 unregister_pernet_subsys(&ip6_route_net_late_ops);
3075 fib6_rules_cleanup();
3076 xfrm6_fini();
3077 fib6_gc_cleanup();
3078 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3079 unregister_pernet_subsys(&ip6_route_net_ops);
3080 dst_entries_destroy(&ip6_dst_blackhole_ops);
3081 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3082 }