git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - net/ipv6/route.c
ipv6: replace write lock with read lock when get route info
1 /*
2 * Linux INET6 implementation
3 * FIB front-end.
4 *
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 */
13
14 /* Changes:
15 *
16 * YOSHIFUJI Hideaki @USAGI
17 * reworked default router selection.
18 * - respect outgoing interface
19 * - select from (probably) reachable routers (i.e.
20 * routers in REACHABLE, STALE, DELAY or PROBE states).
21 * - always select the same router if it is (probably)
22 * reachable. otherwise, round-robin the list.
23 * Ville Nuorvala
24 * Fixed routing subtrees.
25 */
26
27 #define pr_fmt(fmt) "IPv6: " fmt
28
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/export.h>
32 #include <linux/types.h>
33 #include <linux/times.h>
34 #include <linux/socket.h>
35 #include <linux/sockios.h>
36 #include <linux/net.h>
37 #include <linux/route.h>
38 #include <linux/netdevice.h>
39 #include <linux/in6.h>
40 #include <linux/mroute6.h>
41 #include <linux/init.h>
42 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #include <linux/nsproxy.h>
46 #include <linux/slab.h>
47 #include <net/net_namespace.h>
48 #include <net/snmp.h>
49 #include <net/ipv6.h>
50 #include <net/ip6_fib.h>
51 #include <net/ip6_route.h>
52 #include <net/ndisc.h>
53 #include <net/addrconf.h>
54 #include <net/tcp.h>
55 #include <linux/rtnetlink.h>
56 #include <net/dst.h>
57 #include <net/xfrm.h>
58 #include <net/netevent.h>
59 #include <net/netlink.h>
60
61 #include <asm/uaccess.h>
62
63 #ifdef CONFIG_SYSCTL
64 #include <linux/sysctl.h>
65 #endif
66
67 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
68 const struct in6_addr *dest);
69 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
70 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
71 static unsigned int ip6_mtu(const struct dst_entry *dst);
72 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
73 static void ip6_dst_destroy(struct dst_entry *);
74 static void ip6_dst_ifdown(struct dst_entry *,
75 struct net_device *dev, int how);
76 static int ip6_dst_gc(struct dst_ops *ops);
77
78 static int ip6_pkt_discard(struct sk_buff *skb);
79 static int ip6_pkt_discard_out(struct sk_buff *skb);
80 static void ip6_link_failure(struct sk_buff *skb);
81 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
82 struct sk_buff *skb, u32 mtu);
83 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
84 struct sk_buff *skb);
85
86 #ifdef CONFIG_IPV6_ROUTE_INFO
87 static struct rt6_info *rt6_add_route_info(struct net *net,
88 const struct in6_addr *prefix, int prefixlen,
89 const struct in6_addr *gwaddr, int ifindex,
90 unsigned int pref);
91 static struct rt6_info *rt6_get_route_info(struct net *net,
92 const struct in6_addr *prefix, int prefixlen,
93 const struct in6_addr *gwaddr, int ifindex);
94 #endif
95
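/* Copy-on-write metrics for host routes: move the metrics into the
 * destination's inet_peer so they become writable, and atomically swap
 * dst->_metrics from the shared read-only template to the peer's copy.
 */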
96 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
97 {
98 struct rt6_info *rt = (struct rt6_info *) dst;
99 struct inet_peer *peer;
100 u32 *p = NULL;
101
102 if (!(rt->dst.flags & DST_HOST))
103 return NULL;
104
105 peer = rt6_get_peer_create(rt);
106 if (peer) {
107 u32 *old_p = __DST_METRICS_PTR(old);
108 unsigned long prev, new;
109
110 p = peer->metrics;
111 if (inet_metrics_new(peer))
112 memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
113
114 new = (unsigned long) p;
115 prev = cmpxchg(&dst->_metrics, old, new);
116
117 if (prev != old) {
118 p = __DST_METRICS_PTR(prev);
119 if (prev & DST_METRICS_READ_ONLY)
120 p = NULL;
121 }
122 }
123 return p;
124 }
125
126 static inline const void *choose_neigh_daddr(struct rt6_info *rt,
127 struct sk_buff *skb,
128 const void *daddr)
129 {
130 struct in6_addr *p = &rt->rt6i_gateway;
131
132 if (!ipv6_addr_any(p))
133 return (const void *) p;
134 else if (skb)
135 return &ipv6_hdr(skb)->daddr;
136 return daddr;
137 }
138
139 static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
140 struct sk_buff *skb,
141 const void *daddr)
142 {
143 struct rt6_info *rt = (struct rt6_info *) dst;
144 struct neighbour *n;
145
146 daddr = choose_neigh_daddr(rt, skb, daddr);
147 n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
148 if (n)
149 return n;
150 return neigh_create(&nd_tbl, daddr, dst->dev);
151 }
152
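/* Look up (or create) the ndisc neighbour entry for the route's gateway
 * and attach it to the route.
 */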
153 static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
154 {
155 struct neighbour *n = __ipv6_neigh_lookup(&nd_tbl, dev, &rt->rt6i_gateway);
156 if (!n) {
157 n = neigh_create(&nd_tbl, &rt->rt6i_gateway, dev);
158 if (IS_ERR(n))
159 return PTR_ERR(n);
160 }
161 rt->n = n;
162
163 return 0;
164 }
165
166 static struct dst_ops ip6_dst_ops_template = {
167 .family = AF_INET6,
168 .protocol = cpu_to_be16(ETH_P_IPV6),
169 .gc = ip6_dst_gc,
170 .gc_thresh = 1024,
171 .check = ip6_dst_check,
172 .default_advmss = ip6_default_advmss,
173 .mtu = ip6_mtu,
174 .cow_metrics = ipv6_cow_metrics,
175 .destroy = ip6_dst_destroy,
176 .ifdown = ip6_dst_ifdown,
177 .negative_advice = ip6_negative_advice,
178 .link_failure = ip6_link_failure,
179 .update_pmtu = ip6_rt_update_pmtu,
180 .redirect = rt6_do_redirect,
181 .local_out = __ip6_local_out,
182 .neigh_lookup = ip6_neigh_lookup,
183 };
184
185 static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
186 {
187 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
188
189 return mtu ? : dst->dev->mtu;
190 }
191
192 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
193 struct sk_buff *skb, u32 mtu)
194 {
195 }
196
197 static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
198 struct sk_buff *skb)
199 {
200 }
201
202 static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst,
203 unsigned long old)
204 {
205 return NULL;
206 }
207
208 static struct dst_ops ip6_dst_blackhole_ops = {
209 .family = AF_INET6,
210 .protocol = cpu_to_be16(ETH_P_IPV6),
211 .destroy = ip6_dst_destroy,
212 .check = ip6_dst_check,
213 .mtu = ip6_blackhole_mtu,
214 .default_advmss = ip6_default_advmss,
215 .update_pmtu = ip6_rt_blackhole_update_pmtu,
216 .redirect = ip6_rt_blackhole_redirect,
217 .cow_metrics = ip6_rt_blackhole_cow_metrics,
218 .neigh_lookup = ip6_neigh_lookup,
219 };
220
221 static const u32 ip6_template_metrics[RTAX_MAX] = {
222 [RTAX_HOPLIMIT - 1] = 255,
223 };
224
225 static const struct rt6_info ip6_null_entry_template = {
226 .dst = {
227 .__refcnt = ATOMIC_INIT(1),
228 .__use = 1,
229 .obsolete = -1,
230 .error = -ENETUNREACH,
231 .input = ip6_pkt_discard,
232 .output = ip6_pkt_discard_out,
233 },
234 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
235 .rt6i_protocol = RTPROT_KERNEL,
236 .rt6i_metric = ~(u32) 0,
237 .rt6i_ref = ATOMIC_INIT(1),
238 };
239
240 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
241
242 static int ip6_pkt_prohibit(struct sk_buff *skb);
243 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
244
245 static const struct rt6_info ip6_prohibit_entry_template = {
246 .dst = {
247 .__refcnt = ATOMIC_INIT(1),
248 .__use = 1,
249 .obsolete = -1,
250 .error = -EACCES,
251 .input = ip6_pkt_prohibit,
252 .output = ip6_pkt_prohibit_out,
253 },
254 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
255 .rt6i_protocol = RTPROT_KERNEL,
256 .rt6i_metric = ~(u32) 0,
257 .rt6i_ref = ATOMIC_INIT(1),
258 };
259
260 static const struct rt6_info ip6_blk_hole_entry_template = {
261 .dst = {
262 .__refcnt = ATOMIC_INIT(1),
263 .__use = 1,
264 .obsolete = -1,
265 .error = -EINVAL,
266 .input = dst_discard,
267 .output = dst_discard,
268 },
269 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
270 .rt6i_protocol = RTPROT_KERNEL,
271 .rt6i_metric = ~(u32) 0,
272 .rt6i_ref = ATOMIC_INIT(1),
273 };
274
275 #endif
276
277 /* allocate dst with ip6_dst_ops */
278 static inline struct rt6_info *ip6_dst_alloc(struct net *net,
279 struct net_device *dev,
280 int flags,
281 struct fib6_table *table)
282 {
283 struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
284 0, DST_OBSOLETE_NONE, flags);
285
286 if (rt) {
287 struct dst_entry *dst = &rt->dst;
288
289 memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
290 rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
291 }
292 return rt;
293 }
294
295 static void ip6_dst_destroy(struct dst_entry *dst)
296 {
297 struct rt6_info *rt = (struct rt6_info *)dst;
298 struct inet6_dev *idev = rt->rt6i_idev;
299
300 if (rt->n)
301 neigh_release(rt->n);
302
303 if (!(rt->dst.flags & DST_HOST))
304 dst_destroy_metrics_generic(dst);
305
306 if (idev) {
307 rt->rt6i_idev = NULL;
308 in6_dev_put(idev);
309 }
310
311 if (!(rt->rt6i_flags & RTF_EXPIRES) && dst->from)
312 dst_release(dst->from);
313
314 if (rt6_has_peer(rt)) {
315 struct inet_peer *peer = rt6_peer_ptr(rt);
316 inet_putpeer(peer);
317 }
318 }
319
320 static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
321
322 static u32 rt6_peer_genid(void)
323 {
324 return atomic_read(&__rt6_peer_genid);
325 }
326
327 void rt6_bind_peer(struct rt6_info *rt, int create)
328 {
329 struct inet_peer_base *base;
330 struct inet_peer *peer;
331
332 base = inetpeer_base_ptr(rt->_rt6i_peer);
333 if (!base)
334 return;
335
336 peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create);
337 if (peer) {
338 if (!rt6_set_peer(rt, peer))
339 inet_putpeer(peer);
340 else
341 rt->rt6i_peer_genid = rt6_peer_genid();
342 }
343 }
344
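/* The device is going away: re-point the route's inet6_dev (and any cached
 * neighbour) at the loopback device so the dst can safely outlive it.
 */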
345 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
346 int how)
347 {
348 struct rt6_info *rt = (struct rt6_info *)dst;
349 struct inet6_dev *idev = rt->rt6i_idev;
350 struct net_device *loopback_dev =
351 dev_net(dev)->loopback_dev;
352
353 if (dev != loopback_dev) {
354 if (idev && idev->dev == dev) {
355 struct inet6_dev *loopback_idev =
356 in6_dev_get(loopback_dev);
357 if (loopback_idev) {
358 rt->rt6i_idev = loopback_idev;
359 in6_dev_put(idev);
360 }
361 }
362 if (rt->n && rt->n->dev == dev) {
363 rt->n->dev = loopback_dev;
364 dev_hold(loopback_dev);
365 dev_put(dev);
366 }
367 }
368 }
369
370 static bool rt6_check_expired(const struct rt6_info *rt)
371 {
372 struct rt6_info *ort = NULL;
373
374 if (rt->rt6i_flags & RTF_EXPIRES) {
375 if (time_after(jiffies, rt->dst.expires))
376 return true;
377 } else if (rt->dst.from) {
378 ort = (struct rt6_info *) rt->dst.from;
379 return (ort->rt6i_flags & RTF_EXPIRES) &&
380 time_after(jiffies, ort->dst.expires);
381 }
382 return false;
383 }
384
385 static bool rt6_need_strict(const struct in6_addr *daddr)
386 {
387 return ipv6_addr_type(daddr) &
388 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
389 }
390
391 /*
392 * Route lookup. Any table->tb6_lock is implied.
393 */
394
395 static inline struct rt6_info *rt6_device_match(struct net *net,
396 struct rt6_info *rt,
397 const struct in6_addr *saddr,
398 int oif,
399 int flags)
400 {
401 struct rt6_info *local = NULL;
402 struct rt6_info *sprt;
403
404 if (!oif && ipv6_addr_any(saddr))
405 goto out;
406
407 for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
408 struct net_device *dev = sprt->dst.dev;
409
410 if (oif) {
411 if (dev->ifindex == oif)
412 return sprt;
413 if (dev->flags & IFF_LOOPBACK) {
414 if (!sprt->rt6i_idev ||
415 sprt->rt6i_idev->dev->ifindex != oif) {
416 if (flags & RT6_LOOKUP_F_IFACE && oif)
417 continue;
418 if (local && (!oif ||
419 local->rt6i_idev->dev->ifindex == oif))
420 continue;
421 }
422 local = sprt;
423 }
424 } else {
425 if (ipv6_chk_addr(net, saddr, dev,
426 flags & RT6_LOOKUP_F_IFACE))
427 return sprt;
428 }
429 }
430
431 if (oif) {
432 if (local)
433 return local;
434
435 if (flags & RT6_LOOKUP_F_IFACE)
436 return net->ipv6.ip6_null_entry;
437 }
438 out:
439 return rt;
440 }
441
442 #ifdef CONFIG_IPV6_ROUTER_PREF
443 static void rt6_probe(struct rt6_info *rt)
444 {
445 struct neighbour *neigh;
446 /*
447 * Okay, this does not seem to be appropriate
448 * for now, however, we need to check if it
449 * is really so; aka Router Reachability Probing.
450 *
451 * Router Reachability Probe MUST be rate-limited
452 * to no more than one per minute.
453 */
454 neigh = rt ? rt->n : NULL;
455 if (!neigh || (neigh->nud_state & NUD_VALID))
456 return;
457 read_lock_bh(&neigh->lock);
458 if (!(neigh->nud_state & NUD_VALID) &&
459 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
460 struct in6_addr mcaddr;
461 struct in6_addr *target;
462
463 neigh->updated = jiffies;
464 read_unlock_bh(&neigh->lock);
465
466 target = (struct in6_addr *)&neigh->primary_key;
467 addrconf_addr_solict_mult(target, &mcaddr);
468 ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
469 } else {
470 read_unlock_bh(&neigh->lock);
471 }
472 }
473 #else
474 static inline void rt6_probe(struct rt6_info *rt)
475 {
476 }
477 #endif
478
479 /*
480 * Default Router Selection (RFC 2461 6.3.6)
481 */
482 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
483 {
484 struct net_device *dev = rt->dst.dev;
485 if (!oif || dev->ifindex == oif)
486 return 2;
487 if ((dev->flags & IFF_LOOPBACK) &&
488 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
489 return 1;
490 return 0;
491 }
492
493 static inline int rt6_check_neigh(struct rt6_info *rt)
494 {
495 struct neighbour *neigh;
496 int m;
497
498 neigh = rt->n;
499 if (rt->rt6i_flags & RTF_NONEXTHOP ||
500 !(rt->rt6i_flags & RTF_GATEWAY))
501 m = 1;
502 else if (neigh) {
503 read_lock_bh(&neigh->lock);
504 if (neigh->nud_state & NUD_VALID)
505 m = 2;
506 #ifdef CONFIG_IPV6_ROUTER_PREF
507 else if (neigh->nud_state & NUD_FAILED)
508 m = 0;
509 #endif
510 else
511 m = 1;
512 read_unlock_bh(&neigh->lock);
513 } else
514 m = 0;
515 return m;
516 }
517
518 static int rt6_score_route(struct rt6_info *rt, int oif,
519 int strict)
520 {
521 int m, n;
522
523 m = rt6_check_dev(rt, oif);
524 if (!m && (strict & RT6_LOOKUP_F_IFACE))
525 return -1;
526 #ifdef CONFIG_IPV6_ROUTER_PREF
527 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
528 #endif
529 n = rt6_check_neigh(rt);
530 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
531 return -1;
532 return m;
533 }
534
535 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
536 int *mpri, struct rt6_info *match)
537 {
538 int m;
539
540 if (rt6_check_expired(rt))
541 goto out;
542
543 m = rt6_score_route(rt, oif, strict);
544 if (m < 0)
545 goto out;
546
547 if (m > *mpri) {
548 if (strict & RT6_LOOKUP_F_REACHABLE)
549 rt6_probe(match);
550 *mpri = m;
551 match = rt;
552 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
553 rt6_probe(rt);
554 }
555
556 out:
557 return match;
558 }
559
560 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
561 struct rt6_info *rr_head,
562 u32 metric, int oif, int strict)
563 {
564 struct rt6_info *rt, *match;
565 int mpri = -1;
566
567 match = NULL;
568 for (rt = rr_head; rt && rt->rt6i_metric == metric;
569 rt = rt->dst.rt6_next)
570 match = find_match(rt, oif, strict, &mpri, match);
571 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
572 rt = rt->dst.rt6_next)
573 match = find_match(rt, oif, strict, &mpri, match);
574
575 return match;
576 }
577
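/* Default router selection (round-robin): pick the best-scoring route among
 * entries sharing fn->rr_ptr's metric; if nothing is reachable, advance
 * fn->rr_ptr so the next lookup tries another router of the same metric.
 */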
578 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
579 {
580 struct rt6_info *match, *rt0;
581 struct net *net;
582
583 rt0 = fn->rr_ptr;
584 if (!rt0)
585 fn->rr_ptr = rt0 = fn->leaf;
586
587 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
588
589 if (!match &&
590 (strict & RT6_LOOKUP_F_REACHABLE)) {
591 struct rt6_info *next = rt0->dst.rt6_next;
592
593 /* no entries matched; do round-robin */
594 if (!next || next->rt6i_metric != rt0->rt6i_metric)
595 next = fn->leaf;
596
597 if (next != rt0)
598 fn->rr_ptr = next;
599 }
600
601 net = dev_net(rt0->dst.dev);
602 return match ? match : net->ipv6.ip6_null_entry;
603 }
604
605 #ifdef CONFIG_IPV6_ROUTE_INFO
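/* Process a Route Information option from a Router Advertisement (RFC 4191):
 * add, refresh or remove the corresponding route depending on its lifetime.
 */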
606 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
607 const struct in6_addr *gwaddr)
608 {
609 struct net *net = dev_net(dev);
610 struct route_info *rinfo = (struct route_info *) opt;
611 struct in6_addr prefix_buf, *prefix;
612 unsigned int pref;
613 unsigned long lifetime;
614 struct rt6_info *rt;
615
616 if (len < sizeof(struct route_info)) {
617 return -EINVAL;
618 }
619
620 /* Sanity check for prefix_len and length */
621 if (rinfo->length > 3) {
622 return -EINVAL;
623 } else if (rinfo->prefix_len > 128) {
624 return -EINVAL;
625 } else if (rinfo->prefix_len > 64) {
626 if (rinfo->length < 2) {
627 return -EINVAL;
628 }
629 } else if (rinfo->prefix_len > 0) {
630 if (rinfo->length < 1) {
631 return -EINVAL;
632 }
633 }
634
635 pref = rinfo->route_pref;
636 if (pref == ICMPV6_ROUTER_PREF_INVALID)
637 return -EINVAL;
638
639 lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
640
641 if (rinfo->length == 3)
642 prefix = (struct in6_addr *)rinfo->prefix;
643 else {
644 /* this function is safe */
645 ipv6_addr_prefix(&prefix_buf,
646 (struct in6_addr *)rinfo->prefix,
647 rinfo->prefix_len);
648 prefix = &prefix_buf;
649 }
650
651 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
652 dev->ifindex);
653
654 if (rt && !lifetime) {
655 ip6_del_rt(rt);
656 rt = NULL;
657 }
658
659 if (!rt && lifetime)
660 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
661 pref);
662 else if (rt)
663 rt->rt6i_flags = RTF_ROUTEINFO |
664 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
665
666 if (rt) {
667 if (!addrconf_finite_timeout(lifetime))
668 rt6_clean_expires(rt);
669 else
670 rt6_set_expires(rt, jiffies + HZ * lifetime);
671
672 dst_release(&rt->dst);
673 }
674 return 0;
675 }
676 #endif
677
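/* If the lookup resolved to the null entry, walk back up the fib tree
 * (descending into source-address subtrees where present) and retry at the
 * first ancestor that carries route info; give up at the tree root.
 */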
678 #define BACKTRACK(__net, saddr) \
679 do { \
680 if (rt == __net->ipv6.ip6_null_entry) { \
681 struct fib6_node *pn; \
682 while (1) { \
683 if (fn->fn_flags & RTN_TL_ROOT) \
684 goto out; \
685 pn = fn->parent; \
686 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
687 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
688 else \
689 fn = pn; \
690 if (fn->fn_flags & RTN_RTINFO) \
691 goto restart; \
692 } \
693 } \
694 } while (0)
695
696 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
697 struct fib6_table *table,
698 struct flowi6 *fl6, int flags)
699 {
700 struct fib6_node *fn;
701 struct rt6_info *rt;
702
703 read_lock_bh(&table->tb6_lock);
704 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
705 restart:
706 rt = fn->leaf;
707 rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
708 BACKTRACK(net, &fl6->saddr);
709 out:
710 dst_use(&rt->dst, jiffies);
711 read_unlock_bh(&table->tb6_lock);
712 return rt;
713
714 }
715
716 struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6,
717 int flags)
718 {
719 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
720 }
721 EXPORT_SYMBOL_GPL(ip6_route_lookup);
722
723 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
724 const struct in6_addr *saddr, int oif, int strict)
725 {
726 struct flowi6 fl6 = {
727 .flowi6_oif = oif,
728 .daddr = *daddr,
729 };
730 struct dst_entry *dst;
731 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
732
733 if (saddr) {
734 memcpy(&fl6.saddr, saddr, sizeof(*saddr));
735 flags |= RT6_LOOKUP_F_HAS_SADDR;
736 }
737
738 dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
739 if (dst->error == 0)
740 return (struct rt6_info *) dst;
741
742 dst_release(dst);
743
744 return NULL;
745 }
746
747 EXPORT_SYMBOL(rt6_lookup);
748
749 /* ip6_ins_rt is called with FREE table->tb6_lock.
750 It takes a new route entry; if the addition fails for any reason, the
751 route is freed. In any case, if the caller does not hold a reference,
752 it may be destroyed.
753 */
754
755 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
756 {
757 int err;
758 struct fib6_table *table;
759
760 table = rt->rt6i_table;
761 write_lock_bh(&table->tb6_lock);
762 err = fib6_add(&table->tb6_root, rt, info);
763 write_unlock_bh(&table->tb6_lock);
764
765 return err;
766 }
767
768 int ip6_ins_rt(struct rt6_info *rt)
769 {
770 struct nl_info info = {
771 .nl_net = dev_net(rt->dst.dev),
772 };
773 return __ip6_ins_rt(rt, &info);
774 }
775
776 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
777 const struct in6_addr *daddr,
778 const struct in6_addr *saddr)
779 {
780 struct rt6_info *rt;
781
782 /*
783 * Clone the route.
784 */
785
786 rt = ip6_rt_copy(ort, daddr);
787
788 if (rt) {
789 int attempts = !in_softirq();
790
791 if (!(rt->rt6i_flags & RTF_GATEWAY)) {
792 if (ort->rt6i_dst.plen != 128 &&
793 ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
794 rt->rt6i_flags |= RTF_ANYCAST;
795 rt->rt6i_gateway = *daddr;
796 }
797
798 rt->rt6i_flags |= RTF_CACHE;
799
800 #ifdef CONFIG_IPV6_SUBTREES
801 if (rt->rt6i_src.plen && saddr) {
802 rt->rt6i_src.addr = *saddr;
803 rt->rt6i_src.plen = 128;
804 }
805 #endif
806
807 retry:
808 if (rt6_bind_neighbour(rt, rt->dst.dev)) {
809 struct net *net = dev_net(rt->dst.dev);
810 int saved_rt_min_interval =
811 net->ipv6.sysctl.ip6_rt_gc_min_interval;
812 int saved_rt_elasticity =
813 net->ipv6.sysctl.ip6_rt_gc_elasticity;
814
815 if (attempts-- > 0) {
816 net->ipv6.sysctl.ip6_rt_gc_elasticity = 1;
817 net->ipv6.sysctl.ip6_rt_gc_min_interval = 0;
818
819 ip6_dst_gc(&net->ipv6.ip6_dst_ops);
820
821 net->ipv6.sysctl.ip6_rt_gc_elasticity =
822 saved_rt_elasticity;
823 net->ipv6.sysctl.ip6_rt_gc_min_interval =
824 saved_rt_min_interval;
825 goto retry;
826 }
827
828 net_warn_ratelimited("Neighbour table overflow\n");
829 dst_free(&rt->dst);
830 return NULL;
831 }
832 }
833
834 return rt;
835 }
836
837 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
838 const struct in6_addr *daddr)
839 {
840 struct rt6_info *rt = ip6_rt_copy(ort, daddr);
841
842 if (rt) {
843 rt->rt6i_flags |= RTF_CACHE;
844 rt->n = neigh_clone(ort->n);
845 }
846 return rt;
847 }
848
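/* Core route resolution: select the best route in the table and, unless it
 * is already an RTF_CACHE entry, clone it into a per-destination cache route,
 * insert the clone back into the tree, and retry on insertion races.
 */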
849 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
850 struct flowi6 *fl6, int flags)
851 {
852 struct fib6_node *fn;
853 struct rt6_info *rt, *nrt;
854 int strict = 0;
855 int attempts = 3;
856 int err;
857 int reachable = net->ipv6.devconf_all->forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
858
859 strict |= flags & RT6_LOOKUP_F_IFACE;
860
861 relookup:
862 read_lock_bh(&table->tb6_lock);
863
864 restart_2:
865 fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
866
867 restart:
868 rt = rt6_select(fn, oif, strict | reachable);
869
870 BACKTRACK(net, &fl6->saddr);
871 if (rt == net->ipv6.ip6_null_entry ||
872 rt->rt6i_flags & RTF_CACHE)
873 goto out;
874
875 dst_hold(&rt->dst);
876 read_unlock_bh(&table->tb6_lock);
877
878 if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
879 nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
880 else if (!(rt->dst.flags & DST_HOST))
881 nrt = rt6_alloc_clone(rt, &fl6->daddr);
882 else
883 goto out2;
884
885 dst_release(&rt->dst);
886 rt = nrt ? : net->ipv6.ip6_null_entry;
887
888 dst_hold(&rt->dst);
889 if (nrt) {
890 err = ip6_ins_rt(nrt);
891 if (!err)
892 goto out2;
893 }
894
895 if (--attempts <= 0)
896 goto out2;
897
898 /*
899 * Race condition! In the gap, when table->tb6_lock was
900 * released someone could insert this route. Relookup.
901 */
902 dst_release(&rt->dst);
903 goto relookup;
904
905 out:
906 if (reachable) {
907 reachable = 0;
908 goto restart_2;
909 }
910 dst_hold(&rt->dst);
911 read_unlock_bh(&table->tb6_lock);
912 out2:
913 rt->dst.lastuse = jiffies;
914 rt->dst.__use++;
915
916 return rt;
917 }
918
919 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
920 struct flowi6 *fl6, int flags)
921 {
922 return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
923 }
924
925 static struct dst_entry *ip6_route_input_lookup(struct net *net,
926 struct net_device *dev,
927 struct flowi6 *fl6, int flags)
928 {
929 if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
930 flags |= RT6_LOOKUP_F_IFACE;
931
932 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
933 }
934
935 void ip6_route_input(struct sk_buff *skb)
936 {
937 const struct ipv6hdr *iph = ipv6_hdr(skb);
938 struct net *net = dev_net(skb->dev);
939 int flags = RT6_LOOKUP_F_HAS_SADDR;
940 struct flowi6 fl6 = {
941 .flowi6_iif = skb->dev->ifindex,
942 .daddr = iph->daddr,
943 .saddr = iph->saddr,
944 .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,
945 .flowi6_mark = skb->mark,
946 .flowi6_proto = iph->nexthdr,
947 };
948
949 skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
950 }
951
952 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
953 struct flowi6 *fl6, int flags)
954 {
955 return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
956 }
957
958 struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
959 struct flowi6 *fl6)
960 {
961 int flags = 0;
962
963 fl6->flowi6_iif = LOOPBACK_IFINDEX;
964
965 if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
966 flags |= RT6_LOOKUP_F_IFACE;
967
968 if (!ipv6_addr_any(&fl6->saddr))
969 flags |= RT6_LOOKUP_F_HAS_SADDR;
970 else if (sk)
971 flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
972
973 return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
974 }
975
976 EXPORT_SYMBOL(ip6_route_output);
977
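/* Produce a "blackhole" copy of a route: same addressing and metrics, but
 * detached from the fib tree and with input/output handlers that discard
 * every packet.
 */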
978 struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
979 {
980 struct rt6_info *rt, *ort = (struct rt6_info *) dst_orig;
981 struct dst_entry *new = NULL;
982
983 rt = dst_alloc(&ip6_dst_blackhole_ops, ort->dst.dev, 1, DST_OBSOLETE_NONE, 0);
984 if (rt) {
985 new = &rt->dst;
986
987 memset(new + 1, 0, sizeof(*rt) - sizeof(*new));
988 rt6_init_peer(rt, net->ipv6.peers);
989
990 new->__use = 1;
991 new->input = dst_discard;
992 new->output = dst_discard;
993
994 if (dst_metrics_read_only(&ort->dst))
995 new->_metrics = ort->dst._metrics;
996 else
997 dst_copy_metrics(new, &ort->dst);
998 rt->rt6i_idev = ort->rt6i_idev;
999 if (rt->rt6i_idev)
1000 in6_dev_hold(rt->rt6i_idev);
1001
1002 rt->rt6i_gateway = ort->rt6i_gateway;
1003 rt->rt6i_flags = ort->rt6i_flags;
1004 rt6_clean_expires(rt);
1005 rt->rt6i_metric = 0;
1006
1007 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1008 #ifdef CONFIG_IPV6_SUBTREES
1009 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1010 #endif
1011
1012 dst_free(new);
1013 }
1014
1015 dst_release(dst_orig);
1016 return new ? new : ERR_PTR(-ENOMEM);
1017 }
1018
1019 /*
1020 * Destination cache support functions
1021 */
1022
1023 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
1024 {
1025 struct rt6_info *rt;
1026
1027 rt = (struct rt6_info *) dst;
1028
1029 if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
1030 if (rt->rt6i_peer_genid != rt6_peer_genid()) {
1031 if (!rt6_has_peer(rt))
1032 rt6_bind_peer(rt, 0);
1033 rt->rt6i_peer_genid = rt6_peer_genid();
1034 }
1035 return dst;
1036 }
1037 return NULL;
1038 }
1039
1040 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
1041 {
1042 struct rt6_info *rt = (struct rt6_info *) dst;
1043
1044 if (rt) {
1045 if (rt->rt6i_flags & RTF_CACHE) {
1046 if (rt6_check_expired(rt)) {
1047 ip6_del_rt(rt);
1048 dst = NULL;
1049 }
1050 } else {
1051 dst_release(dst);
1052 dst = NULL;
1053 }
1054 }
1055 return dst;
1056 }
1057
1058 static void ip6_link_failure(struct sk_buff *skb)
1059 {
1060 struct rt6_info *rt;
1061
1062 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
1063
1064 rt = (struct rt6_info *) skb_dst(skb);
1065 if (rt) {
1066 if (rt->rt6i_flags & RTF_CACHE)
1067 rt6_update_expires(rt, 0);
1068 else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
1069 rt->rt6i_node->fn_sernum = -1;
1070 }
1071 }
1072
1073 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1074 struct sk_buff *skb, u32 mtu)
1075 {
1076 struct rt6_info *rt6 = (struct rt6_info*)dst;
1077
1078 dst_confirm(dst);
1079 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
1080 struct net *net = dev_net(dst->dev);
1081
1082 rt6->rt6i_flags |= RTF_MODIFIED;
1083 if (mtu < IPV6_MIN_MTU) {
1084 u32 features = dst_metric(dst, RTAX_FEATURES);
1085 mtu = IPV6_MIN_MTU;
1086 features |= RTAX_FEATURE_ALLFRAG;
1087 dst_metric_set(dst, RTAX_FEATURES, features);
1088 }
1089 dst_metric_set(dst, RTAX_MTU, mtu);
1090 rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires);
1091 }
1092 }
1093
1094 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
1095 int oif, u32 mark)
1096 {
1097 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1098 struct dst_entry *dst;
1099 struct flowi6 fl6;
1100
1101 memset(&fl6, 0, sizeof(fl6));
1102 fl6.flowi6_oif = oif;
1103 fl6.flowi6_mark = mark;
1104 fl6.flowi6_flags = 0;
1105 fl6.daddr = iph->daddr;
1106 fl6.saddr = iph->saddr;
1107 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1108
1109 dst = ip6_route_output(net, NULL, &fl6);
1110 if (!dst->error)
1111 ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu));
1112 dst_release(dst);
1113 }
1114 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
1115
1116 void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
1117 {
1118 ip6_update_pmtu(skb, sock_net(sk), mtu,
1119 sk->sk_bound_dev_if, sk->sk_mark);
1120 }
1121 EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
1122
1123 void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)
1124 {
1125 const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data;
1126 struct dst_entry *dst;
1127 struct flowi6 fl6;
1128
1129 memset(&fl6, 0, sizeof(fl6));
1130 fl6.flowi6_oif = oif;
1131 fl6.flowi6_mark = mark;
1132 fl6.flowi6_flags = 0;
1133 fl6.daddr = iph->daddr;
1134 fl6.saddr = iph->saddr;
1135 fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK;
1136
1137 dst = ip6_route_output(net, NULL, &fl6);
1138 if (!dst->error)
1139 rt6_do_redirect(dst, NULL, skb);
1140 dst_release(dst);
1141 }
1142 EXPORT_SYMBOL_GPL(ip6_redirect);
1143
1144 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
1145 {
1146 ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
1147 }
1148 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
1149
1150 static unsigned int ip6_default_advmss(const struct dst_entry *dst)
1151 {
1152 struct net_device *dev = dst->dev;
1153 unsigned int mtu = dst_mtu(dst);
1154 struct net *net = dev_net(dev);
1155
1156 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
1157
1158 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
1159 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
1160
1161 /*
1162 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
1163 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
1164 * IPV6_MAXPLEN is also valid and means: "any MSS,
1165 * rely only on pmtu discovery"
1166 */
1167 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
1168 mtu = IPV6_MAXPLEN;
1169 return mtu;
1170 }
1171
1172 static unsigned int ip6_mtu(const struct dst_entry *dst)
1173 {
1174 struct inet6_dev *idev;
1175 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
1176
1177 if (mtu)
1178 return mtu;
1179
1180 mtu = IPV6_MIN_MTU;
1181
1182 rcu_read_lock();
1183 idev = __in6_dev_get(dst->dev);
1184 if (idev)
1185 mtu = idev->cnf.mtu6;
1186 rcu_read_unlock();
1187
1188 return mtu;
1189 }
1190
1191 static struct dst_entry *icmp6_dst_gc_list;
1192 static DEFINE_SPINLOCK(icmp6_dst_lock);
1193
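/* Allocate a standalone dst for locally generated ndisc/ICMPv6 traffic.
 * These entries are never inserted into the fib tree; they live on
 * icmp6_dst_gc_list and are reaped by icmp6_dst_gc().
 */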
1194 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
1195 struct neighbour *neigh,
1196 struct flowi6 *fl6)
1197 {
1198 struct dst_entry *dst;
1199 struct rt6_info *rt;
1200 struct inet6_dev *idev = in6_dev_get(dev);
1201 struct net *net = dev_net(dev);
1202
1203 if (unlikely(!idev))
1204 return ERR_PTR(-ENODEV);
1205
1206 rt = ip6_dst_alloc(net, dev, 0, NULL);
1207 if (unlikely(!rt)) {
1208 in6_dev_put(idev);
1209 dst = ERR_PTR(-ENOMEM);
1210 goto out;
1211 }
1212
1213 if (neigh)
1214 neigh_hold(neigh);
1215 else {
1216 neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
1217 if (IS_ERR(neigh)) {
1218 in6_dev_put(idev);
1219 dst_free(&rt->dst);
1220 return ERR_CAST(neigh);
1221 }
1222 }
1223
1224 rt->dst.flags |= DST_HOST;
1225 rt->dst.output = ip6_output;
1226 rt->n = neigh;
1227 atomic_set(&rt->dst.__refcnt, 1);
1228 rt->rt6i_dst.addr = fl6->daddr;
1229 rt->rt6i_dst.plen = 128;
1230 rt->rt6i_idev = idev;
1231 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
1232
1233 spin_lock_bh(&icmp6_dst_lock);
1234 rt->dst.next = icmp6_dst_gc_list;
1235 icmp6_dst_gc_list = &rt->dst;
1236 spin_unlock_bh(&icmp6_dst_lock);
1237
1238 fib6_force_start_gc(net);
1239
1240 dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
1241
1242 out:
1243 return dst;
1244 }
1245
1246 int icmp6_dst_gc(void)
1247 {
1248 struct dst_entry *dst, **pprev;
1249 int more = 0;
1250
1251 spin_lock_bh(&icmp6_dst_lock);
1252 pprev = &icmp6_dst_gc_list;
1253
1254 while ((dst = *pprev) != NULL) {
1255 if (!atomic_read(&dst->__refcnt)) {
1256 *pprev = dst->next;
1257 dst_free(dst);
1258 } else {
1259 pprev = &dst->next;
1260 ++more;
1261 }
1262 }
1263
1264 spin_unlock_bh(&icmp6_dst_lock);
1265
1266 return more;
1267 }
1268
1269 static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg),
1270 void *arg)
1271 {
1272 struct dst_entry *dst, **pprev;
1273
1274 spin_lock_bh(&icmp6_dst_lock);
1275 pprev = &icmp6_dst_gc_list;
1276 while ((dst = *pprev) != NULL) {
1277 struct rt6_info *rt = (struct rt6_info *) dst;
1278 if (func(rt, arg)) {
1279 *pprev = dst->next;
1280 dst_free(dst);
1281 } else {
1282 pprev = &dst->next;
1283 }
1284 }
1285 spin_unlock_bh(&icmp6_dst_lock);
1286 }
1287
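/* dst garbage collection: run the fib6 GC when the cache has grown past
 * max_size or enough time has passed since the last run, and decay the
 * dynamic expiry (ip6_rt_gc_expire) as pressure drops below gc_thresh.
 */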
1288 static int ip6_dst_gc(struct dst_ops *ops)
1289 {
1290 unsigned long now = jiffies;
1291 struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
1292 int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
1293 int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
1294 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1295 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1296 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1297 int entries;
1298
1299 entries = dst_entries_get_fast(ops);
1300 if (time_after(rt_last_gc + rt_min_interval, now) &&
1301 entries <= rt_max_size)
1302 goto out;
1303
1304 net->ipv6.ip6_rt_gc_expire++;
1305 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1306 net->ipv6.ip6_rt_last_gc = now;
1307 entries = dst_entries_get_slow(ops);
1308 if (entries < ops->gc_thresh)
1309 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1310 out:
1311 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1312 return entries > rt_max_size;
1313 }
1314
1315 /* Clean the host part of a prefix. Not necessary in a radix tree,
1316 but results in cleaner routing tables.
1317 
1318 Remove it only when all the things work!
1319 */
1320
1321 int ip6_dst_hoplimit(struct dst_entry *dst)
1322 {
1323 int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT);
1324 if (hoplimit == 0) {
1325 struct net_device *dev = dst->dev;
1326 struct inet6_dev *idev;
1327
1328 rcu_read_lock();
1329 idev = __in6_dev_get(dev);
1330 if (idev)
1331 hoplimit = idev->cnf.hop_limit;
1332 else
1333 hoplimit = dev_net(dev)->ipv6.devconf_all->hop_limit;
1334 rcu_read_unlock();
1335 }
1336 return hoplimit;
1337 }
1338 EXPORT_SYMBOL(ip6_dst_hoplimit);
1339
1340 /*
1341 * Add a route described by a fib6_config to its fib6 table.
1342 */
1343
1344 int ip6_route_add(struct fib6_config *cfg)
1345 {
1346 int err;
1347 struct net *net = cfg->fc_nlinfo.nl_net;
1348 struct rt6_info *rt = NULL;
1349 struct net_device *dev = NULL;
1350 struct inet6_dev *idev = NULL;
1351 struct fib6_table *table;
1352 int addr_type;
1353
1354 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1355 return -EINVAL;
1356 #ifndef CONFIG_IPV6_SUBTREES
1357 if (cfg->fc_src_len)
1358 return -EINVAL;
1359 #endif
1360 if (cfg->fc_ifindex) {
1361 err = -ENODEV;
1362 dev = dev_get_by_index(net, cfg->fc_ifindex);
1363 if (!dev)
1364 goto out;
1365 idev = in6_dev_get(dev);
1366 if (!idev)
1367 goto out;
1368 }
1369
1370 if (cfg->fc_metric == 0)
1371 cfg->fc_metric = IP6_RT_PRIO_USER;
1372
1373 err = -ENOBUFS;
1374 if (cfg->fc_nlinfo.nlh &&
1375 !(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
1376 table = fib6_get_table(net, cfg->fc_table);
1377 if (!table) {
1378 pr_warn("NLM_F_CREATE should be specified when creating new route\n");
1379 table = fib6_new_table(net, cfg->fc_table);
1380 }
1381 } else {
1382 table = fib6_new_table(net, cfg->fc_table);
1383 }
1384
1385 if (!table)
1386 goto out;
1387
1388 rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
1389
1390 if (!rt) {
1391 err = -ENOMEM;
1392 goto out;
1393 }
1394
1395 rt->dst.obsolete = -1;
1396
1397 if (cfg->fc_flags & RTF_EXPIRES)
1398 rt6_set_expires(rt, jiffies +
1399 clock_t_to_jiffies(cfg->fc_expires));
1400 else
1401 rt6_clean_expires(rt);
1402
1403 if (cfg->fc_protocol == RTPROT_UNSPEC)
1404 cfg->fc_protocol = RTPROT_BOOT;
1405 rt->rt6i_protocol = cfg->fc_protocol;
1406
1407 addr_type = ipv6_addr_type(&cfg->fc_dst);
1408
1409 if (addr_type & IPV6_ADDR_MULTICAST)
1410 rt->dst.input = ip6_mc_input;
1411 else if (cfg->fc_flags & RTF_LOCAL)
1412 rt->dst.input = ip6_input;
1413 else
1414 rt->dst.input = ip6_forward;
1415
1416 rt->dst.output = ip6_output;
1417
1418 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1419 rt->rt6i_dst.plen = cfg->fc_dst_len;
1420 if (rt->rt6i_dst.plen == 128)
1421 rt->dst.flags |= DST_HOST;
1422
1423 if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) {
1424 u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
1425 if (!metrics) {
1426 err = -ENOMEM;
1427 goto out;
1428 }
1429 dst_init_metrics(&rt->dst, metrics, 0);
1430 }
1431 #ifdef CONFIG_IPV6_SUBTREES
1432 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1433 rt->rt6i_src.plen = cfg->fc_src_len;
1434 #endif
1435
1436 rt->rt6i_metric = cfg->fc_metric;
1437
1438 /* We cannot add true routes via loopback here,
1439 they would result in kernel looping; promote them to reject routes
1440 */
1441 if ((cfg->fc_flags & RTF_REJECT) ||
1442 (dev && (dev->flags & IFF_LOOPBACK) &&
1443 !(addr_type & IPV6_ADDR_LOOPBACK) &&
1444 !(cfg->fc_flags & RTF_LOCAL))) {
1445 /* hold loopback dev/idev if we haven't done so. */
1446 if (dev != net->loopback_dev) {
1447 if (dev) {
1448 dev_put(dev);
1449 in6_dev_put(idev);
1450 }
1451 dev = net->loopback_dev;
1452 dev_hold(dev);
1453 idev = in6_dev_get(dev);
1454 if (!idev) {
1455 err = -ENODEV;
1456 goto out;
1457 }
1458 }
1459 rt->dst.output = ip6_pkt_discard_out;
1460 rt->dst.input = ip6_pkt_discard;
1461 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1462 switch (cfg->fc_type) {
1463 case RTN_BLACKHOLE:
1464 rt->dst.error = -EINVAL;
1465 break;
1466 case RTN_PROHIBIT:
1467 rt->dst.error = -EACCES;
1468 break;
1469 case RTN_THROW:
1470 rt->dst.error = -EAGAIN;
1471 break;
1472 default:
1473 rt->dst.error = -ENETUNREACH;
1474 break;
1475 }
1476 goto install_route;
1477 }
1478
1479 if (cfg->fc_flags & RTF_GATEWAY) {
1480 const struct in6_addr *gw_addr;
1481 int gwa_type;
1482
1483 gw_addr = &cfg->fc_gateway;
1484 rt->rt6i_gateway = *gw_addr;
1485 gwa_type = ipv6_addr_type(gw_addr);
1486
1487 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1488 struct rt6_info *grt;
1489
1490 /* IPv6 strictly inhibits using non-link-local
1491 addresses as a nexthop address.
1492 Otherwise, the router will not be able to send redirects.
1493 It is very good, but in some (rare!) circumstances
1494 (SIT, PtP, NBMA NOARP links) it is handy to allow
1495 some exceptions. --ANK
1496 */
1497 err = -EINVAL;
1498 if (!(gwa_type & IPV6_ADDR_UNICAST))
1499 goto out;
1500
1501 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1502
1503 err = -EHOSTUNREACH;
1504 if (!grt)
1505 goto out;
1506 if (dev) {
1507 if (dev != grt->dst.dev) {
1508 dst_release(&grt->dst);
1509 goto out;
1510 }
1511 } else {
1512 dev = grt->dst.dev;
1513 idev = grt->rt6i_idev;
1514 dev_hold(dev);
1515 in6_dev_hold(grt->rt6i_idev);
1516 }
1517 if (!(grt->rt6i_flags & RTF_GATEWAY))
1518 err = 0;
1519 dst_release(&grt->dst);
1520
1521 if (err)
1522 goto out;
1523 }
1524 err = -EINVAL;
1525 if (!dev || (dev->flags & IFF_LOOPBACK))
1526 goto out;
1527 }
1528
1529 err = -ENODEV;
1530 if (!dev)
1531 goto out;
1532
1533 if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
1534 if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
1535 err = -EINVAL;
1536 goto out;
1537 }
1538 rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
1539 rt->rt6i_prefsrc.plen = 128;
1540 } else
1541 rt->rt6i_prefsrc.plen = 0;
1542
1543 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1544 err = rt6_bind_neighbour(rt, dev);
1545 if (err)
1546 goto out;
1547 }
1548
1549 rt->rt6i_flags = cfg->fc_flags;
1550
1551 install_route:
1552 if (cfg->fc_mx) {
1553 struct nlattr *nla;
1554 int remaining;
1555
1556 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1557 int type = nla_type(nla);
1558
1559 if (type) {
1560 if (type > RTAX_MAX) {
1561 err = -EINVAL;
1562 goto out;
1563 }
1564
1565 dst_metric_set(&rt->dst, type, nla_get_u32(nla));
1566 }
1567 }
1568 }
1569
1570 rt->dst.dev = dev;
1571 rt->rt6i_idev = idev;
1572 rt->rt6i_table = table;
1573
1574 cfg->fc_nlinfo.nl_net = dev_net(dev);
1575
1576 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1577
1578 out:
1579 if (dev)
1580 dev_put(dev);
1581 if (idev)
1582 in6_dev_put(idev);
1583 if (rt)
1584 dst_free(&rt->dst);
1585 return err;
1586 }
1587
1588 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1589 {
1590 int err;
1591 struct fib6_table *table;
1592 struct net *net = dev_net(rt->dst.dev);
1593
1594 if (rt == net->ipv6.ip6_null_entry)
1595 return -ENOENT;
1596
1597 table = rt->rt6i_table;
1598 write_lock_bh(&table->tb6_lock);
1599
1600 err = fib6_del(rt, info);
1601 dst_release(&rt->dst);
1602
1603 write_unlock_bh(&table->tb6_lock);
1604
1605 return err;
1606 }
1607
1608 int ip6_del_rt(struct rt6_info *rt)
1609 {
1610 struct nl_info info = {
1611 .nl_net = dev_net(rt->dst.dev),
1612 };
1613 return __ip6_del_rt(rt, &info);
1614 }
1615
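/* Delete the first route matching the config's dst/src prefix, and (when
 * given) its output device, gateway and metric.
 */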
1616 static int ip6_route_del(struct fib6_config *cfg)
1617 {
1618 struct fib6_table *table;
1619 struct fib6_node *fn;
1620 struct rt6_info *rt;
1621 int err = -ESRCH;
1622
1623 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1624 if (!table)
1625 return err;
1626
1627 read_lock_bh(&table->tb6_lock);
1628
1629 fn = fib6_locate(&table->tb6_root,
1630 &cfg->fc_dst, cfg->fc_dst_len,
1631 &cfg->fc_src, cfg->fc_src_len);
1632
1633 if (fn) {
1634 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1635 if (cfg->fc_ifindex &&
1636 (!rt->dst.dev ||
1637 rt->dst.dev->ifindex != cfg->fc_ifindex))
1638 continue;
1639 if (cfg->fc_flags & RTF_GATEWAY &&
1640 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1641 continue;
1642 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1643 continue;
1644 dst_hold(&rt->dst);
1645 read_unlock_bh(&table->tb6_lock);
1646
1647 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1648 }
1649 }
1650 read_unlock_bh(&table->tb6_lock);
1651
1652 return err;
1653 }
1654
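/* Handle a received ICMPv6 Redirect: validate the message and its ND
 * options, update the neighbour cache for the new first hop, and install
 * an RTF_CACHE route through it, removing any stale cached route.
 */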
1655 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
1656 {
1657 struct net *net = dev_net(skb->dev);
1658 struct netevent_redirect netevent;
1659 struct rt6_info *rt, *nrt = NULL;
1660 const struct in6_addr *target;
1661 struct ndisc_options ndopts;
1662 const struct in6_addr *dest;
1663 struct neighbour *old_neigh;
1664 struct inet6_dev *in6_dev;
1665 struct neighbour *neigh;
1666 struct icmp6hdr *icmph;
1667 int optlen, on_link;
1668 u8 *lladdr;
1669
1670 optlen = skb->tail - skb->transport_header;
1671 optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1672
1673 if (optlen < 0) {
1674 net_dbg_ratelimited("rt6_do_redirect: packet too short\n");
1675 return;
1676 }
1677
1678 icmph = icmp6_hdr(skb);
1679 target = (const struct in6_addr *) (icmph + 1);
1680 dest = target + 1;
1681
1682 if (ipv6_addr_is_multicast(dest)) {
1683 net_dbg_ratelimited("rt6_do_redirect: destination address is multicast\n");
1684 return;
1685 }
1686
1687 on_link = 0;
1688 if (ipv6_addr_equal(dest, target)) {
1689 on_link = 1;
1690 } else if (ipv6_addr_type(target) !=
1691 (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1692 net_dbg_ratelimited("rt6_do_redirect: target address is not link-local unicast\n");
1693 return;
1694 }
1695
1696 in6_dev = __in6_dev_get(skb->dev);
1697 if (!in6_dev)
1698 return;
1699 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects)
1700 return;
1701
1702 /* RFC2461 8.1:
1703 * The IP source address of the Redirect MUST be the same as the current
1704 * first-hop router for the specified ICMP Destination Address.
1705 */
1706
1707 if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1708 net_dbg_ratelimited("rt6_redirect: invalid ND options\n");
1709 return;
1710 }
1711
1712 lladdr = NULL;
1713 if (ndopts.nd_opts_tgt_lladdr) {
1714 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1715 skb->dev);
1716 if (!lladdr) {
1717 net_dbg_ratelimited("rt6_redirect: invalid link-layer address length\n");
1718 return;
1719 }
1720 }
1721
1722 rt = (struct rt6_info *) dst;
1723 if (rt == net->ipv6.ip6_null_entry) {
1724 net_dbg_ratelimited("rt6_redirect: source isn't a valid nexthop for redirect target\n");
1725 return;
1726 }
1727
1728 /* Redirect received -> path was valid.
1729 * Look, redirects are sent only in response to data packets,
1730 * so that this nexthop apparently is reachable. --ANK
1731 */
1732 dst_confirm(&rt->dst);
1733
1734 neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1735 if (!neigh)
1736 return;
1737
1738 /* Duplicate redirect: silently ignore. */
1739 old_neigh = rt->n;
1740 if (neigh == old_neigh)
1741 goto out;
1742
1743 /*
1744 * We have finally decided to accept it.
1745 */
1746
1747 neigh_update(neigh, lladdr, NUD_STALE,
1748 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1749 NEIGH_UPDATE_F_OVERRIDE|
1750 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1751 NEIGH_UPDATE_F_ISROUTER))
1752 );
1753
1754 nrt = ip6_rt_copy(rt, dest);
1755 if (!nrt)
1756 goto out;
1757
1758 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1759 if (on_link)
1760 nrt->rt6i_flags &= ~RTF_GATEWAY;
1761
1762 nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
1763 nrt->n = neigh_clone(neigh);
1764
1765 if (ip6_ins_rt(nrt))
1766 goto out;
1767
1768 netevent.old = &rt->dst;
1769 netevent.old_neigh = old_neigh;
1770 netevent.new = &nrt->dst;
1771 netevent.new_neigh = neigh;
1772 netevent.daddr = dest;
1773 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1774
1775 if (rt->rt6i_flags & RTF_CACHE) {
1776 rt = (struct rt6_info *) dst_clone(&rt->dst);
1777 ip6_del_rt(rt);
1778 }
1779
1780 out:
1781 neigh_release(neigh);
1782 }
1783
1784 /*
1785 * Misc support functions
1786 */
1787
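/* Duplicate a route as a /128 host route towards 'dest', copying metrics,
 * flags and device references from the original.
 */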
1788 static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
1789 const struct in6_addr *dest)
1790 {
1791 struct net *net = dev_net(ort->dst.dev);
1792 struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0,
1793 ort->rt6i_table);
1794
1795 if (rt) {
1796 rt->dst.input = ort->dst.input;
1797 rt->dst.output = ort->dst.output;
1798 rt->dst.flags |= DST_HOST;
1799
1800 rt->rt6i_dst.addr = *dest;
1801 rt->rt6i_dst.plen = 128;
1802 dst_copy_metrics(&rt->dst, &ort->dst);
1803 rt->dst.error = ort->dst.error;
1804 rt->rt6i_idev = ort->rt6i_idev;
1805 if (rt->rt6i_idev)
1806 in6_dev_hold(rt->rt6i_idev);
1807 rt->dst.lastuse = jiffies;
1808
1809 rt->rt6i_gateway = ort->rt6i_gateway;
1810 rt->rt6i_flags = ort->rt6i_flags;
1811 if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
1812 (RTF_DEFAULT | RTF_ADDRCONF))
1813 rt6_set_from(rt, ort);
1814 else
1815 rt6_clean_expires(rt);
1816 rt->rt6i_metric = 0;
1817
1818 #ifdef CONFIG_IPV6_SUBTREES
1819 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1820 #endif
1821 memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key));
1822 rt->rt6i_table = ort->rt6i_table;
1823 }
1824 return rt;
1825 }
1826
1827 #ifdef CONFIG_IPV6_ROUTE_INFO
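/* Find the RTF_ROUTEINFO route previously learnt for this prefix, gateway
 * and interface; the table is only searched, so the read lock suffices.
 */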
1828 static struct rt6_info *rt6_get_route_info(struct net *net,
1829 const struct in6_addr *prefix, int prefixlen,
1830 const struct in6_addr *gwaddr, int ifindex)
1831 {
1832 struct fib6_node *fn;
1833 struct rt6_info *rt = NULL;
1834 struct fib6_table *table;
1835
1836 table = fib6_get_table(net, RT6_TABLE_INFO);
1837 if (!table)
1838 return NULL;
1839
1840 read_lock_bh(&table->tb6_lock);
1841 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1842 if (!fn)
1843 goto out;
1844
1845 for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
1846 if (rt->dst.dev->ifindex != ifindex)
1847 continue;
1848 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1849 continue;
1850 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1851 continue;
1852 dst_hold(&rt->dst);
1853 break;
1854 }
1855 out:
1856 read_unlock_bh(&table->tb6_lock);
1857 return rt;
1858 }
1859
1860 static struct rt6_info *rt6_add_route_info(struct net *net,
1861 const struct in6_addr *prefix, int prefixlen,
1862 const struct in6_addr *gwaddr, int ifindex,
1863 unsigned int pref)
1864 {
1865 struct fib6_config cfg = {
1866 .fc_table = RT6_TABLE_INFO,
1867 .fc_metric = IP6_RT_PRIO_USER,
1868 .fc_ifindex = ifindex,
1869 .fc_dst_len = prefixlen,
1870 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1871 RTF_UP | RTF_PREF(pref),
1872 .fc_nlinfo.portid = 0,
1873 .fc_nlinfo.nlh = NULL,
1874 .fc_nlinfo.nl_net = net,
1875 };
1876
1877 cfg.fc_dst = *prefix;
1878 cfg.fc_gateway = *gwaddr;
1879
1880 /* We should treat it as a default route if prefix length is 0. */
1881 if (!prefixlen)
1882 cfg.fc_flags |= RTF_DEFAULT;
1883
1884 ip6_route_add(&cfg);
1885
1886 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
1887 }
1888 #endif
1889
1890 struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_device *dev)
1891 {
1892 struct rt6_info *rt;
1893 struct fib6_table *table;
1894
1895 table = fib6_get_table(dev_net(dev), RT6_TABLE_DFLT);
1896 if (!table)
1897 return NULL;
1898
1899 read_lock_bh(&table->tb6_lock);
1900 for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
1901 if (dev == rt->dst.dev &&
1902 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1903 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1904 break;
1905 }
1906 if (rt)
1907 dst_hold(&rt->dst);
1908 read_unlock_bh(&table->tb6_lock);
1909 return rt;
1910 }
1911
1912 struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
1913 struct net_device *dev,
1914 unsigned int pref)
1915 {
1916 struct fib6_config cfg = {
1917 .fc_table = RT6_TABLE_DFLT,
1918 .fc_metric = IP6_RT_PRIO_USER,
1919 .fc_ifindex = dev->ifindex,
1920 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1921 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1922 .fc_nlinfo.portid = 0,
1923 .fc_nlinfo.nlh = NULL,
1924 .fc_nlinfo.nl_net = dev_net(dev),
1925 };
1926
1927 cfg.fc_gateway = *gwaddr;
1928
1929 ip6_route_add(&cfg);
1930
1931 return rt6_get_dflt_router(gwaddr, dev);
1932 }
1933
1934 void rt6_purge_dflt_routers(struct net *net)
1935 {
1936 struct rt6_info *rt;
1937 struct fib6_table *table;
1938
1939 /* NOTE: Keep consistent with rt6_get_dflt_router */
1940 table = fib6_get_table(net, RT6_TABLE_DFLT);
1941 if (!table)
1942 return;
1943
1944 restart:
1945 read_lock_bh(&table->tb6_lock);
1946 for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
1947 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1948 dst_hold(&rt->dst);
1949 read_unlock_bh(&table->tb6_lock);
1950 ip6_del_rt(rt);
1951 goto restart;
1952 }
1953 }
1954 read_unlock_bh(&table->tb6_lock);
1955 }
1956
1957 static void rtmsg_to_fib6_config(struct net *net,
1958 struct in6_rtmsg *rtmsg,
1959 struct fib6_config *cfg)
1960 {
1961 memset(cfg, 0, sizeof(*cfg));
1962
1963 cfg->fc_table = RT6_TABLE_MAIN;
1964 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1965 cfg->fc_metric = rtmsg->rtmsg_metric;
1966 cfg->fc_expires = rtmsg->rtmsg_info;
1967 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1968 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1969 cfg->fc_flags = rtmsg->rtmsg_flags;
1970
1971 cfg->fc_nlinfo.nl_net = net;
1972
1973 cfg->fc_dst = rtmsg->rtmsg_dst;
1974 cfg->fc_src = rtmsg->rtmsg_src;
1975 cfg->fc_gateway = rtmsg->rtmsg_gateway;
1976 }
1977
1978 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1979 {
1980 struct fib6_config cfg;
1981 struct in6_rtmsg rtmsg;
1982 int err;
1983
1984 switch(cmd) {
1985 case SIOCADDRT: /* Add a route */
1986 case SIOCDELRT: /* Delete a route */
1987 if (!capable(CAP_NET_ADMIN))
1988 return -EPERM;
1989 err = copy_from_user(&rtmsg, arg,
1990 sizeof(struct in6_rtmsg));
1991 if (err)
1992 return -EFAULT;
1993
1994 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1995
1996 rtnl_lock();
1997 switch (cmd) {
1998 case SIOCADDRT:
1999 err = ip6_route_add(&cfg);
2000 break;
2001 case SIOCDELRT:
2002 err = ip6_route_del(&cfg);
2003 break;
2004 default:
2005 err = -EINVAL;
2006 }
2007 rtnl_unlock();
2008
2009 return err;
2010 }
2011
2012 return -EINVAL;
2013 }
2014
2015 /*
2016 * Drop the packet on the floor
2017 */
2018
2019 static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
2020 {
2021 int type;
2022 struct dst_entry *dst = skb_dst(skb);
2023 switch (ipstats_mib_noroutes) {
2024 case IPSTATS_MIB_INNOROUTES:
2025 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
2026 if (type == IPV6_ADDR_ANY) {
2027 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2028 IPSTATS_MIB_INADDRERRORS);
2029 break;
2030 }
2031 /* FALLTHROUGH */
2032 case IPSTATS_MIB_OUTNOROUTES:
2033 IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
2034 ipstats_mib_noroutes);
2035 break;
2036 }
2037 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
2038 kfree_skb(skb);
2039 return 0;
2040 }
2041
2042 static int ip6_pkt_discard(struct sk_buff *skb)
2043 {
2044 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
2045 }
2046
2047 static int ip6_pkt_discard_out(struct sk_buff *skb)
2048 {
2049 skb->dev = skb_dst(skb)->dev;
2050 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
2051 }
2052
2053 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2054
2055 static int ip6_pkt_prohibit(struct sk_buff *skb)
2056 {
2057 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
2058 }
2059
2060 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
2061 {
2062 skb->dev = skb_dst(skb)->dev;
2063 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
2064 }
2065
2066 #endif
2067
2068 /*
2069 * Allocate a dst for local (unicast / anycast) address.
2070 */
2071
2072 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
2073 const struct in6_addr *addr,
2074 bool anycast)
2075 {
2076 struct net *net = dev_net(idev->dev);
2077 struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
2078 int err;
2079
2080 if (!rt) {
2081 net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
2082 return ERR_PTR(-ENOMEM);
2083 }
2084
2085 in6_dev_hold(idev);
2086
2087 rt->dst.flags |= DST_HOST;
2088 rt->dst.input = ip6_input;
2089 rt->dst.output = ip6_output;
2090 rt->rt6i_idev = idev;
2091 rt->dst.obsolete = -1;
2092
2093 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
2094 if (anycast)
2095 rt->rt6i_flags |= RTF_ANYCAST;
2096 else
2097 rt->rt6i_flags |= RTF_LOCAL;
2098 err = rt6_bind_neighbour(rt, rt->dst.dev);
2099 if (err) {
2100 dst_free(&rt->dst);
2101 return ERR_PTR(err);
2102 }
2103
2104 rt->rt6i_dst.addr = *addr;
2105 rt->rt6i_dst.plen = 128;
2106 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
2107
2108 atomic_set(&rt->dst.__refcnt, 1);
2109
2110 return rt;
2111 }
2112
2113 int ip6_route_get_saddr(struct net *net,
2114 struct rt6_info *rt,
2115 const struct in6_addr *daddr,
2116 unsigned int prefs,
2117 struct in6_addr *saddr)
2118 {
2119 struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
2120 int err = 0;
2121 if (rt->rt6i_prefsrc.plen)
2122 *saddr = rt->rt6i_prefsrc.addr;
2123 else
2124 err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
2125 daddr, prefs, saddr);
2126 return err;
2127 }
2128
2129 /* remove deleted ip from prefsrc entries */
2130 struct arg_dev_net_ip {
2131 struct net_device *dev;
2132 struct net *net;
2133 struct in6_addr *addr;
2134 };
2135
2136 static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
2137 {
2138 struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
2139 struct net *net = ((struct arg_dev_net_ip *)arg)->net;
2140 struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
2141
2142 if (((void *)rt->dst.dev == dev || !dev) &&
2143 rt != net->ipv6.ip6_null_entry &&
2144 ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
2145 /* remove prefsrc entry */
2146 rt->rt6i_prefsrc.plen = 0;
2147 }
2148 return 0;
2149 }
2150
2151 void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
2152 {
2153 struct net *net = dev_net(ifp->idev->dev);
2154 struct arg_dev_net_ip adni = {
2155 .dev = ifp->idev->dev,
2156 .net = net,
2157 .addr = &ifp->addr,
2158 };
2159 fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni);
2160 }
2161
2162 struct arg_dev_net {
2163 struct net_device *dev;
2164 struct net *net;
2165 };
2166
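/*
 * fib6_clean_all() callback used by rt6_ifdown(): a non-zero return tells
 * the tree walker to delete the route, so all routes through the downed
 * device (all routes, if @dev is NULL) are removed, except the null entry.
 */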
2167 static int fib6_ifdown(struct rt6_info *rt, void *arg)
2168 {
2169 const struct arg_dev_net *adn = arg;
2170 const struct net_device *dev = adn->dev;
2171
2172 if ((rt->dst.dev == dev || !dev) &&
2173 rt != adn->net->ipv6.ip6_null_entry)
2174 return -1;
2175
2176 return 0;
2177 }
2178
2179 void rt6_ifdown(struct net *net, struct net_device *dev)
2180 {
2181 struct arg_dev_net adn = {
2182 .dev = dev,
2183 .net = net,
2184 };
2185
2186 fib6_clean_all(net, fib6_ifdown, 0, &adn);
2187 icmp6_clean_all(fib6_ifdown, &adn);
2188 }
2189
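/*
 * Propagate a device MTU change to the routing table: rt6_mtu_change()
 * walks all routes via fib6_clean_all() and rt6_mtu_change_route()
 * adjusts the cached path MTU where needed.
 */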
2190 struct rt6_mtu_change_arg {
2191 struct net_device *dev;
2192 unsigned int mtu;
2193 };
2194
2195 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
2196 {
2197 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
2198 struct inet6_dev *idev;
2199
2200 /* In IPv6, PMTU discovery is not optional,
2201 so the RTAX_MTU lock cannot disable it.
2202 We still use this lock to block changes
2203 caused by addrconf/ndisc.
2204 */
2205
2206 idev = __in6_dev_get(arg->dev);
2207 if (!idev)
2208 return 0;
2209
2210 /* For an administrative MTU increase there is no way to discover
2211 the IPv6 PMTU increase, so the PMTU must be updated here.
2212 Since RFC 1981 doesn't cover administrative MTU increases,
2213 updating the PMTU on an increase is a MUST (e.g. jumbo frames).
2214 */
2215 /*
2216 If the new MTU is less than the route PMTU, the new MTU will be the
2217 lowest MTU in the path; update the route PMTU to reflect the
2218 decrease. If the new MTU is greater than the route PMTU, and the
2219 old MTU was the lowest MTU in the path, update the route PMTU
2220 to reflect the increase. In that case, if another node on the path
2221 still has the lowest MTU, a Packet Too Big message will trigger
2222 PMTU discovery again.
2223 */
2224 if (rt->dst.dev == arg->dev &&
2225 !dst_metric_locked(&rt->dst, RTAX_MTU) &&
2226 (dst_mtu(&rt->dst) >= arg->mtu ||
2227 (dst_mtu(&rt->dst) < arg->mtu &&
2228 dst_mtu(&rt->dst) == idev->cnf.mtu6))) {
2229 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
2230 }
2231 return 0;
2232 }
2233
2234 void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
2235 {
2236 struct rt6_mtu_change_arg arg = {
2237 .dev = dev,
2238 .mtu = mtu,
2239 };
2240
2241 fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg);
2242 }
2243
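/*
 * Netlink interface: rtm_ipv6_policy validates the route attributes and
 * rtm_to_fib6_config() converts an RTM_NEWROUTE/RTM_DELROUTE request into
 * a struct fib6_config. For example, a request generated by
 *
 *	ip -6 route add 2001:db8::/64 via fe80::1 dev eth0 metric 1024
 *
 * carries RTA_DST, RTA_GATEWAY, RTA_OIF and RTA_PRIORITY, which end up in
 * fc_dst, fc_gateway, fc_ifindex and fc_metric respectively.
 */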
2244 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
2245 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
2246 [RTA_OIF] = { .type = NLA_U32 },
2247 [RTA_IIF] = { .type = NLA_U32 },
2248 [RTA_PRIORITY] = { .type = NLA_U32 },
2249 [RTA_METRICS] = { .type = NLA_NESTED },
2250 };
2251
2252 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2253 struct fib6_config *cfg)
2254 {
2255 struct rtmsg *rtm;
2256 struct nlattr *tb[RTA_MAX+1];
2257 int err;
2258
2259 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2260 if (err < 0)
2261 goto errout;
2262
2263 err = -EINVAL;
2264 rtm = nlmsg_data(nlh);
2265 memset(cfg, 0, sizeof(*cfg));
2266
2267 cfg->fc_table = rtm->rtm_table;
2268 cfg->fc_dst_len = rtm->rtm_dst_len;
2269 cfg->fc_src_len = rtm->rtm_src_len;
2270 cfg->fc_flags = RTF_UP;
2271 cfg->fc_protocol = rtm->rtm_protocol;
2272 cfg->fc_type = rtm->rtm_type;
2273
2274 if (rtm->rtm_type == RTN_UNREACHABLE ||
2275 rtm->rtm_type == RTN_BLACKHOLE ||
2276 rtm->rtm_type == RTN_PROHIBIT ||
2277 rtm->rtm_type == RTN_THROW)
2278 cfg->fc_flags |= RTF_REJECT;
2279
2280 if (rtm->rtm_type == RTN_LOCAL)
2281 cfg->fc_flags |= RTF_LOCAL;
2282
2283 cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
2284 cfg->fc_nlinfo.nlh = nlh;
2285 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
2286
2287 if (tb[RTA_GATEWAY]) {
2288 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
2289 cfg->fc_flags |= RTF_GATEWAY;
2290 }
2291
2292 if (tb[RTA_DST]) {
2293 int plen = (rtm->rtm_dst_len + 7) >> 3;
2294
2295 if (nla_len(tb[RTA_DST]) < plen)
2296 goto errout;
2297
2298 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2299 }
2300
2301 if (tb[RTA_SRC]) {
2302 int plen = (rtm->rtm_src_len + 7) >> 3;
2303
2304 if (nla_len(tb[RTA_SRC]) < plen)
2305 goto errout;
2306
2307 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2308 }
2309
2310 if (tb[RTA_PREFSRC])
2311 nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16);
2312
2313 if (tb[RTA_OIF])
2314 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2315
2316 if (tb[RTA_PRIORITY])
2317 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2318
2319 if (tb[RTA_METRICS]) {
2320 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2321 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2322 }
2323
2324 if (tb[RTA_TABLE])
2325 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
2326
2327 err = 0;
2328 errout:
2329 return err;
2330 }
2331
2332 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2333 {
2334 struct fib6_config cfg;
2335 int err;
2336
2337 err = rtm_to_fib6_config(skb, nlh, &cfg);
2338 if (err < 0)
2339 return err;
2340
2341 return ip6_route_del(&cfg);
2342 }
2343
2344 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2345 {
2346 struct fib6_config cfg;
2347 int err;
2348
2349 err = rtm_to_fib6_config(skb, nlh, &cfg);
2350 if (err < 0)
2351 return err;
2352
2353 return ip6_route_add(&cfg);
2354 }
2355
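/*
 * Worst-case size of a route netlink message, used to size the skb
 * allocated for RTNLGRP_IPV6_ROUTE notifications.
 */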
2356 static inline size_t rt6_nlmsg_size(void)
2357 {
2358 return NLMSG_ALIGN(sizeof(struct rtmsg))
2359 + nla_total_size(16) /* RTA_SRC */
2360 + nla_total_size(16) /* RTA_DST */
2361 + nla_total_size(16) /* RTA_GATEWAY */
2362 + nla_total_size(16) /* RTA_PREFSRC */
2363 + nla_total_size(4) /* RTA_TABLE */
2364 + nla_total_size(4) /* RTA_IIF */
2365 + nla_total_size(4) /* RTA_OIF */
2366 + nla_total_size(4) /* RTA_PRIORITY */
2367 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2368 + nla_total_size(sizeof(struct rta_cacheinfo));
2369 }
2370
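/*
 * Fill one rtnetlink message describing @rt. Returns 1 (adding nothing)
 * when @prefix is set and the route is not a prefix route, -EMSGSIZE when
 * the message does not fit in @skb, and the result of nlmsg_end() on
 * success.
 */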
2371 static int rt6_fill_node(struct net *net,
2372 struct sk_buff *skb, struct rt6_info *rt,
2373 struct in6_addr *dst, struct in6_addr *src,
2374 int iif, int type, u32 portid, u32 seq,
2375 int prefix, int nowait, unsigned int flags)
2376 {
2377 struct rtmsg *rtm;
2378 struct nlmsghdr *nlh;
2379 long expires;
2380 u32 table;
2381 struct neighbour *n;
2382
2383 if (prefix) { /* user wants prefix routes only */
2384 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2385 /* success since this is not a prefix route */
2386 return 1;
2387 }
2388 }
2389
2390 nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags);
2391 if (!nlh)
2392 return -EMSGSIZE;
2393
2394 rtm = nlmsg_data(nlh);
2395 rtm->rtm_family = AF_INET6;
2396 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2397 rtm->rtm_src_len = rt->rt6i_src.plen;
2398 rtm->rtm_tos = 0;
2399 if (rt->rt6i_table)
2400 table = rt->rt6i_table->tb6_id;
2401 else
2402 table = RT6_TABLE_UNSPEC;
2403 rtm->rtm_table = table;
2404 if (nla_put_u32(skb, RTA_TABLE, table))
2405 goto nla_put_failure;
2406 if (rt->rt6i_flags & RTF_REJECT) {
2407 switch (rt->dst.error) {
2408 case -EINVAL:
2409 rtm->rtm_type = RTN_BLACKHOLE;
2410 break;
2411 case -EACCES:
2412 rtm->rtm_type = RTN_PROHIBIT;
2413 break;
2414 case -EAGAIN:
2415 rtm->rtm_type = RTN_THROW;
2416 break;
2417 default:
2418 rtm->rtm_type = RTN_UNREACHABLE;
2419 break;
2420 }
2421 }
2422 else if (rt->rt6i_flags & RTF_LOCAL)
2423 rtm->rtm_type = RTN_LOCAL;
2424 else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
2425 rtm->rtm_type = RTN_LOCAL;
2426 else
2427 rtm->rtm_type = RTN_UNICAST;
2428 rtm->rtm_flags = 0;
2429 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2430 rtm->rtm_protocol = rt->rt6i_protocol;
2431 if (rt->rt6i_flags & RTF_DYNAMIC)
2432 rtm->rtm_protocol = RTPROT_REDIRECT;
2433 else if (rt->rt6i_flags & RTF_ADDRCONF) {
2434 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
2435 rtm->rtm_protocol = RTPROT_RA;
2436 else
2437 rtm->rtm_protocol = RTPROT_KERNEL;
2438 }
2439
2440 if (rt->rt6i_flags & RTF_CACHE)
2441 rtm->rtm_flags |= RTM_F_CLONED;
2442
2443 if (dst) {
2444 if (nla_put(skb, RTA_DST, 16, dst))
2445 goto nla_put_failure;
2446 rtm->rtm_dst_len = 128;
2447 } else if (rtm->rtm_dst_len)
2448 if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr))
2449 goto nla_put_failure;
2450 #ifdef CONFIG_IPV6_SUBTREES
2451 if (src) {
2452 if (nla_put(skb, RTA_SRC, 16, src))
2453 goto nla_put_failure;
2454 rtm->rtm_src_len = 128;
2455 } else if (rtm->rtm_src_len &&
2456 nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr))
2457 goto nla_put_failure;
2458 #endif
2459 if (iif) {
2460 #ifdef CONFIG_IPV6_MROUTE
2461 if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr)) {
2462 int err = ip6mr_get_route(net, skb, rtm, nowait);
2463 if (err <= 0) {
2464 if (!nowait) {
2465 if (err == 0)
2466 return 0;
2467 goto nla_put_failure;
2468 } else {
2469 if (err == -EMSGSIZE)
2470 goto nla_put_failure;
2471 }
2472 }
2473 } else
2474 #endif
2475 if (nla_put_u32(skb, RTA_IIF, iif))
2476 goto nla_put_failure;
2477 } else if (dst) {
2478 struct in6_addr saddr_buf;
2479 if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 &&
2480 nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2481 goto nla_put_failure;
2482 }
2483
2484 if (rt->rt6i_prefsrc.plen) {
2485 struct in6_addr saddr_buf;
2486 saddr_buf = rt->rt6i_prefsrc.addr;
2487 if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf))
2488 goto nla_put_failure;
2489 }
2490
2491 if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
2492 goto nla_put_failure;
2493
2494 n = rt->n;
2495 if (n) {
2496 if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0)
2497 goto nla_put_failure;
2498 }
2499
2500 if (rt->dst.dev &&
2501 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2502 goto nla_put_failure;
2503 if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric))
2504 goto nla_put_failure;
2505
2506 expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0;
2507
2508 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
2509 goto nla_put_failure;
2510
2511 return nlmsg_end(skb, nlh);
2512
2513 nla_put_failure:
2514 nlmsg_cancel(skb, nlh);
2515 return -EMSGSIZE;
2516 }
2517
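/*
 * Per-route callback for RTM_GETROUTE dump requests; honours the
 * RTM_F_PREFIX filter requested by the caller.
 */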
2518 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2519 {
2520 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2521 int prefix;
2522
2523 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2524 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2525 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2526 } else
2527 prefix = 0;
2528
2529 return rt6_fill_node(arg->net,
2530 arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2531 NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq,
2532 prefix, 0, NLM_F_MULTI);
2533 }
2534
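/*
 * RTM_GETROUTE handler: perform a route lookup for the given source,
 * destination and interface hints (as issued by e.g. "ip -6 route get")
 * and unicast the resulting route back to the requester.
 */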
2535 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2536 {
2537 struct net *net = sock_net(in_skb->sk);
2538 struct nlattr *tb[RTA_MAX+1];
2539 struct rt6_info *rt;
2540 struct sk_buff *skb;
2541 struct rtmsg *rtm;
2542 struct flowi6 fl6;
2543 int err, iif = 0, oif = 0;
2544
2545 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2546 if (err < 0)
2547 goto errout;
2548
2549 err = -EINVAL;
2550 memset(&fl6, 0, sizeof(fl6));
2551
2552 if (tb[RTA_SRC]) {
2553 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2554 goto errout;
2555
2556 fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
2557 }
2558
2559 if (tb[RTA_DST]) {
2560 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2561 goto errout;
2562
2563 fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
2564 }
2565
2566 if (tb[RTA_IIF])
2567 iif = nla_get_u32(tb[RTA_IIF]);
2568
2569 if (tb[RTA_OIF])
2570 oif = nla_get_u32(tb[RTA_OIF]);
2571
2572 if (iif) {
2573 struct net_device *dev;
2574 int flags = 0;
2575
2576 dev = __dev_get_by_index(net, iif);
2577 if (!dev) {
2578 err = -ENODEV;
2579 goto errout;
2580 }
2581
2582 fl6.flowi6_iif = iif;
2583
2584 if (!ipv6_addr_any(&fl6.saddr))
2585 flags |= RT6_LOOKUP_F_HAS_SADDR;
2586
2587 rt = (struct rt6_info *)ip6_route_input_lookup(net, dev, &fl6,
2588 flags);
2589 } else {
2590 fl6.flowi6_oif = oif;
2591
2592 rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
2593 }
2594
2595 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2596 if (!skb) {
2597 dst_release(&rt->dst);
2598 err = -ENOBUFS;
2599 goto errout;
2600 }
2601
2602 /* Reserve room for dummy headers; this skb can pass
2603 through a good chunk of the routing engine.
2604 */
2605 skb_reset_mac_header(skb);
2606 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2607
2608 skb_dst_set(skb, &rt->dst);
2609
2610 err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
2611 RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
2612 nlh->nlmsg_seq, 0, 0, 0);
2613 if (err < 0) {
2614 kfree_skb(skb);
2615 goto errout;
2616 }
2617
2618 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
2619 errout:
2620 return err;
2621 }
2622
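/*
 * Notify RTNLGRP_IPV6_ROUTE listeners that a route has been added or
 * deleted (@event is RTM_NEWROUTE or RTM_DELROUTE).
 */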
2623 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2624 {
2625 struct sk_buff *skb;
2626 struct net *net = info->nl_net;
2627 u32 seq;
2628 int err;
2629
2630 err = -ENOBUFS;
2631 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
2632
2633 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2634 if (!skb)
2635 goto errout;
2636
2637 err = rt6_fill_node(net, skb, rt, NULL, NULL, 0,
2638 event, info->portid, seq, 0, 0, 0);
2639 if (err < 0) {
2640 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2641 WARN_ON(err == -EMSGSIZE);
2642 kfree_skb(skb);
2643 goto errout;
2644 }
2645 rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
2646 info->nlh, gfp_any());
2647 return;
2648 errout:
2649 if (err < 0)
2650 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2651 }
2652
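/*
 * Netdevice notifier: when the loopback device of a namespace registers,
 * point the special null/prohibit/blackhole routes at it so they always
 * have a valid device and inet6_dev reference.
 */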
2653 static int ip6_route_dev_notify(struct notifier_block *this,
2654 unsigned long event, void *data)
2655 {
2656 struct net_device *dev = (struct net_device *)data;
2657 struct net *net = dev_net(dev);
2658
2659 if (event == NETDEV_REGISTER && (dev->flags & IFF_LOOPBACK)) {
2660 net->ipv6.ip6_null_entry->dst.dev = dev;
2661 net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
2662 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2663 net->ipv6.ip6_prohibit_entry->dst.dev = dev;
2664 net->ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(dev);
2665 net->ipv6.ip6_blk_hole_entry->dst.dev = dev;
2666 net->ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(dev);
2667 #endif
2668 }
2669
2670 return NOTIFY_OK;
2671 }
2672
2673 /*
2674 * /proc
2675 */
2676
2677 #ifdef CONFIG_PROC_FS
2678
2679 struct rt6_proc_arg
2680 {
2681 char *buffer;
2682 int offset;
2683 int length;
2684 int skip;
2685 int len;
2686 };
2687
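/*
 * Emit one line of /proc/net/ipv6_route for @rt: destination and source
 * prefixes, gateway, metric, refcount, use count, flags and device name.
 */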
2688 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2689 {
2690 struct seq_file *m = p_arg;
2691 struct neighbour *n;
2692
2693 seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen);
2694
2695 #ifdef CONFIG_IPV6_SUBTREES
2696 seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen);
2697 #else
2698 seq_puts(m, "00000000000000000000000000000000 00 ");
2699 #endif
2700 n = rt->n;
2701 if (n) {
2702 seq_printf(m, "%pi6", n->primary_key);
2703 } else {
2704 seq_puts(m, "00000000000000000000000000000000");
2705 }
2706 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2707 rt->rt6i_metric, atomic_read(&rt->dst.__refcnt),
2708 rt->dst.__use, rt->rt6i_flags,
2709 rt->dst.dev ? rt->dst.dev->name : "");
2710 return 0;
2711 }
2712
2713 static int ipv6_route_show(struct seq_file *m, void *v)
2714 {
2715 struct net *net = (struct net *)m->private;
2716 fib6_clean_all_ro(net, rt6_info_route, 0, m);
2717 return 0;
2718 }
2719
2720 static int ipv6_route_open(struct inode *inode, struct file *file)
2721 {
2722 return single_open_net(inode, file, ipv6_route_show);
2723 }
2724
2725 static const struct file_operations ipv6_route_proc_fops = {
2726 .owner = THIS_MODULE,
2727 .open = ipv6_route_open,
2728 .read = seq_read,
2729 .llseek = seq_lseek,
2730 .release = single_release_net,
2731 };
2732
2733 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2734 {
2735 struct net *net = (struct net *)seq->private;
2736 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2737 net->ipv6.rt6_stats->fib_nodes,
2738 net->ipv6.rt6_stats->fib_route_nodes,
2739 net->ipv6.rt6_stats->fib_rt_alloc,
2740 net->ipv6.rt6_stats->fib_rt_entries,
2741 net->ipv6.rt6_stats->fib_rt_cache,
2742 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2743 net->ipv6.rt6_stats->fib_discarded_routes);
2744
2745 return 0;
2746 }
2747
2748 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2749 {
2750 return single_open_net(inode, file, rt6_stats_seq_show);
2751 }
2752
2753 static const struct file_operations rt6_stats_seq_fops = {
2754 .owner = THIS_MODULE,
2755 .open = rt6_stats_seq_open,
2756 .read = seq_read,
2757 .llseek = seq_lseek,
2758 .release = single_release_net,
2759 };
2760 #endif /* CONFIG_PROC_FS */
2761
2762 #ifdef CONFIG_SYSCTL
2763
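/*
 * Sysctl interface, exposed under /proc/sys/net/ipv6/route/. Writing to
 * "flush" triggers a garbage-collection pass of the FIB; the remaining
 * entries tune routing-cache garbage collection and PMTU behaviour.
 */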
2764 static
2765 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
2766 void __user *buffer, size_t *lenp, loff_t *ppos)
2767 {
2768 struct net *net;
2769 int delay;
2770 if (!write)
2771 return -EINVAL;
2772
2773 net = (struct net *)ctl->extra1;
2774 delay = net->ipv6.sysctl.flush_delay;
2775 proc_dointvec(ctl, write, buffer, lenp, ppos);
2776 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
2777 return 0;
2778 }
2779
2780 ctl_table ipv6_route_table_template[] = {
2781 {
2782 .procname = "flush",
2783 .data = &init_net.ipv6.sysctl.flush_delay,
2784 .maxlen = sizeof(int),
2785 .mode = 0200,
2786 .proc_handler = ipv6_sysctl_rtcache_flush
2787 },
2788 {
2789 .procname = "gc_thresh",
2790 .data = &ip6_dst_ops_template.gc_thresh,
2791 .maxlen = sizeof(int),
2792 .mode = 0644,
2793 .proc_handler = proc_dointvec,
2794 },
2795 {
2796 .procname = "max_size",
2797 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2798 .maxlen = sizeof(int),
2799 .mode = 0644,
2800 .proc_handler = proc_dointvec,
2801 },
2802 {
2803 .procname = "gc_min_interval",
2804 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2805 .maxlen = sizeof(int),
2806 .mode = 0644,
2807 .proc_handler = proc_dointvec_jiffies,
2808 },
2809 {
2810 .procname = "gc_timeout",
2811 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2812 .maxlen = sizeof(int),
2813 .mode = 0644,
2814 .proc_handler = proc_dointvec_jiffies,
2815 },
2816 {
2817 .procname = "gc_interval",
2818 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2819 .maxlen = sizeof(int),
2820 .mode = 0644,
2821 .proc_handler = proc_dointvec_jiffies,
2822 },
2823 {
2824 .procname = "gc_elasticity",
2825 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2826 .maxlen = sizeof(int),
2827 .mode = 0644,
2828 .proc_handler = proc_dointvec,
2829 },
2830 {
2831 .procname = "mtu_expires",
2832 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2833 .maxlen = sizeof(int),
2834 .mode = 0644,
2835 .proc_handler = proc_dointvec_jiffies,
2836 },
2837 {
2838 .procname = "min_adv_mss",
2839 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2840 .maxlen = sizeof(int),
2841 .mode = 0644,
2842 .proc_handler = proc_dointvec,
2843 },
2844 {
2845 .procname = "gc_min_interval_ms",
2846 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2847 .maxlen = sizeof(int),
2848 .mode = 0644,
2849 .proc_handler = proc_dointvec_ms_jiffies,
2850 },
2851 { }
2852 };
2853
2854 struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
2855 {
2856 struct ctl_table *table;
2857
2858 table = kmemdup(ipv6_route_table_template,
2859 sizeof(ipv6_route_table_template),
2860 GFP_KERNEL);
2861
2862 if (table) {
2863 table[0].data = &net->ipv6.sysctl.flush_delay;
2864 table[0].extra1 = net;
2865 table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
2866 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2867 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2868 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2869 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2870 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2871 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2872 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
2873 table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2874 }
2875
2876 return table;
2877 }
2878 #endif
2879
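/*
 * Per-network-namespace setup: copy the dst_ops template, allocate this
 * namespace's copies of the special route entries and initialise the
 * route sysctl defaults.
 */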
2880 static int __net_init ip6_route_net_init(struct net *net)
2881 {
2882 int ret = -ENOMEM;
2883
2884 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2885 sizeof(net->ipv6.ip6_dst_ops));
2886
2887 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2888 goto out_ip6_dst_ops;
2889
2890 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2891 sizeof(*net->ipv6.ip6_null_entry),
2892 GFP_KERNEL);
2893 if (!net->ipv6.ip6_null_entry)
2894 goto out_ip6_dst_entries;
2895 net->ipv6.ip6_null_entry->dst.path =
2896 (struct dst_entry *)net->ipv6.ip6_null_entry;
2897 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2898 dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
2899 ip6_template_metrics, true);
2900
2901 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2902 net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2903 sizeof(*net->ipv6.ip6_prohibit_entry),
2904 GFP_KERNEL);
2905 if (!net->ipv6.ip6_prohibit_entry)
2906 goto out_ip6_null_entry;
2907 net->ipv6.ip6_prohibit_entry->dst.path =
2908 (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
2909 net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2910 dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
2911 ip6_template_metrics, true);
2912
2913 net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2914 sizeof(*net->ipv6.ip6_blk_hole_entry),
2915 GFP_KERNEL);
2916 if (!net->ipv6.ip6_blk_hole_entry)
2917 goto out_ip6_prohibit_entry;
2918 net->ipv6.ip6_blk_hole_entry->dst.path =
2919 (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
2920 net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
2921 dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
2922 ip6_template_metrics, true);
2923 #endif
2924
2925 net->ipv6.sysctl.flush_delay = 0;
2926 net->ipv6.sysctl.ip6_rt_max_size = 4096;
2927 net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
2928 net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
2929 net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
2930 net->ipv6.sysctl.ip6_rt_gc_elasticity = 9;
2931 net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
2932 net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
2933
2934 net->ipv6.ip6_rt_gc_expire = 30*HZ;
2935
2936 ret = 0;
2937 out:
2938 return ret;
2939
2940 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2941 out_ip6_prohibit_entry:
2942 kfree(net->ipv6.ip6_prohibit_entry);
2943 out_ip6_null_entry:
2944 kfree(net->ipv6.ip6_null_entry);
2945 #endif
2946 out_ip6_dst_entries:
2947 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2948 out_ip6_dst_ops:
2949 goto out;
2950 }
2951
2952 static void __net_exit ip6_route_net_exit(struct net *net)
2953 {
2954 kfree(net->ipv6.ip6_null_entry);
2955 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2956 kfree(net->ipv6.ip6_prohibit_entry);
2957 kfree(net->ipv6.ip6_blk_hole_entry);
2958 #endif
2959 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2960 }
2961
2962 static int __net_init ip6_route_net_init_late(struct net *net)
2963 {
2964 #ifdef CONFIG_PROC_FS
2965 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2966 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2967 #endif
2968 return 0;
2969 }
2970
2971 static void __net_exit ip6_route_net_exit_late(struct net *net)
2972 {
2973 #ifdef CONFIG_PROC_FS
2974 proc_net_remove(net, "ipv6_route");
2975 proc_net_remove(net, "rt6_stats");
2976 #endif
2977 }
2978
2979 static struct pernet_operations ip6_route_net_ops = {
2980 .init = ip6_route_net_init,
2981 .exit = ip6_route_net_exit,
2982 };
2983
2984 static int __net_init ipv6_inetpeer_init(struct net *net)
2985 {
2986 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2987
2988 if (!bp)
2989 return -ENOMEM;
2990 inet_peer_base_init(bp);
2991 net->ipv6.peers = bp;
2992 return 0;
2993 }
2994
2995 static void __net_exit ipv6_inetpeer_exit(struct net *net)
2996 {
2997 struct inet_peer_base *bp = net->ipv6.peers;
2998
2999 net->ipv6.peers = NULL;
3000 inetpeer_invalidate_tree(bp);
3001 kfree(bp);
3002 }
3003
3004 static struct pernet_operations ipv6_inetpeer_ops = {
3005 .init = ipv6_inetpeer_init,
3006 .exit = ipv6_inetpeer_exit,
3007 };
3008
3009 static struct pernet_operations ip6_route_net_late_ops = {
3010 .init = ip6_route_net_init_late,
3011 .exit = ip6_route_net_exit_late,
3012 };
3013
3014 static struct notifier_block ip6_route_dev_notifier = {
3015 .notifier_call = ip6_route_dev_notify,
3016 .priority = 0,
3017 };
3018
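/*
 * Subsystem initialisation: create the rt6_info slab cache, register the
 * per-namespace operations, the FIB, xfrm6 and policy-rule subsystems,
 * the rtnetlink handlers and the device notifier. Error paths unwind in
 * reverse order.
 */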
3019 int __init ip6_route_init(void)
3020 {
3021 int ret;
3022
3023 ret = -ENOMEM;
3024 ip6_dst_ops_template.kmem_cachep =
3025 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
3026 SLAB_HWCACHE_ALIGN, NULL);
3027 if (!ip6_dst_ops_template.kmem_cachep)
3028 goto out;
3029
3030 ret = dst_entries_init(&ip6_dst_blackhole_ops);
3031 if (ret)
3032 goto out_kmem_cache;
3033
3034 ret = register_pernet_subsys(&ipv6_inetpeer_ops);
3035 if (ret)
3036 goto out_dst_entries;
3037
3038 ret = register_pernet_subsys(&ip6_route_net_ops);
3039 if (ret)
3040 goto out_register_inetpeer;
3041
3042 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
3043
3044 /* The loopback device is registered before this portion of code runs,
3045 * so the loopback reference in rt6_info is not taken automatically;
3046 * take it manually for init_net. */
3047 init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
3048 init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3049 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
3050 init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev;
3051 init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3052 init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev;
3053 init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
3054 #endif
3055 ret = fib6_init();
3056 if (ret)
3057 goto out_register_subsys;
3058
3059 ret = xfrm6_init();
3060 if (ret)
3061 goto out_fib6_init;
3062
3063 ret = fib6_rules_init();
3064 if (ret)
3065 goto xfrm6_init;
3066
3067 ret = register_pernet_subsys(&ip6_route_net_late_ops);
3068 if (ret)
3069 goto fib6_rules_init;
3070
3071 ret = -ENOBUFS;
3072 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) ||
3073 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) ||
3074 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL))
3075 goto out_register_late_subsys;
3076
3077 ret = register_netdevice_notifier(&ip6_route_dev_notifier);
3078 if (ret)
3079 goto out_register_late_subsys;
3080
3081 out:
3082 return ret;
3083
3084 out_register_late_subsys:
3085 unregister_pernet_subsys(&ip6_route_net_late_ops);
3086 fib6_rules_init:
3087 fib6_rules_cleanup();
3088 xfrm6_init:
3089 xfrm6_fini();
3090 out_fib6_init:
3091 fib6_gc_cleanup();
3092 out_register_subsys:
3093 unregister_pernet_subsys(&ip6_route_net_ops);
3094 out_register_inetpeer:
3095 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3096 out_dst_entries:
3097 dst_entries_destroy(&ip6_dst_blackhole_ops);
3098 out_kmem_cache:
3099 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3100 goto out;
3101 }
3102
3103 void ip6_route_cleanup(void)
3104 {
3105 unregister_netdevice_notifier(&ip6_route_dev_notifier);
3106 unregister_pernet_subsys(&ip6_route_net_late_ops);
3107 fib6_rules_cleanup();
3108 xfrm6_fini();
3109 fib6_gc_cleanup();
3110 unregister_pernet_subsys(&ipv6_inetpeer_ops);
3111 unregister_pernet_subsys(&ip6_route_net_ops);
3112 dst_entries_destroy(&ip6_dst_blackhole_ops);
3113 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
3114 }