net/ipv4/ip_tunnel.c (mirror_ubuntu-bionic-kernel.git, UBUNTU: Ubuntu-4.15.0-96.97)
/*
 * Copyright (c) 2013 Nicira, Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/rculist.h>
#include <linux/err.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/arp.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>

#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
#endif

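/* Hash the tunnel key and remote address into a bucket index for the
 * per-netns tunnel hash table.
 */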
static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
{
	return hash_32((__force u32)key ^ (__force u32)remote,
		       IP_TNL_HASH_BITS);
}

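/* A tunnel configured with TUNNEL_KEY matches only packets carrying the
 * same key; a keyless tunnel matches only keyless packets.
 */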
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
				__be16 flags, __be32 key)
{
	if (p->i_flags & TUNNEL_KEY) {
		if (flags & TUNNEL_KEY)
			return key == p->i_key;
		else
			/* key expected, none present */
			return false;
	} else
		return !(flags & TUNNEL_KEY);
}

/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets, if not matched against a configured keyless
   tunnel, will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for the input packet.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			cand = t;
	}

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	t = rcu_dereference(itn->collect_md_tun);
	if (t && t->dev->flags & IFF_UP)
		return t;

	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);

	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);

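/* Select the hash bucket for a set of tunnel parameters.  Wildcard and
 * multicast destinations hash with remote == 0, and VTI tunnels without
 * TUNNEL_KEY set ignore i_key, matching the lookup rules above.
 */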
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
				    struct ip_tunnel_parm *parms)
{
	unsigned int h;
	__be32 remote;
	__be32 i_key = parms->i_key;

	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
		remote = parms->iph.daddr;
	else
		remote = 0;

	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
		i_key = 0;

	h = ip_tunnel_hash(i_key, remote);
	return &itn->tunnels[h];
}

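/* Insert a tunnel into the hash table; a collect_md tunnel is also
 * published through itn->collect_md_tun so ip_tunnel_lookup() can fall
 * back to it.
 */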
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, t);
	hlist_add_head_rcu(&t->hash_node, head);
}

static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	if (t->collect_md)
		rcu_assign_pointer(itn->collect_md_tun, NULL);
	hlist_del_init_rcu(&t->hash_node);
}

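/* Exact-match lookup on the configured parameters: the control-path
 * counterpart of ip_tunnel_lookup().  Returns NULL if no tunnel matches.
 */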
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	return t;
}

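/* Allocate and register a tunnel netdevice.  The name comes from parms
 * if set, otherwise it is built from ops->kind plus a "%d" template that
 * register_netdevice() expands to a free index.
 */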
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	err = -E2BIG;
	if (parms->name[0]) {
		if (!dev_valid_name(parms->name))
			goto failed;
		strlcpy(name, parms->name, IFNAMSIZ);
	} else {
		if (strlen(ops->kind) > (IFNAMSIZ - 3))
			goto failed;
		strcpy(name, ops->kind);
		strcat(name, "%d");
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;
	tunnel->net = net;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}

static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif,
				    __u32 mark)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
	fl4->flowi4_mark = mark;
}

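/* Probe the route to the tunnel destination to find the underlying
 * device, then derive the tunnel MTU and needed_headroom from it.
 */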
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link,
				 tunnel->fwmark);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;

		dst_cache_reset(&tunnel->dst_cache);
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < IPV4_MIN_MTU)
		mtu = IPV4_MIN_MTU;

	return mtu;
}

static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;
	int t_hlen;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	t_hlen = nt->hlen + sizeof(struct iphdr);
	dev->min_mtu = ETH_MIN_MTU;
	dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
	ip_tunnel_add(itn, nt);
	return nt;
}

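/* Receive path common to the IPv4 tunnel drivers: validate TUNNEL_CSUM
 * and TUNNEL_SEQ against the tunnel configuration, decapsulate ECN,
 * update per-cpu stats and hand the packet to the GRO cell.  Consumes
 * the skb (and tun_dst) in all cases and always returns 0.
 */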
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
		  bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					     &iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	if (tun_dst)
		skb_dst_set(skb, (struct dst_entry *)tun_dst);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	if (tun_dst)
		dst_release((struct dst_entry *)tun_dst);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);

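/* Encapsulation handlers (e.g. FOU/GUE) register themselves in the
 * global iptun_encaps[] slot for their type; cmpxchg() keeps
 * registration and removal atomic against each other.
 */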
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);

int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);

int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);

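/* Check the packet against the path MTU of the tunnel route and, when it
 * does not fit, send ICMP "fragmentation needed" (or ICMPv6 "packet too
 * big") back to the sender and return -E2BIG.
 */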
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df,
			   const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	skb_dst_update_pmtu_no_confirm(skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			     !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}

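/* Transmit path for collect_md tunnels: destination, key, TOS and TTL
 * come from the skb's tunnel metadata rather than from the device
 * configuration.
 */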
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	u32 headroom = sizeof(struct iphdr);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	const struct iphdr *inner_iph;
	struct rtable *rt;
	struct flowi4 fl4;
	__be16 df = 0;
	u8 tos, ttl;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))
		goto tx_error;
	key = &tun_info->key;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	tos = key->tos;
	if (tos == 1) {
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}
	init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
			 RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
	if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
		goto tx_error;
	rt = ip_route_output_key(tunnel->net, &fl4);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}
	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = key->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}
	if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
		df = htons(IP_DF);
	else if (skb->protocol == htons(ETH_P_IP))
		df = inner_iph->frag_off & htons(IP_DF);
	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
	if (headroom > dev->needed_headroom)
		dev->needed_headroom = headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		goto tx_dropped;
	}
	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;
tx_error:
	dev->stats.tx_errors++;
	goto kfree;
tx_dropped:
	dev->stats.tx_dropped++;
kfree:
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);

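/* Transmit path for classically configured tunnels.  In NBMA mode (no
 * configured destination) the destination is taken from the skb's tunnel
 * metadata, the inner IPv4 route, or the IPv6 neighbour entry.
 */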
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	bool connected;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel */
		struct ip_tunnel_info *tun_info;

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		tun_info = skb_tunnel_info(skb);
		if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
		    ip_tunnel_info_af(tun_info) == AF_INET &&
		    tun_info->key.u.ipv4.dst)
			dst = tun_info->key.u.ipv4.dst;
		else if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
			 tunnel->fwmark);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
			 NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
					  fl4.saddr);
	}

	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
		      df, !net_eq(tunnel->net, dev_net(dev)));
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);

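/* Apply new parameters to an existing tunnel, rehashing it under the new
 * addresses and keys, and rebind the underlying device if the link or
 * fwmark changed.
 */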
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu,
			     __u32 fwmark)
{
	ip_tunnel_del(itn, t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link || t->fwmark != fwmark) {
		int mtu;

		t->parms.link = p->link;
		t->fwmark = fwmark;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	dst_cache_reset(&t->dst_cache);
	netdev_state_change(dev);
}

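/* Legacy SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl handler shared by the tunnel
 * drivers; ADD/CHG/DEL require CAP_NET_ADMIN in the tunnel's user
 * namespace.
 */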
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t) {
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true, 0);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (!t)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);

int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);
	int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;

	if (new_mtu < ETH_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;

		new_mtu = max_mtu;
	}

	dev->mtu = new_mtu;
	return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);

int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
	return __ip_tunnel_change_mtu(dev, new_mtu, true);
}
EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);

static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	dst_cache_destroy(&tunnel->dst_cache);
	free_percpu(dev->tstats);
}

void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);

	if (itn->fb_tunnel_dev != dev) {
		ip_tunnel_del(itn, netdev_priv(dev));
		unregister_netdevice_queue(dev, head);
	}
}
EXPORT_SYMBOL_GPL(ip_tunnel_dellink);

struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->net;
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);

int ip_tunnel_get_iflink(const struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	return tunnel->parms.link;
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);

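/* Per-netns setup: initialize the hash table and, if ops is given,
 * create the fallback device that catches otherwise unmatched packets.
 */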
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
		       struct rtnl_link_ops *ops, char *devname)
{
	struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
	struct ip_tunnel_parm parms;
	unsigned int i;

	for (i = 0; i < IP_TNL_HASH_SIZE; i++)
		INIT_HLIST_HEAD(&itn->tunnels[i]);

	if (!ops) {
		itn->fb_tunnel_dev = NULL;
		return 0;
	}

	memset(&parms, 0, sizeof(parms));
	if (devname)
		strlcpy(parms.name, devname, IFNAMSIZ);

	rtnl_lock();
	itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
	/* The FB netdevice is special: there is one, and only one, per
	 * netns.  Allowing it to be moved to another netns is clearly
	 * unsafe.
	 */
	if (!IS_ERR(itn->fb_tunnel_dev)) {
		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
	}
	rtnl_unlock();

	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);

static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
			      struct rtnl_link_ops *ops)
{
	struct net *net = dev_net(itn->fb_tunnel_dev);
	struct net_device *dev, *aux;
	int h;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == ops)
			unregister_netdevice_queue(dev, head);

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			/* If dev is in the same netns, it has already
			 * been added to the list by the previous loop.
			 */
			if (!net_eq(dev_net(t->dev), net))
				unregister_netdevice_queue(t->dev, head);
	}
}

void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
			   struct rtnl_link_ops *ops)
{
	struct ip_tunnel_net *itn;
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list) {
		itn = net_generic(net, id);
		ip_tunnel_destroy(itn, &list, ops);
	}
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);

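/* rtnl_link "newlink" helper: reject duplicates (only one collect_md
 * tunnel per netns), register the device and clamp any requested MTU to
 * what the tunnel headers leave room for.
 */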
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (nt->collect_md) {
		if (rtnl_dereference(itn->collect_md_tun))
			return -EEXIST;
	} else {
		if (ip_tunnel_find(itn, p, dev->type))
			return -EEXIST;
	}

	nt->net = net;
	nt->parms = *p;
	nt->fwmark = fwmark;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (tb[IFLA_MTU]) {
		unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen;

		dev->mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
				 (unsigned int)(max - sizeof(struct iphdr)));
	} else {
		dev->mtu = mtu;
	}

	ip_tunnel_add(itn, nt);
out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);

int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p, __u32 fwmark)
{
	struct ip_tunnel *t;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return fan_has_map(&tunnel->fan) ? 0 : -EINVAL;

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = tunnel;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);

int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->needs_free_netdev = true;
	dev->priv_destructor = ip_tunnel_dev_free;
	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
	if (err) {
		free_percpu(dev->tstats);
		return err;
	}

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		dst_cache_destroy(&tunnel->dst_cache);
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;

	if (tunnel->collect_md)
		netif_keep_dst(dev);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);

void ip_tunnel_uninit(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct net *net = tunnel->net;
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregistered in the net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(itn, netdev_priv(dev));

	dst_cache_reset(&tunnel->dst_cache);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);

/* Do the least required initialization here; the rest of the init is
 * done in the tunnel_init call.
 */
void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->ip_tnl_net_id = net_id;
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);

MODULE_LICENSE("GPL");