net/ipv4/ip_tunnel.c
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
57
58#if IS_ENABLED(CONFIG_IPV6)
59#include <net/ipv6.h>
60#include <net/ip6_fib.h>
61#include <net/ip6_route.h>
62#endif
63
64static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65 __be32 key, __be32 remote)
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
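/*
 * Per-cpu cached output route (dst_entry) helpers.
 *
 * Each tunnel keeps one cached dst per CPU in t->dst_cache.  Writers
 * serialize on idst->lock and publish the new pointer with
 * rcu_assign_pointer(); the fast path in tunnel_dst_get() only takes
 * rcu_read_lock() and a reference on the dst.  Entries flagged
 * DST_NOCACHE are never cached, and tunnel_dst_reset_all() clears the
 * cache on every CPU when the tunnel parameters change.
 */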
71static inline void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72 struct dst_entry *dst)
73{
74 struct dst_entry *old_dst;
75
76 if (dst && (dst->flags & DST_NOCACHE))
77 dst = NULL;
78
79 spin_lock_bh(&idst->lock);
80 old_dst = rcu_dereference(idst->dst);
81 rcu_assign_pointer(idst->dst, dst);
 82 dst_release(old_dst);
 83 spin_unlock_bh(&idst->lock);
84}
85
86static inline void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
87{
 88 __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
89}
90
91static inline void tunnel_dst_reset(struct ip_tunnel *t)
92{
93 tunnel_dst_set(t, NULL);
94}
95
96static void tunnel_dst_reset_all(struct ip_tunnel *t)
97{
98 int i;
99
100 for_each_possible_cpu(i)
101 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102}
103
104static inline struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
105{
106 struct dst_entry *dst;
107
108 rcu_read_lock();
 109 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
110 if (dst)
111 dst_hold(dst);
112 rcu_read_unlock();
113 return dst;
114}
115
116struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
117{
118 struct dst_entry *dst = tunnel_dst_get(t);
119
120 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
121 tunnel_dst_reset(t);
122 return NULL;
123 }
124
125 return dst;
126}
127
128/* Often modified stats are per cpu, others are shared (netdev->stats) */
129struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
130 struct rtnl_link_stats64 *tot)
131{
132 int i;
133
134 for_each_possible_cpu(i) {
135 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
136 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
137 unsigned int start;
138
139 do {
140 start = u64_stats_fetch_begin_bh(&tstats->syncp);
141 rx_packets = tstats->rx_packets;
142 tx_packets = tstats->tx_packets;
143 rx_bytes = tstats->rx_bytes;
144 tx_bytes = tstats->tx_bytes;
145 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
146
147 tot->rx_packets += rx_packets;
148 tot->tx_packets += tx_packets;
149 tot->rx_bytes += rx_bytes;
150 tot->tx_bytes += tx_bytes;
151 }
152
153 tot->multicast = dev->stats.multicast;
154
155 tot->rx_crc_errors = dev->stats.rx_crc_errors;
156 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
157 tot->rx_length_errors = dev->stats.rx_length_errors;
158 tot->rx_frame_errors = dev->stats.rx_frame_errors;
159 tot->rx_errors = dev->stats.rx_errors;
160
161 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
162 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
163 tot->tx_dropped = dev->stats.tx_dropped;
164 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
165 tot->tx_errors = dev->stats.tx_errors;
166
167 tot->collisions = dev->stats.collisions;
168
169 return tot;
170}
171EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
172
173static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
174 __be16 flags, __be32 key)
175{
176 if (p->i_flags & TUNNEL_KEY) {
177 if (flags & TUNNEL_KEY)
178 return key == p->i_key;
179 else
180 /* key expected, none present */
181 return false;
182 } else
183 return !(flags & TUNNEL_KEY);
184}
185
186/* Fallback tunnel: no source, no destination, no key, no options
187
188 Tunnel hash table:
 189 We require an exact key match, i.e. if a key is present in the packet
 190 it will match only a tunnel with the same key; if it is not present,
 191 it will match only a keyless tunnel.
 192
 193 All keyless packets, if not matched against configured keyless tunnels,
 194 will match the fallback tunnel.
 195 Given src, dst and key, find the appropriate tunnel for input.
196*/
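/*
 * Lookup order, best match first:
 *   1. both remote and local address (with the key semantics above)
 *   2. remote address only
 *   3. local address only, or a multicast local address matching the
 *      tunnel's daddr
 *   4. key alone (skipped when the caller passes TUNNEL_NO_KEY)
 * A tunnel bound to the packet's incoming link (parms.link) is returned
 * immediately; otherwise the first looser match is kept as a candidate.
 * If nothing matches, the fallback device is used when it is up.
 */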
197struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
198 int link, __be16 flags,
199 __be32 remote, __be32 local,
200 __be32 key)
201{
202 unsigned int hash;
203 struct ip_tunnel *t, *cand = NULL;
204 struct hlist_head *head;
205
206 hash = ip_tunnel_hash(itn, key, remote);
207 head = &itn->tunnels[hash];
208
209 hlist_for_each_entry_rcu(t, head, hash_node) {
210 if (local != t->parms.iph.saddr ||
211 remote != t->parms.iph.daddr ||
212 !(t->dev->flags & IFF_UP))
213 continue;
214
215 if (!ip_tunnel_key_match(&t->parms, flags, key))
216 continue;
217
218 if (t->parms.link == link)
219 return t;
220 else
221 cand = t;
222 }
223
224 hlist_for_each_entry_rcu(t, head, hash_node) {
225 if (remote != t->parms.iph.daddr ||
226 !(t->dev->flags & IFF_UP))
227 continue;
228
229 if (!ip_tunnel_key_match(&t->parms, flags, key))
230 continue;
231
232 if (t->parms.link == link)
233 return t;
234 else if (!cand)
235 cand = t;
236 }
237
238 hash = ip_tunnel_hash(itn, key, 0);
239 head = &itn->tunnels[hash];
240
241 hlist_for_each_entry_rcu(t, head, hash_node) {
242 if ((local != t->parms.iph.saddr &&
243 (local != t->parms.iph.daddr ||
244 !ipv4_is_multicast(local))) ||
245 !(t->dev->flags & IFF_UP))
246 continue;
247
248 if (!ip_tunnel_key_match(&t->parms, flags, key))
249 continue;
250
251 if (t->parms.link == link)
252 return t;
253 else if (!cand)
254 cand = t;
255 }
256
257 if (flags & TUNNEL_NO_KEY)
258 goto skip_key_lookup;
259
260 hlist_for_each_entry_rcu(t, head, hash_node) {
261 if (t->parms.i_key != key ||
262 !(t->dev->flags & IFF_UP))
263 continue;
264
265 if (t->parms.link == link)
266 return t;
267 else if (!cand)
268 cand = t;
269 }
270
271skip_key_lookup:
272 if (cand)
273 return cand;
274
275 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
276 return netdev_priv(itn->fb_tunnel_dev);
277
278
279 return NULL;
280}
281EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
282
283static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
284 struct ip_tunnel_parm *parms)
285{
286 unsigned int h;
287 __be32 remote;
288
289 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
290 remote = parms->iph.daddr;
291 else
292 remote = 0;
293
294 h = ip_tunnel_hash(itn, parms->i_key, remote);
295 return &itn->tunnels[h];
296}
297
298static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
299{
300 struct hlist_head *head = ip_bucket(itn, &t->parms);
301
302 hlist_add_head_rcu(&t->hash_node, head);
303}
304
305static void ip_tunnel_del(struct ip_tunnel *t)
306{
307 hlist_del_init_rcu(&t->hash_node);
308}
309
310static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
311 struct ip_tunnel_parm *parms,
312 int type)
313{
314 __be32 remote = parms->iph.daddr;
315 __be32 local = parms->iph.saddr;
316 __be32 key = parms->i_key;
317 int link = parms->link;
318 struct ip_tunnel *t = NULL;
319 struct hlist_head *head = ip_bucket(itn, parms);
320
321 hlist_for_each_entry_rcu(t, head, hash_node) {
322 if (local == t->parms.iph.saddr &&
323 remote == t->parms.iph.daddr &&
324 key == t->parms.i_key &&
325 link == t->parms.link &&
326 type == t->dev->type)
327 break;
328 }
329 return t;
330}
331
332static struct net_device *__ip_tunnel_create(struct net *net,
333 const struct rtnl_link_ops *ops,
334 struct ip_tunnel_parm *parms)
335{
336 int err;
337 struct ip_tunnel *tunnel;
338 struct net_device *dev;
339 char name[IFNAMSIZ];
340
341 if (parms->name[0])
342 strlcpy(name, parms->name, IFNAMSIZ);
343 else {
 344 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
345 err = -E2BIG;
346 goto failed;
347 }
348 strlcpy(name, ops->kind, IFNAMSIZ);
349 strncat(name, "%d", 2);
350 }
351
352 ASSERT_RTNL();
353 dev = alloc_netdev(ops->priv_size, name, ops->setup);
354 if (!dev) {
355 err = -ENOMEM;
356 goto failed;
357 }
358 dev_net_set(dev, net);
359
360 dev->rtnl_link_ops = ops;
361
362 tunnel = netdev_priv(dev);
363 tunnel->parms = *parms;
 364 tunnel->net = net;
365
366 err = register_netdevice(dev);
367 if (err)
368 goto failed_free;
369
370 return dev;
371
372failed_free:
373 free_netdev(dev);
374failed:
375 return ERR_PTR(err);
376}
377
378static inline void init_tunnel_flow(struct flowi4 *fl4,
379 int proto,
380 __be32 daddr, __be32 saddr,
381 __be32 key, __u8 tos, int oif)
382{
383 memset(fl4, 0, sizeof(*fl4));
384 fl4->flowi4_oif = oif;
385 fl4->daddr = daddr;
386 fl4->saddr = saddr;
387 fl4->flowi4_tos = tos;
388 fl4->flowi4_proto = proto;
389 fl4->fl4_gre_key = key;
390}
391
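/*
 * ip_tunnel_bind_dev() guesses the underlying output device from the
 * configured destination (or parms.link) and derives the tunnel MTU as
 *
 *	mtu = lower_dev_mtu - (hard_header_len + tunnel header + outer IP)
 *
 * clamped to at least 68.  For example, a keyless GRE tunnel carried
 * over a 1500-byte Ethernet link typically ends up with an MTU of
 * 1476 (1500 - 20 bytes outer IP - 4 bytes GRE).
 */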
392static int ip_tunnel_bind_dev(struct net_device *dev)
393{
394 struct net_device *tdev = NULL;
395 struct ip_tunnel *tunnel = netdev_priv(dev);
396 const struct iphdr *iph;
397 int hlen = LL_MAX_HEADER;
398 int mtu = ETH_DATA_LEN;
399 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
400
401 iph = &tunnel->parms.iph;
402
403 /* Guess output device to choose reasonable mtu and needed_headroom */
404 if (iph->daddr) {
405 struct flowi4 fl4;
406 struct rtable *rt;
407
408 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
409 iph->saddr, tunnel->parms.o_key,
410 RT_TOS(iph->tos), tunnel->parms.link);
411 rt = ip_route_output_key(tunnel->net, &fl4);
412
413 if (!IS_ERR(rt)) {
414 tdev = rt->dst.dev;
 415 tunnel_dst_set(tunnel, dst_clone(&rt->dst));
416 ip_rt_put(rt);
417 }
418 if (dev->type != ARPHRD_ETHER)
419 dev->flags |= IFF_POINTOPOINT;
420 }
421
422 if (!tdev && tunnel->parms.link)
 423 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
424
425 if (tdev) {
426 hlen = tdev->hard_header_len + tdev->needed_headroom;
427 mtu = tdev->mtu;
428 }
429 dev->iflink = tunnel->parms.link;
430
431 dev->needed_headroom = t_hlen + hlen;
432 mtu -= (dev->hard_header_len + t_hlen);
433
434 if (mtu < 68)
435 mtu = 68;
436
437 return mtu;
438}
439
440static struct ip_tunnel *ip_tunnel_create(struct net *net,
441 struct ip_tunnel_net *itn,
442 struct ip_tunnel_parm *parms)
443{
444 struct ip_tunnel *nt, *fbt;
445 struct net_device *dev;
446
447 BUG_ON(!itn->fb_tunnel_dev);
448 fbt = netdev_priv(itn->fb_tunnel_dev);
449 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
450 if (IS_ERR(dev))
451 return NULL;
452
453 dev->mtu = ip_tunnel_bind_dev(dev);
454
455 nt = netdev_priv(dev);
456 ip_tunnel_add(itn, nt);
457 return nt;
458}
459
460int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
461 const struct tnl_ptk_info *tpi, bool log_ecn_error)
462{
463 struct pcpu_tstats *tstats;
464 const struct iphdr *iph = ip_hdr(skb);
465 int err;
466
467#ifdef CONFIG_NET_IPGRE_BROADCAST
468 if (ipv4_is_multicast(iph->daddr)) {
469 /* Looped back packet, drop it! */
470 if (rt_is_output_route(skb_rtable(skb)))
471 goto drop;
472 tunnel->dev->stats.multicast++;
473 skb->pkt_type = PACKET_BROADCAST;
474 }
475#endif
476
477 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
478 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
479 tunnel->dev->stats.rx_crc_errors++;
480 tunnel->dev->stats.rx_errors++;
481 goto drop;
482 }
483
484 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
485 if (!(tpi->flags&TUNNEL_SEQ) ||
486 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
487 tunnel->dev->stats.rx_fifo_errors++;
488 tunnel->dev->stats.rx_errors++;
489 goto drop;
490 }
491 tunnel->i_seqno = ntohl(tpi->seq) + 1;
492 }
493
494 err = IP_ECN_decapsulate(iph, skb);
495 if (unlikely(err)) {
496 if (log_ecn_error)
497 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
498 &iph->saddr, iph->tos);
499 if (err > 1) {
500 ++tunnel->dev->stats.rx_frame_errors;
501 ++tunnel->dev->stats.rx_errors;
502 goto drop;
503 }
504 }
505
506 tstats = this_cpu_ptr(tunnel->dev->tstats);
507 u64_stats_update_begin(&tstats->syncp);
508 tstats->rx_packets++;
509 tstats->rx_bytes += skb->len;
510 u64_stats_update_end(&tstats->syncp);
511
512 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
513
514 if (tunnel->dev->type == ARPHRD_ETHER) {
515 skb->protocol = eth_type_trans(skb, tunnel->dev);
516 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
517 } else {
518 skb->dev = tunnel->dev;
519 }
 520
521 gro_cells_receive(&tunnel->gro_cells, skb);
522 return 0;
523
524drop:
525 kfree_skb(skb);
526 return 0;
527}
528EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
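/*
 * Illustrative only (not part of this file): a decapsulating protocol
 * handler (GRE, IPIP, ...) is expected to parse its header into a
 * struct tnl_ptk_info and then hand the packet over roughly like this,
 * where "itn" is that protocol's per-netns ip_tunnel_net:
 *
 *	const struct iphdr *iph = ip_hdr(skb);
 *	struct ip_tunnel *t;
 *
 *	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
 *			     iph->saddr, iph->daddr, tpi->key);
 *	if (t)
 *		return ip_tunnel_rcv(t, skb, tpi, log_ecn_error);
 *	kfree_skb(skb);
 */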
529
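/*
 * Path-MTU handling on transmit: the usable tunnel MTU is the route MTU
 * minus the link, outer IP and tunnel headers.  Oversized inner IPv4
 * packets with DF set are bounced with ICMP_FRAG_NEEDED and -E2BIG;
 * oversized inner IPv6 packets get ICMPV6_PKT_TOOBIG as long as the
 * tunnel MTU is at least IPV6_MIN_MTU.  The inner route's MTU metric is
 * also lowered so the sender learns the reduced path MTU.
 */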
530static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
531 struct rtable *rt, __be16 df)
532{
533 struct ip_tunnel *tunnel = netdev_priv(dev);
 534 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
535 int mtu;
536
537 if (df)
538 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
539 - sizeof(struct iphdr) - tunnel->hlen;
540 else
541 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
542
543 if (skb_dst(skb))
544 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
545
546 if (skb->protocol == htons(ETH_P_IP)) {
547 if (!skb_is_gso(skb) &&
548 (df & htons(IP_DF)) && mtu < pkt_size) {
549 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
550 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
551 return -E2BIG;
552 }
553 }
554#if IS_ENABLED(CONFIG_IPV6)
555 else if (skb->protocol == htons(ETH_P_IPV6)) {
556 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
557
558 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
559 mtu >= IPV6_MIN_MTU) {
560 if ((tunnel->parms.iph.daddr &&
561 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
562 rt6->rt6i_dst.plen == 128) {
563 rt6->rt6i_flags |= RTF_MODIFIED;
564 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
565 }
566 }
567
568 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
569 mtu < pkt_size) {
570 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
571 return -E2BIG;
572 }
573 }
574#endif
575 return 0;
576}
577
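/*
 * ip_tunnel_xmit() does the common encapsulation work for all IPv4
 * tunnel drivers: resolve the destination for NBMA (daddr == 0) tunnels
 * from the inner packet, pick tos/ttl/df from the tunnel parameters or
 * the inner header, look up (or reuse the per-cpu cached) output route,
 * enforce the path MTU, make sure there is enough headroom and finally
 * push the outer IP header via iptunnel_xmit().
 */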
578void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 579 const struct iphdr *tnl_params, const u8 protocol)
580{
581 struct ip_tunnel *tunnel = netdev_priv(dev);
582 const struct iphdr *inner_iph;
583 struct flowi4 fl4;
584 u8 tos, ttl;
585 __be16 df;
 586 struct rtable *rt = NULL; /* Route to the other host */
587 unsigned int max_headroom; /* The extra header space needed */
588 __be32 dst;
 589 int err;
 590 bool connected = true;
591
592 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
593
594 dst = tnl_params->daddr;
595 if (dst == 0) {
596 /* NBMA tunnel */
597
598 if (skb_dst(skb) == NULL) {
599 dev->stats.tx_fifo_errors++;
600 goto tx_error;
601 }
602
603 if (skb->protocol == htons(ETH_P_IP)) {
604 rt = skb_rtable(skb);
605 dst = rt_nexthop(rt, inner_iph->daddr);
606 }
607#if IS_ENABLED(CONFIG_IPV6)
608 else if (skb->protocol == htons(ETH_P_IPV6)) {
609 const struct in6_addr *addr6;
610 struct neighbour *neigh;
611 bool do_tx_error_icmp;
612 int addr_type;
613
614 neigh = dst_neigh_lookup(skb_dst(skb),
615 &ipv6_hdr(skb)->daddr);
616 if (neigh == NULL)
617 goto tx_error;
618
619 addr6 = (const struct in6_addr *)&neigh->primary_key;
620 addr_type = ipv6_addr_type(addr6);
621
622 if (addr_type == IPV6_ADDR_ANY) {
623 addr6 = &ipv6_hdr(skb)->daddr;
624 addr_type = ipv6_addr_type(addr6);
625 }
626
627 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
628 do_tx_error_icmp = true;
629 else {
630 do_tx_error_icmp = false;
631 dst = addr6->s6_addr32[3];
632 }
633 neigh_release(neigh);
634 if (do_tx_error_icmp)
635 goto tx_error_icmp;
636 }
637#endif
638 else
639 goto tx_error;
640
641 connected = false;
642 }
643
644 tos = tnl_params->tos;
645 if (tos & 0x1) {
646 tos &= ~0x1;
 647 if (skb->protocol == htons(ETH_P_IP)) {
 648 tos = inner_iph->tos;
649 connected = false;
650 } else if (skb->protocol == htons(ETH_P_IPV6)) {
 651 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
652 connected = false;
653 }
654 }
655
656 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
657 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
658
659 if (connected)
660 rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
661
662 if (!rt) {
663 rt = ip_route_output_key(tunnel->net, &fl4);
664
665 if (IS_ERR(rt)) {
666 dev->stats.tx_carrier_errors++;
667 goto tx_error;
668 }
669 if (connected)
670 tunnel_dst_set(tunnel, dst_clone(&rt->dst));
 671 }
 672
 673 if (rt->dst.dev == dev) {
674 ip_rt_put(rt);
675 dev->stats.collisions++;
676 goto tx_error;
677 }
 678
679 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
680 ip_rt_put(rt);
681 goto tx_error;
 682 }
683
684 if (tunnel->err_count > 0) {
685 if (time_before(jiffies,
686 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
687 tunnel->err_count--;
688
689 dst_link_failure(skb);
690 } else
691 tunnel->err_count = 0;
692 }
693
 694 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
695 ttl = tnl_params->ttl;
696 if (ttl == 0) {
697 if (skb->protocol == htons(ETH_P_IP))
698 ttl = inner_iph->ttl;
699#if IS_ENABLED(CONFIG_IPV6)
700 else if (skb->protocol == htons(ETH_P_IPV6))
701 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
702#endif
703 else
704 ttl = ip4_dst_hoplimit(&rt->dst);
705 }
706
707 df = tnl_params->frag_off;
708 if (skb->protocol == htons(ETH_P_IP))
709 df |= (inner_iph->frag_off&htons(IP_DF));
710
711 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
712 + rt->dst.header_len;
 713 if (max_headroom > dev->needed_headroom)
 714 dev->needed_headroom = max_headroom;
715
716 if (skb_cow_head(skb, dev->needed_headroom)) {
717 dev->stats.tx_dropped++;
718 dev_kfree_skb(skb);
719 return;
720 }
721
 722 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
 723 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
 724 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
 725
726 return;
727
728#if IS_ENABLED(CONFIG_IPV6)
729tx_error_icmp:
730 dst_link_failure(skb);
731#endif
732tx_error:
733 dev->stats.tx_errors++;
734 dev_kfree_skb(skb);
735}
736EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
737
738static void ip_tunnel_update(struct ip_tunnel_net *itn,
739 struct ip_tunnel *t,
740 struct net_device *dev,
741 struct ip_tunnel_parm *p,
742 bool set_mtu)
743{
744 ip_tunnel_del(t);
745 t->parms.iph.saddr = p->iph.saddr;
746 t->parms.iph.daddr = p->iph.daddr;
747 t->parms.i_key = p->i_key;
748 t->parms.o_key = p->o_key;
749 if (dev->type != ARPHRD_ETHER) {
750 memcpy(dev->dev_addr, &p->iph.saddr, 4);
751 memcpy(dev->broadcast, &p->iph.daddr, 4);
752 }
753 ip_tunnel_add(itn, t);
754
755 t->parms.iph.ttl = p->iph.ttl;
756 t->parms.iph.tos = p->iph.tos;
757 t->parms.iph.frag_off = p->iph.frag_off;
758
759 if (t->parms.link != p->link) {
760 int mtu;
761
762 t->parms.link = p->link;
763 mtu = ip_tunnel_bind_dev(dev);
764 if (set_mtu)
765 dev->mtu = mtu;
766 }
 767 tunnel_dst_reset_all(t);
768 netdev_state_change(dev);
769}
770
771int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
772{
773 int err = 0;
774 struct ip_tunnel *t;
775 struct net *net = dev_net(dev);
776 struct ip_tunnel *tunnel = netdev_priv(dev);
777 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
778
779 BUG_ON(!itn->fb_tunnel_dev);
780 switch (cmd) {
781 case SIOCGETTUNNEL:
782 t = NULL;
783 if (dev == itn->fb_tunnel_dev)
784 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
785 if (t == NULL)
786 t = netdev_priv(dev);
787 memcpy(p, &t->parms, sizeof(*p));
788 break;
789
790 case SIOCADDTUNNEL:
791 case SIOCCHGTUNNEL:
792 err = -EPERM;
793 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
794 goto done;
795 if (p->iph.ttl)
796 p->iph.frag_off |= htons(IP_DF);
797 if (!(p->i_flags&TUNNEL_KEY))
798 p->i_key = 0;
799 if (!(p->o_flags&TUNNEL_KEY))
800 p->o_key = 0;
801
802 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
803
804 if (!t && (cmd == SIOCADDTUNNEL))
805 t = ip_tunnel_create(net, itn, p);
806
807 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
808 if (t != NULL) {
809 if (t->dev != dev) {
810 err = -EEXIST;
811 break;
812 }
813 } else {
814 unsigned int nflags = 0;
815
816 if (ipv4_is_multicast(p->iph.daddr))
817 nflags = IFF_BROADCAST;
818 else if (p->iph.daddr)
819 nflags = IFF_POINTOPOINT;
820
821 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
822 err = -EINVAL;
823 break;
824 }
825
826 t = netdev_priv(dev);
827 }
828 }
829
830 if (t) {
831 err = 0;
832 ip_tunnel_update(itn, t, dev, p, true);
833 } else
834 err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
835 break;
836
837 case SIOCDELTUNNEL:
838 err = -EPERM;
839 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
840 goto done;
841
842 if (dev == itn->fb_tunnel_dev) {
843 err = -ENOENT;
844 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
845 if (t == NULL)
846 goto done;
847 err = -EPERM;
848 if (t == netdev_priv(itn->fb_tunnel_dev))
849 goto done;
850 dev = t->dev;
851 }
852 unregister_netdevice(dev);
853 err = 0;
854 break;
855
856 default:
857 err = -EINVAL;
858 }
859
860done:
861 return err;
862}
863EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
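/*
 * For illustration only: the tunnel ioctls above are normally driven
 * from userspace (roughly what "ip tunnel add" does) by filling a
 * struct ip_tunnel_parm and passing it through the protocol's fallback
 * device.  The device names and field values below are examples, not
 * part of this file:
 *
 *	struct ip_tunnel_parm p = { .iph = { .version = 4, .ihl = 5,
 *					     .protocol = IPPROTO_GRE } };
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(p.name, "gre1", IFNAMSIZ);
 *	p.iph.saddr = local_addr;	// addresses in network byte order
 *	p.iph.daddr = remote_addr;
 *	strncpy(ifr.ifr_name, "gre0", IFNAMSIZ);	// fallback device
 *	ifr.ifr_ifru.ifru_data = (void *)&p;
 *	ioctl(fd, SIOCADDTUNNEL, &ifr);
 */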
864
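/*
 * MTU limits below: 68 is the minimum MTU an IPv4 link must support,
 * and 0xFFF8 (65528) keeps the encapsulated packet within the 64 KB IP
 * datagram limit once the link and tunnel headers are added back.
 */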
865int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
866{
867 struct ip_tunnel *tunnel = netdev_priv(dev);
868 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
869
870 if (new_mtu < 68 ||
871 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
872 return -EINVAL;
873 dev->mtu = new_mtu;
874 return 0;
875}
876EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
877
878static void ip_tunnel_dev_free(struct net_device *dev)
879{
880 struct ip_tunnel *tunnel = netdev_priv(dev);
881
882 gro_cells_destroy(&tunnel->gro_cells);
 883 free_percpu(tunnel->dst_cache);
884 free_percpu(dev->tstats);
885 free_netdev(dev);
886}
887
888void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
889{
890 struct ip_tunnel *tunnel = netdev_priv(dev);
891 struct ip_tunnel_net *itn;
892
 893 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
894
895 if (itn->fb_tunnel_dev != dev) {
896 ip_tunnel_del(netdev_priv(dev));
897 unregister_netdevice_queue(dev, head);
898 }
899}
900EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
901
902int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
903 struct rtnl_link_ops *ops, char *devname)
904{
905 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
906 struct ip_tunnel_parm parms;
 907 unsigned int i;
 908
 909 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
910 INIT_HLIST_HEAD(&itn->tunnels[i]);
911
912 if (!ops) {
913 itn->fb_tunnel_dev = NULL;
914 return 0;
915 }
 916
917 memset(&parms, 0, sizeof(parms));
918 if (devname)
919 strlcpy(parms.name, devname, IFNAMSIZ);
920
921 rtnl_lock();
922 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
923 /* FB netdevice is special: we have one, and only one per netns.
 924 * Allowing it to be moved to another netns is clearly unsafe.
925 */
 926 if (!IS_ERR(itn->fb_tunnel_dev)) {
 927 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
928 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
929 }
 930 rtnl_unlock();
 931
 932 return PTR_RET(itn->fb_tunnel_dev);
933}
934EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
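/*
 * Sketch (based on how drivers such as ip_gre use this API; details may
 * differ per driver): ip_tunnel_init_net() and ip_tunnel_delete_net()
 * are meant to be called from the driver's pernet operations:
 *
 *	static int __net_init ipgre_init_net(struct net *net)
 *	{
 *		return ip_tunnel_init_net(net, ipgre_net_id,
 *					  &ipgre_link_ops, NULL);
 *	}
 *
 *	static void __net_exit ipgre_exit_net(struct net *net)
 *	{
 *		struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
 *
 *		ip_tunnel_delete_net(itn, &ipgre_link_ops);
 *	}
 */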
935
936static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
937 struct rtnl_link_ops *ops)
938{
939 struct net *net = dev_net(itn->fb_tunnel_dev);
940 struct net_device *dev, *aux;
941 int h;
942
943 for_each_netdev_safe(net, dev, aux)
944 if (dev->rtnl_link_ops == ops)
945 unregister_netdevice_queue(dev, head);
946
947 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
948 struct ip_tunnel *t;
949 struct hlist_node *n;
950 struct hlist_head *thead = &itn->tunnels[h];
951
952 hlist_for_each_entry_safe(t, n, thead, hash_node)
953 /* If dev is in the same netns, it has already
954 * been added to the list by the previous loop.
955 */
956 if (!net_eq(dev_net(t->dev), net))
957 unregister_netdevice_queue(t->dev, head);
 958 }
959}
960
961void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
962{
963 LIST_HEAD(list);
964
965 rtnl_lock();
 966 ip_tunnel_destroy(itn, &list, ops);
967 unregister_netdevice_many(&list);
968 rtnl_unlock();
969}
970EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
971
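/*
 * The rtnl_link_ops helpers below implement the common part of a tunnel
 * driver's ->newlink/->changelink: the driver parses its own netlink
 * attributes into a struct ip_tunnel_parm and then calls these, e.g.
 * (illustrative only, parse_nl_parms() is a stand-in for the driver's
 * own parser):
 *
 *	struct ip_tunnel_parm p;
 *
 *	parse_nl_parms(data, tb, &p);
 *	return ip_tunnel_newlink(dev, tb, &p);
 */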
972int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
973 struct ip_tunnel_parm *p)
974{
975 struct ip_tunnel *nt;
976 struct net *net = dev_net(dev);
977 struct ip_tunnel_net *itn;
978 int mtu;
979 int err;
980
981 nt = netdev_priv(dev);
982 itn = net_generic(net, nt->ip_tnl_net_id);
983
984 if (ip_tunnel_find(itn, p, dev->type))
985 return -EEXIST;
986
 987 nt->net = net;
988 nt->parms = *p;
989 err = register_netdevice(dev);
990 if (err)
991 goto out;
992
993 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
994 eth_hw_addr_random(dev);
995
996 mtu = ip_tunnel_bind_dev(dev);
997 if (!tb[IFLA_MTU])
998 dev->mtu = mtu;
999
1000 ip_tunnel_add(itn, nt);
1001
1002out:
1003 return err;
1004}
1005EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1006
1007int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1008 struct ip_tunnel_parm *p)
1009{
 1010 struct ip_tunnel *t;
 1011 struct ip_tunnel *tunnel = netdev_priv(dev);
 1012 struct net *net = tunnel->net;
1013 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1014
1015 if (dev == itn->fb_tunnel_dev)
1016 return -EINVAL;
1017
1018 t = ip_tunnel_find(itn, p, dev->type);
1019
1020 if (t) {
1021 if (t->dev != dev)
1022 return -EEXIST;
1023 } else {
 1024 t = tunnel;
1025
1026 if (dev->type != ARPHRD_ETHER) {
1027 unsigned int nflags = 0;
1028
1029 if (ipv4_is_multicast(p->iph.daddr))
1030 nflags = IFF_BROADCAST;
1031 else if (p->iph.daddr)
1032 nflags = IFF_POINTOPOINT;
1033
1034 if ((dev->flags ^ nflags) &
1035 (IFF_POINTOPOINT | IFF_BROADCAST))
1036 return -EINVAL;
1037 }
1038 }
1039
1040 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1041 return 0;
1042}
1043EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1044
1045int ip_tunnel_init(struct net_device *dev)
1046{
1047 struct ip_tunnel *tunnel = netdev_priv(dev);
1048 struct iphdr *iph = &tunnel->parms.iph;
 1049 int i, err;
1050
1051 dev->destructor = ip_tunnel_dev_free;
1052 dev->tstats = alloc_percpu(struct pcpu_tstats);
1053 if (!dev->tstats)
1054 return -ENOMEM;
1055
1056 for_each_possible_cpu(i) {
1057 struct pcpu_tstats *ipt_stats;
1058 ipt_stats = per_cpu_ptr(dev->tstats, i);
1059 u64_stats_init(&ipt_stats->syncp);
1060 }
1061
1062 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1063 if (!tunnel->dst_cache) {
1064 free_percpu(dev->tstats);
1065 return -ENOMEM;
1066 }
1067
1068 for_each_possible_cpu(i) {
1069 struct ip_tunnel_dst *idst = per_cpu_ptr(tunnel->dst_cache, i);
 1070 idst->dst = NULL;
1071 spin_lock_init(&idst->lock);
1072 }
1073
1074 err = gro_cells_init(&tunnel->gro_cells, dev);
1075 if (err) {
 1076 free_percpu(tunnel->dst_cache);
1077 free_percpu(dev->tstats);
1078 return err;
1079 }
1080
1081 tunnel->dev = dev;
 1082 tunnel->net = dev_net(dev);
1083 strcpy(tunnel->parms.name, dev->name);
1084 iph->version = 4;
1085 iph->ihl = 5;
1086
1087 return 0;
1088}
1089EXPORT_SYMBOL_GPL(ip_tunnel_init);
1090
1091void ip_tunnel_uninit(struct net_device *dev)
1092{
 1093 struct ip_tunnel *tunnel = netdev_priv(dev);
 1094 struct net *net = tunnel->net;
1095 struct ip_tunnel_net *itn;
1096
1097 itn = net_generic(net, tunnel->ip_tnl_net_id);
 1098 /* fb_tunnel_dev will be unregistered in the net-exit call. */
1099 if (itn->fb_tunnel_dev != dev)
1100 ip_tunnel_del(netdev_priv(dev));
 1101
 1102 tunnel_dst_reset_all(tunnel);
1103}
1104EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1105
 1106/* Do the least required initialization; the rest is done in the tunnel_init call */
1107void ip_tunnel_setup(struct net_device *dev, int net_id)
1108{
1109 struct ip_tunnel *tunnel = netdev_priv(dev);
1110 tunnel->ip_tnl_net_id = net_id;
1111}
1112EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1113
1114MODULE_LICENSE("GPL");