]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv4/ip_tunnel.c
xfrm: Add xfrm_tunnel_skb_cb to the skb common buffer
[mirror_ubuntu-artful-kernel.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
27d79f3b 43#include <linux/err.h>
c5441932
PS
44
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
58
59#if IS_ENABLED(CONFIG_IPV6)
60#include <net/ipv6.h>
61#include <net/ip6_fib.h>
62#include <net/ip6_route.h>
63#endif
64
967680e0 65static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
6c7e7610
ED
71static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72 struct dst_entry *dst)
7d442fab
TH
73{
74 struct dst_entry *old_dst;
75
6c7e7610
ED
76 if (dst) {
77 if (dst->flags & DST_NOCACHE)
78 dst = NULL;
79 else
80 dst_clone(dst);
81 }
82 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
7d442fab 83 dst_release(old_dst);
7d442fab
TH
84}
85
6c7e7610 86static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
7d442fab 87{
9a4aa9af 88 __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
7d442fab
TH
89}
90
6c7e7610 91static void tunnel_dst_reset(struct ip_tunnel *t)
7d442fab
TH
92{
93 tunnel_dst_set(t, NULL);
94}
95
9a4aa9af
TH
96static void tunnel_dst_reset_all(struct ip_tunnel *t)
97{
98 int i;
99
100 for_each_possible_cpu(i)
101 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102}
103
b045d37b 104static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
7d442fab
TH
105{
106 struct dst_entry *dst;
107
108 rcu_read_lock();
9a4aa9af 109 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
b045d37b
ED
110 if (dst) {
111 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
112 rcu_read_unlock();
113 tunnel_dst_reset(t);
114 return NULL;
115 }
7d442fab 116 dst_hold(dst);
7d442fab 117 }
b045d37b
ED
118 rcu_read_unlock();
119 return (struct rtable *)dst;
7d442fab
TH
120}
121
c5441932
PS
122/* Often modified stats are per cpu, other are shared (netdev->stats) */
123struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
124 struct rtnl_link_stats64 *tot)
125{
126 int i;
127
128 for_each_possible_cpu(i) {
8f84985f
LR
129 const struct pcpu_sw_netstats *tstats =
130 per_cpu_ptr(dev->tstats, i);
c5441932
PS
131 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
132 unsigned int start;
133
134 do {
135 start = u64_stats_fetch_begin_bh(&tstats->syncp);
136 rx_packets = tstats->rx_packets;
137 tx_packets = tstats->tx_packets;
138 rx_bytes = tstats->rx_bytes;
139 tx_bytes = tstats->tx_bytes;
140 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
141
142 tot->rx_packets += rx_packets;
143 tot->tx_packets += tx_packets;
144 tot->rx_bytes += rx_bytes;
145 tot->tx_bytes += tx_bytes;
146 }
147
148 tot->multicast = dev->stats.multicast;
149
150 tot->rx_crc_errors = dev->stats.rx_crc_errors;
151 tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
152 tot->rx_length_errors = dev->stats.rx_length_errors;
153 tot->rx_frame_errors = dev->stats.rx_frame_errors;
154 tot->rx_errors = dev->stats.rx_errors;
155
156 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
157 tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
158 tot->tx_dropped = dev->stats.tx_dropped;
159 tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
160 tot->tx_errors = dev->stats.tx_errors;
161
162 tot->collisions = dev->stats.collisions;
163
164 return tot;
165}
166EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
167
168static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
169 __be16 flags, __be32 key)
170{
171 if (p->i_flags & TUNNEL_KEY) {
172 if (flags & TUNNEL_KEY)
173 return key == p->i_key;
174 else
175 /* key expected, none present */
176 return false;
177 } else
178 return !(flags & TUNNEL_KEY);
179}
180
181/* Fallback tunnel: no source, no destination, no key, no options
182
183 Tunnel hash table:
184 We require exact key match i.e. if a key is present in packet
185 it will match only tunnel with the same key; if it is not present,
186 it will match only keyless tunnel.
187
188 All keysless packets, if not matched configured keyless tunnels
189 will match fallback tunnel.
190 Given src, dst and key, find appropriate for input tunnel.
191*/
192struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
193 int link, __be16 flags,
194 __be32 remote, __be32 local,
195 __be32 key)
196{
197 unsigned int hash;
198 struct ip_tunnel *t, *cand = NULL;
199 struct hlist_head *head;
200
967680e0 201 hash = ip_tunnel_hash(key, remote);
c5441932
PS
202 head = &itn->tunnels[hash];
203
204 hlist_for_each_entry_rcu(t, head, hash_node) {
205 if (local != t->parms.iph.saddr ||
206 remote != t->parms.iph.daddr ||
207 !(t->dev->flags & IFF_UP))
208 continue;
209
210 if (!ip_tunnel_key_match(&t->parms, flags, key))
211 continue;
212
213 if (t->parms.link == link)
214 return t;
215 else
216 cand = t;
217 }
218
219 hlist_for_each_entry_rcu(t, head, hash_node) {
220 if (remote != t->parms.iph.daddr ||
221 !(t->dev->flags & IFF_UP))
222 continue;
223
224 if (!ip_tunnel_key_match(&t->parms, flags, key))
225 continue;
226
227 if (t->parms.link == link)
228 return t;
229 else if (!cand)
230 cand = t;
231 }
232
967680e0 233 hash = ip_tunnel_hash(key, 0);
c5441932
PS
234 head = &itn->tunnels[hash];
235
236 hlist_for_each_entry_rcu(t, head, hash_node) {
237 if ((local != t->parms.iph.saddr &&
238 (local != t->parms.iph.daddr ||
239 !ipv4_is_multicast(local))) ||
240 !(t->dev->flags & IFF_UP))
241 continue;
242
243 if (!ip_tunnel_key_match(&t->parms, flags, key))
244 continue;
245
246 if (t->parms.link == link)
247 return t;
248 else if (!cand)
249 cand = t;
250 }
251
252 if (flags & TUNNEL_NO_KEY)
253 goto skip_key_lookup;
254
255 hlist_for_each_entry_rcu(t, head, hash_node) {
256 if (t->parms.i_key != key ||
257 !(t->dev->flags & IFF_UP))
258 continue;
259
260 if (t->parms.link == link)
261 return t;
262 else if (!cand)
263 cand = t;
264 }
265
266skip_key_lookup:
267 if (cand)
268 return cand;
269
270 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
271 return netdev_priv(itn->fb_tunnel_dev);
272
273
274 return NULL;
275}
276EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
277
278static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
279 struct ip_tunnel_parm *parms)
280{
281 unsigned int h;
282 __be32 remote;
283
284 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
285 remote = parms->iph.daddr;
286 else
287 remote = 0;
288
967680e0 289 h = ip_tunnel_hash(parms->i_key, remote);
c5441932
PS
290 return &itn->tunnels[h];
291}
292
293static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
294{
295 struct hlist_head *head = ip_bucket(itn, &t->parms);
296
297 hlist_add_head_rcu(&t->hash_node, head);
298}
299
300static void ip_tunnel_del(struct ip_tunnel *t)
301{
302 hlist_del_init_rcu(&t->hash_node);
303}
304
305static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
306 struct ip_tunnel_parm *parms,
307 int type)
308{
309 __be32 remote = parms->iph.daddr;
310 __be32 local = parms->iph.saddr;
311 __be32 key = parms->i_key;
312 int link = parms->link;
313 struct ip_tunnel *t = NULL;
314 struct hlist_head *head = ip_bucket(itn, parms);
315
316 hlist_for_each_entry_rcu(t, head, hash_node) {
317 if (local == t->parms.iph.saddr &&
318 remote == t->parms.iph.daddr &&
319 key == t->parms.i_key &&
320 link == t->parms.link &&
321 type == t->dev->type)
322 break;
323 }
324 return t;
325}
326
327static struct net_device *__ip_tunnel_create(struct net *net,
328 const struct rtnl_link_ops *ops,
329 struct ip_tunnel_parm *parms)
330{
331 int err;
332 struct ip_tunnel *tunnel;
333 struct net_device *dev;
334 char name[IFNAMSIZ];
335
336 if (parms->name[0])
337 strlcpy(name, parms->name, IFNAMSIZ);
338 else {
54a5d382 339 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
c5441932
PS
340 err = -E2BIG;
341 goto failed;
342 }
343 strlcpy(name, ops->kind, IFNAMSIZ);
344 strncat(name, "%d", 2);
345 }
346
347 ASSERT_RTNL();
348 dev = alloc_netdev(ops->priv_size, name, ops->setup);
349 if (!dev) {
350 err = -ENOMEM;
351 goto failed;
352 }
353 dev_net_set(dev, net);
354
355 dev->rtnl_link_ops = ops;
356
357 tunnel = netdev_priv(dev);
358 tunnel->parms = *parms;
5e6700b3 359 tunnel->net = net;
c5441932
PS
360
361 err = register_netdevice(dev);
362 if (err)
363 goto failed_free;
364
365 return dev;
366
367failed_free:
368 free_netdev(dev);
369failed:
370 return ERR_PTR(err);
371}
372
7d442fab
TH
373static inline void init_tunnel_flow(struct flowi4 *fl4,
374 int proto,
375 __be32 daddr, __be32 saddr,
376 __be32 key, __u8 tos, int oif)
c5441932
PS
377{
378 memset(fl4, 0, sizeof(*fl4));
379 fl4->flowi4_oif = oif;
380 fl4->daddr = daddr;
381 fl4->saddr = saddr;
382 fl4->flowi4_tos = tos;
383 fl4->flowi4_proto = proto;
384 fl4->fl4_gre_key = key;
c5441932
PS
385}
386
387static int ip_tunnel_bind_dev(struct net_device *dev)
388{
389 struct net_device *tdev = NULL;
390 struct ip_tunnel *tunnel = netdev_priv(dev);
391 const struct iphdr *iph;
392 int hlen = LL_MAX_HEADER;
393 int mtu = ETH_DATA_LEN;
394 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
395
396 iph = &tunnel->parms.iph;
397
398 /* Guess output device to choose reasonable mtu and needed_headroom */
399 if (iph->daddr) {
400 struct flowi4 fl4;
401 struct rtable *rt;
402
7d442fab
TH
403 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
404 iph->saddr, tunnel->parms.o_key,
405 RT_TOS(iph->tos), tunnel->parms.link);
406 rt = ip_route_output_key(tunnel->net, &fl4);
407
c5441932
PS
408 if (!IS_ERR(rt)) {
409 tdev = rt->dst.dev;
6c7e7610 410 tunnel_dst_set(tunnel, &rt->dst);
c5441932
PS
411 ip_rt_put(rt);
412 }
413 if (dev->type != ARPHRD_ETHER)
414 dev->flags |= IFF_POINTOPOINT;
415 }
416
417 if (!tdev && tunnel->parms.link)
6c742e71 418 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
419
420 if (tdev) {
421 hlen = tdev->hard_header_len + tdev->needed_headroom;
422 mtu = tdev->mtu;
423 }
424 dev->iflink = tunnel->parms.link;
425
426 dev->needed_headroom = t_hlen + hlen;
427 mtu -= (dev->hard_header_len + t_hlen);
428
429 if (mtu < 68)
430 mtu = 68;
431
432 return mtu;
433}
434
435static struct ip_tunnel *ip_tunnel_create(struct net *net,
436 struct ip_tunnel_net *itn,
437 struct ip_tunnel_parm *parms)
438{
439 struct ip_tunnel *nt, *fbt;
440 struct net_device *dev;
441
442 BUG_ON(!itn->fb_tunnel_dev);
443 fbt = netdev_priv(itn->fb_tunnel_dev);
444 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
445 if (IS_ERR(dev))
6dd3c9ec 446 return ERR_CAST(dev);
c5441932
PS
447
448 dev->mtu = ip_tunnel_bind_dev(dev);
449
450 nt = netdev_priv(dev);
451 ip_tunnel_add(itn, nt);
452 return nt;
453}
454
455int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
456 const struct tnl_ptk_info *tpi, bool log_ecn_error)
457{
8f84985f 458 struct pcpu_sw_netstats *tstats;
c5441932
PS
459 const struct iphdr *iph = ip_hdr(skb);
460 int err;
461
c5441932
PS
462#ifdef CONFIG_NET_IPGRE_BROADCAST
463 if (ipv4_is_multicast(iph->daddr)) {
464 /* Looped back packet, drop it! */
465 if (rt_is_output_route(skb_rtable(skb)))
466 goto drop;
467 tunnel->dev->stats.multicast++;
468 skb->pkt_type = PACKET_BROADCAST;
469 }
470#endif
471
472 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
473 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
474 tunnel->dev->stats.rx_crc_errors++;
475 tunnel->dev->stats.rx_errors++;
476 goto drop;
477 }
478
479 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
480 if (!(tpi->flags&TUNNEL_SEQ) ||
481 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
482 tunnel->dev->stats.rx_fifo_errors++;
483 tunnel->dev->stats.rx_errors++;
484 goto drop;
485 }
486 tunnel->i_seqno = ntohl(tpi->seq) + 1;
487 }
488
c5441932
PS
489 err = IP_ECN_decapsulate(iph, skb);
490 if (unlikely(err)) {
491 if (log_ecn_error)
492 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
493 &iph->saddr, iph->tos);
494 if (err > 1) {
495 ++tunnel->dev->stats.rx_frame_errors;
496 ++tunnel->dev->stats.rx_errors;
497 goto drop;
498 }
499 }
500
501 tstats = this_cpu_ptr(tunnel->dev->tstats);
502 u64_stats_update_begin(&tstats->syncp);
503 tstats->rx_packets++;
504 tstats->rx_bytes += skb->len;
505 u64_stats_update_end(&tstats->syncp);
506
81b9eab5
AS
507 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
508
3d7b46cd
PS
509 if (tunnel->dev->type == ARPHRD_ETHER) {
510 skb->protocol = eth_type_trans(skb, tunnel->dev);
511 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
512 } else {
513 skb->dev = tunnel->dev;
514 }
64261f23 515
c5441932
PS
516 gro_cells_receive(&tunnel->gro_cells, skb);
517 return 0;
518
519drop:
520 kfree_skb(skb);
521 return 0;
522}
523EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
524
23a3647b
PS
525static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
526 struct rtable *rt, __be16 df)
527{
528 struct ip_tunnel *tunnel = netdev_priv(dev);
8c91e162 529 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
23a3647b
PS
530 int mtu;
531
532 if (df)
533 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
534 - sizeof(struct iphdr) - tunnel->hlen;
535 else
536 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
537
538 if (skb_dst(skb))
539 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
540
541 if (skb->protocol == htons(ETH_P_IP)) {
542 if (!skb_is_gso(skb) &&
543 (df & htons(IP_DF)) && mtu < pkt_size) {
544 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
545 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
546 return -E2BIG;
547 }
548 }
549#if IS_ENABLED(CONFIG_IPV6)
550 else if (skb->protocol == htons(ETH_P_IPV6)) {
551 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
552
553 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
554 mtu >= IPV6_MIN_MTU) {
555 if ((tunnel->parms.iph.daddr &&
556 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
557 rt6->rt6i_dst.plen == 128) {
558 rt6->rt6i_flags |= RTF_MODIFIED;
559 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
560 }
561 }
562
563 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
564 mtu < pkt_size) {
565 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
566 return -E2BIG;
567 }
568 }
569#endif
570 return 0;
571}
572
c5441932 573void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
bf3d6a8f 574 const struct iphdr *tnl_params, const u8 protocol)
c5441932
PS
575{
576 struct ip_tunnel *tunnel = netdev_priv(dev);
577 const struct iphdr *inner_iph;
c5441932
PS
578 struct flowi4 fl4;
579 u8 tos, ttl;
580 __be16 df;
b045d37b 581 struct rtable *rt; /* Route to the other host */
c5441932
PS
582 unsigned int max_headroom; /* The extra header space needed */
583 __be32 dst;
0e6fbc5b 584 int err;
7d442fab 585 bool connected = true;
c5441932
PS
586
587 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
588
589 dst = tnl_params->daddr;
590 if (dst == 0) {
591 /* NBMA tunnel */
592
593 if (skb_dst(skb) == NULL) {
594 dev->stats.tx_fifo_errors++;
595 goto tx_error;
596 }
597
598 if (skb->protocol == htons(ETH_P_IP)) {
599 rt = skb_rtable(skb);
600 dst = rt_nexthop(rt, inner_iph->daddr);
601 }
602#if IS_ENABLED(CONFIG_IPV6)
603 else if (skb->protocol == htons(ETH_P_IPV6)) {
604 const struct in6_addr *addr6;
605 struct neighbour *neigh;
606 bool do_tx_error_icmp;
607 int addr_type;
608
609 neigh = dst_neigh_lookup(skb_dst(skb),
610 &ipv6_hdr(skb)->daddr);
611 if (neigh == NULL)
612 goto tx_error;
613
614 addr6 = (const struct in6_addr *)&neigh->primary_key;
615 addr_type = ipv6_addr_type(addr6);
616
617 if (addr_type == IPV6_ADDR_ANY) {
618 addr6 = &ipv6_hdr(skb)->daddr;
619 addr_type = ipv6_addr_type(addr6);
620 }
621
622 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
623 do_tx_error_icmp = true;
624 else {
625 do_tx_error_icmp = false;
626 dst = addr6->s6_addr32[3];
627 }
628 neigh_release(neigh);
629 if (do_tx_error_icmp)
630 goto tx_error_icmp;
631 }
632#endif
633 else
634 goto tx_error;
7d442fab
TH
635
636 connected = false;
c5441932
PS
637 }
638
639 tos = tnl_params->tos;
640 if (tos & 0x1) {
641 tos &= ~0x1;
7d442fab 642 if (skb->protocol == htons(ETH_P_IP)) {
c5441932 643 tos = inner_iph->tos;
7d442fab
TH
644 connected = false;
645 } else if (skb->protocol == htons(ETH_P_IPV6)) {
c5441932 646 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
647 connected = false;
648 }
c5441932
PS
649 }
650
7d442fab
TH
651 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
652 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
653
b045d37b 654 rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
7d442fab
TH
655
656 if (!rt) {
657 rt = ip_route_output_key(tunnel->net, &fl4);
658
659 if (IS_ERR(rt)) {
660 dev->stats.tx_carrier_errors++;
661 goto tx_error;
662 }
663 if (connected)
6c7e7610 664 tunnel_dst_set(tunnel, &rt->dst);
c5441932 665 }
7d442fab 666
0e6fbc5b 667 if (rt->dst.dev == dev) {
c5441932
PS
668 ip_rt_put(rt);
669 dev->stats.collisions++;
670 goto tx_error;
671 }
c5441932 672
23a3647b
PS
673 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
674 ip_rt_put(rt);
675 goto tx_error;
c5441932 676 }
c5441932
PS
677
678 if (tunnel->err_count > 0) {
679 if (time_before(jiffies,
680 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
681 tunnel->err_count--;
682
11c21a30 683 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
c5441932
PS
684 dst_link_failure(skb);
685 } else
686 tunnel->err_count = 0;
687 }
688
d4a71b15 689 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
690 ttl = tnl_params->ttl;
691 if (ttl == 0) {
692 if (skb->protocol == htons(ETH_P_IP))
693 ttl = inner_iph->ttl;
694#if IS_ENABLED(CONFIG_IPV6)
695 else if (skb->protocol == htons(ETH_P_IPV6))
696 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
697#endif
698 else
699 ttl = ip4_dst_hoplimit(&rt->dst);
700 }
701
23a3647b
PS
702 df = tnl_params->frag_off;
703 if (skb->protocol == htons(ETH_P_IP))
704 df |= (inner_iph->frag_off&htons(IP_DF));
705
0e6fbc5b
PS
706 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
707 + rt->dst.header_len;
3e08f4a7 708 if (max_headroom > dev->needed_headroom)
c5441932 709 dev->needed_headroom = max_headroom;
3e08f4a7
SK
710
711 if (skb_cow_head(skb, dev->needed_headroom)) {
712 dev->stats.tx_dropped++;
3acfa1e7 713 kfree_skb(skb);
3e08f4a7 714 return;
c5441932
PS
715 }
716
8b7ed2d9 717 err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
d4a71b15 718 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
0e6fbc5b 719 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
c5441932 720
c5441932
PS
721 return;
722
723#if IS_ENABLED(CONFIG_IPV6)
724tx_error_icmp:
725 dst_link_failure(skb);
726#endif
727tx_error:
728 dev->stats.tx_errors++;
3acfa1e7 729 kfree_skb(skb);
c5441932
PS
730}
731EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
732
733static void ip_tunnel_update(struct ip_tunnel_net *itn,
734 struct ip_tunnel *t,
735 struct net_device *dev,
736 struct ip_tunnel_parm *p,
737 bool set_mtu)
738{
739 ip_tunnel_del(t);
740 t->parms.iph.saddr = p->iph.saddr;
741 t->parms.iph.daddr = p->iph.daddr;
742 t->parms.i_key = p->i_key;
743 t->parms.o_key = p->o_key;
744 if (dev->type != ARPHRD_ETHER) {
745 memcpy(dev->dev_addr, &p->iph.saddr, 4);
746 memcpy(dev->broadcast, &p->iph.daddr, 4);
747 }
748 ip_tunnel_add(itn, t);
749
750 t->parms.iph.ttl = p->iph.ttl;
751 t->parms.iph.tos = p->iph.tos;
752 t->parms.iph.frag_off = p->iph.frag_off;
753
754 if (t->parms.link != p->link) {
755 int mtu;
756
757 t->parms.link = p->link;
758 mtu = ip_tunnel_bind_dev(dev);
759 if (set_mtu)
760 dev->mtu = mtu;
761 }
9a4aa9af 762 tunnel_dst_reset_all(t);
c5441932
PS
763 netdev_state_change(dev);
764}
765
766int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
767{
768 int err = 0;
769 struct ip_tunnel *t;
770 struct net *net = dev_net(dev);
771 struct ip_tunnel *tunnel = netdev_priv(dev);
772 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
773
774 BUG_ON(!itn->fb_tunnel_dev);
775 switch (cmd) {
776 case SIOCGETTUNNEL:
777 t = NULL;
778 if (dev == itn->fb_tunnel_dev)
779 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
780 if (t == NULL)
781 t = netdev_priv(dev);
782 memcpy(p, &t->parms, sizeof(*p));
783 break;
784
785 case SIOCADDTUNNEL:
786 case SIOCCHGTUNNEL:
787 err = -EPERM;
788 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
789 goto done;
790 if (p->iph.ttl)
791 p->iph.frag_off |= htons(IP_DF);
792 if (!(p->i_flags&TUNNEL_KEY))
793 p->i_key = 0;
794 if (!(p->o_flags&TUNNEL_KEY))
795 p->o_key = 0;
796
797 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
798
6dd3c9ec 799 if (!t && (cmd == SIOCADDTUNNEL)) {
c5441932 800 t = ip_tunnel_create(net, itn, p);
6dd3c9ec
FW
801 if (IS_ERR(t)) {
802 err = PTR_ERR(t);
803 break;
804 }
805 }
c5441932
PS
806 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
807 if (t != NULL) {
808 if (t->dev != dev) {
809 err = -EEXIST;
810 break;
811 }
812 } else {
813 unsigned int nflags = 0;
814
815 if (ipv4_is_multicast(p->iph.daddr))
816 nflags = IFF_BROADCAST;
817 else if (p->iph.daddr)
818 nflags = IFF_POINTOPOINT;
819
820 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
821 err = -EINVAL;
822 break;
823 }
824
825 t = netdev_priv(dev);
826 }
827 }
828
829 if (t) {
830 err = 0;
831 ip_tunnel_update(itn, t, dev, p, true);
6dd3c9ec
FW
832 } else {
833 err = -ENOENT;
834 }
c5441932
PS
835 break;
836
837 case SIOCDELTUNNEL:
838 err = -EPERM;
839 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
840 goto done;
841
842 if (dev == itn->fb_tunnel_dev) {
843 err = -ENOENT;
844 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
845 if (t == NULL)
846 goto done;
847 err = -EPERM;
848 if (t == netdev_priv(itn->fb_tunnel_dev))
849 goto done;
850 dev = t->dev;
851 }
852 unregister_netdevice(dev);
853 err = 0;
854 break;
855
856 default:
857 err = -EINVAL;
858 }
859
860done:
861 return err;
862}
863EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
864
865int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
866{
867 struct ip_tunnel *tunnel = netdev_priv(dev);
868 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
869
870 if (new_mtu < 68 ||
871 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
872 return -EINVAL;
873 dev->mtu = new_mtu;
874 return 0;
875}
876EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
877
878static void ip_tunnel_dev_free(struct net_device *dev)
879{
880 struct ip_tunnel *tunnel = netdev_priv(dev);
881
882 gro_cells_destroy(&tunnel->gro_cells);
9a4aa9af 883 free_percpu(tunnel->dst_cache);
c5441932
PS
884 free_percpu(dev->tstats);
885 free_netdev(dev);
886}
887
888void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
889{
c5441932
PS
890 struct ip_tunnel *tunnel = netdev_priv(dev);
891 struct ip_tunnel_net *itn;
892
6c742e71 893 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
894
895 if (itn->fb_tunnel_dev != dev) {
896 ip_tunnel_del(netdev_priv(dev));
897 unregister_netdevice_queue(dev, head);
898 }
899}
900EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
901
d3b6f614 902int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
c5441932
PS
903 struct rtnl_link_ops *ops, char *devname)
904{
905 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
906 struct ip_tunnel_parm parms;
6261d983 907 unsigned int i;
c5441932 908
6261d983 909 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
910 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932
PS
911
912 if (!ops) {
913 itn->fb_tunnel_dev = NULL;
914 return 0;
915 }
6261d983 916
c5441932
PS
917 memset(&parms, 0, sizeof(parms));
918 if (devname)
919 strlcpy(parms.name, devname, IFNAMSIZ);
920
921 rtnl_lock();
922 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
923 /* FB netdevice is special: we have one, and only one per netns.
924 * Allowing to move it to another netns is clearly unsafe.
925 */
67013282 926 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 927 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
67013282
SK
928 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
929 }
b4de77ad 930 rtnl_unlock();
c5441932 931
27d79f3b 932 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
933}
934EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
935
6c742e71
ND
936static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
937 struct rtnl_link_ops *ops)
c5441932 938{
6c742e71
ND
939 struct net *net = dev_net(itn->fb_tunnel_dev);
940 struct net_device *dev, *aux;
c5441932
PS
941 int h;
942
6c742e71
ND
943 for_each_netdev_safe(net, dev, aux)
944 if (dev->rtnl_link_ops == ops)
945 unregister_netdevice_queue(dev, head);
946
c5441932
PS
947 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
948 struct ip_tunnel *t;
949 struct hlist_node *n;
950 struct hlist_head *thead = &itn->tunnels[h];
951
952 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
953 /* If dev is in the same netns, it has already
954 * been added to the list by the previous loop.
955 */
956 if (!net_eq(dev_net(t->dev), net))
957 unregister_netdevice_queue(t->dev, head);
c5441932 958 }
c5441932
PS
959}
960
6c742e71 961void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
c5441932
PS
962{
963 LIST_HEAD(list);
964
965 rtnl_lock();
6c742e71 966 ip_tunnel_destroy(itn, &list, ops);
c5441932
PS
967 unregister_netdevice_many(&list);
968 rtnl_unlock();
c5441932
PS
969}
970EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
971
972int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
973 struct ip_tunnel_parm *p)
974{
975 struct ip_tunnel *nt;
976 struct net *net = dev_net(dev);
977 struct ip_tunnel_net *itn;
978 int mtu;
979 int err;
980
981 nt = netdev_priv(dev);
982 itn = net_generic(net, nt->ip_tnl_net_id);
983
984 if (ip_tunnel_find(itn, p, dev->type))
985 return -EEXIST;
986
5e6700b3 987 nt->net = net;
c5441932
PS
988 nt->parms = *p;
989 err = register_netdevice(dev);
990 if (err)
991 goto out;
992
993 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
994 eth_hw_addr_random(dev);
995
996 mtu = ip_tunnel_bind_dev(dev);
997 if (!tb[IFLA_MTU])
998 dev->mtu = mtu;
999
1000 ip_tunnel_add(itn, nt);
1001
1002out:
1003 return err;
1004}
1005EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1006
1007int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1008 struct ip_tunnel_parm *p)
1009{
6c742e71 1010 struct ip_tunnel *t;
c5441932 1011 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1012 struct net *net = tunnel->net;
c5441932
PS
1013 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1014
1015 if (dev == itn->fb_tunnel_dev)
1016 return -EINVAL;
1017
c5441932
PS
1018 t = ip_tunnel_find(itn, p, dev->type);
1019
1020 if (t) {
1021 if (t->dev != dev)
1022 return -EEXIST;
1023 } else {
6c742e71 1024 t = tunnel;
c5441932
PS
1025
1026 if (dev->type != ARPHRD_ETHER) {
1027 unsigned int nflags = 0;
1028
1029 if (ipv4_is_multicast(p->iph.daddr))
1030 nflags = IFF_BROADCAST;
1031 else if (p->iph.daddr)
1032 nflags = IFF_POINTOPOINT;
1033
1034 if ((dev->flags ^ nflags) &
1035 (IFF_POINTOPOINT | IFF_BROADCAST))
1036 return -EINVAL;
1037 }
1038 }
1039
1040 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1041 return 0;
1042}
1043EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1044
1045int ip_tunnel_init(struct net_device *dev)
1046{
1047 struct ip_tunnel *tunnel = netdev_priv(dev);
1048 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1049 int err;
c5441932
PS
1050
1051 dev->destructor = ip_tunnel_dev_free;
1c213bd2 1052 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
c5441932
PS
1053 if (!dev->tstats)
1054 return -ENOMEM;
1055
9a4aa9af
TH
1056 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1057 if (!tunnel->dst_cache) {
1058 free_percpu(dev->tstats);
1059 return -ENOMEM;
1060 }
1061
c5441932
PS
1062 err = gro_cells_init(&tunnel->gro_cells, dev);
1063 if (err) {
9a4aa9af 1064 free_percpu(tunnel->dst_cache);
c5441932
PS
1065 free_percpu(dev->tstats);
1066 return err;
1067 }
1068
1069 tunnel->dev = dev;
6c742e71 1070 tunnel->net = dev_net(dev);
c5441932
PS
1071 strcpy(tunnel->parms.name, dev->name);
1072 iph->version = 4;
1073 iph->ihl = 5;
1074
1075 return 0;
1076}
1077EXPORT_SYMBOL_GPL(ip_tunnel_init);
1078
1079void ip_tunnel_uninit(struct net_device *dev)
1080{
c5441932 1081 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1082 struct net *net = tunnel->net;
c5441932
PS
1083 struct ip_tunnel_net *itn;
1084
1085 itn = net_generic(net, tunnel->ip_tnl_net_id);
1086 /* fb_tunnel_dev will be unregisted in net-exit call. */
1087 if (itn->fb_tunnel_dev != dev)
1088 ip_tunnel_del(netdev_priv(dev));
7d442fab 1089
9a4aa9af 1090 tunnel_dst_reset_all(tunnel);
c5441932
PS
1091}
1092EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1093
1094/* Do least required initialization, rest of init is done in tunnel_init call */
1095void ip_tunnel_setup(struct net_device *dev, int net_id)
1096{
1097 struct ip_tunnel *tunnel = netdev_priv(dev);
1098 tunnel->ip_tnl_net_id = net_id;
1099}
1100EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1101
1102MODULE_LICENSE("GPL");