]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - net/ipv4/ip_tunnel.c
ip_tunnel: fix i_key matching in ip_tunnel_find
[mirror_ubuntu-jammy-kernel.git] / net / ipv4 / ip_tunnel.c
CommitLineData
c5441932
PS
1/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
27d79f3b 43#include <linux/err.h>
c5441932
PS
44
45#include <net/sock.h>
46#include <net/ip.h>
47#include <net/icmp.h>
48#include <net/protocol.h>
49#include <net/ip_tunnels.h>
50#include <net/arp.h>
51#include <net/checksum.h>
52#include <net/dsfield.h>
53#include <net/inet_ecn.h>
54#include <net/xfrm.h>
55#include <net/net_namespace.h>
56#include <net/netns/generic.h>
57#include <net/rtnetlink.h>
58
59#if IS_ENABLED(CONFIG_IPV6)
60#include <net/ipv6.h>
61#include <net/ip6_fib.h>
62#include <net/ip6_route.h>
63#endif
64
967680e0 65static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
c5441932
PS
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
6c7e7610
ED
71static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
72 struct dst_entry *dst)
7d442fab
TH
73{
74 struct dst_entry *old_dst;
75
6c7e7610
ED
76 if (dst) {
77 if (dst->flags & DST_NOCACHE)
78 dst = NULL;
79 else
80 dst_clone(dst);
81 }
82 old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
7d442fab 83 dst_release(old_dst);
7d442fab
TH
84}
85
6c7e7610 86static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
7d442fab 87{
9a4aa9af 88 __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
7d442fab
TH
89}
90
6c7e7610 91static void tunnel_dst_reset(struct ip_tunnel *t)
7d442fab
TH
92{
93 tunnel_dst_set(t, NULL);
94}
95
cf71d2bc 96void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
9a4aa9af
TH
97{
98 int i;
99
100 for_each_possible_cpu(i)
101 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
102}
cf71d2bc 103EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
9a4aa9af 104
b045d37b 105static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
7d442fab
TH
106{
107 struct dst_entry *dst;
108
109 rcu_read_lock();
9a4aa9af 110 dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
b045d37b
ED
111 if (dst) {
112 if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
113 rcu_read_unlock();
114 tunnel_dst_reset(t);
115 return NULL;
116 }
7d442fab 117 dst_hold(dst);
7d442fab 118 }
b045d37b
ED
119 rcu_read_unlock();
120 return (struct rtable *)dst;
7d442fab
TH
121}
122
c5441932
PS
123static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
124 __be16 flags, __be32 key)
125{
126 if (p->i_flags & TUNNEL_KEY) {
127 if (flags & TUNNEL_KEY)
128 return key == p->i_key;
129 else
130 /* key expected, none present */
131 return false;
132 } else
133 return !(flags & TUNNEL_KEY);
134}
135
/* Fallback tunnel: no source, no destination, no key, no options

   Tunnel hash table:
   We require an exact key match, i.e. if a key is present in the packet
   it will match only a tunnel with the same key; if it is not present,
   it will match only a keyless tunnel.

   All keyless packets that do not match a configured keyless tunnel
   will match the fallback tunnel.
   Given src, dst and key, find the appropriate tunnel for input.
*/
147struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
148 int link, __be16 flags,
149 __be32 remote, __be32 local,
150 __be32 key)
151{
152 unsigned int hash;
153 struct ip_tunnel *t, *cand = NULL;
154 struct hlist_head *head;
155
967680e0 156 hash = ip_tunnel_hash(key, remote);
c5441932
PS
157 head = &itn->tunnels[hash];
158
159 hlist_for_each_entry_rcu(t, head, hash_node) {
160 if (local != t->parms.iph.saddr ||
161 remote != t->parms.iph.daddr ||
162 !(t->dev->flags & IFF_UP))
163 continue;
164
165 if (!ip_tunnel_key_match(&t->parms, flags, key))
166 continue;
167
168 if (t->parms.link == link)
169 return t;
170 else
171 cand = t;
172 }
173
174 hlist_for_each_entry_rcu(t, head, hash_node) {
175 if (remote != t->parms.iph.daddr ||
176 !(t->dev->flags & IFF_UP))
177 continue;
178
179 if (!ip_tunnel_key_match(&t->parms, flags, key))
180 continue;
181
182 if (t->parms.link == link)
183 return t;
184 else if (!cand)
185 cand = t;
186 }
187
967680e0 188 hash = ip_tunnel_hash(key, 0);
c5441932
PS
189 head = &itn->tunnels[hash];
190
191 hlist_for_each_entry_rcu(t, head, hash_node) {
192 if ((local != t->parms.iph.saddr &&
193 (local != t->parms.iph.daddr ||
194 !ipv4_is_multicast(local))) ||
195 !(t->dev->flags & IFF_UP))
196 continue;
197
198 if (!ip_tunnel_key_match(&t->parms, flags, key))
199 continue;
200
201 if (t->parms.link == link)
202 return t;
203 else if (!cand)
204 cand = t;
205 }
206
207 if (flags & TUNNEL_NO_KEY)
208 goto skip_key_lookup;
209
210 hlist_for_each_entry_rcu(t, head, hash_node) {
211 if (t->parms.i_key != key ||
212 !(t->dev->flags & IFF_UP))
213 continue;
214
215 if (t->parms.link == link)
216 return t;
217 else if (!cand)
218 cand = t;
219 }
220
221skip_key_lookup:
222 if (cand)
223 return cand;
224
225 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
226 return netdev_priv(itn->fb_tunnel_dev);
227
228
229 return NULL;
230}
231EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
232
233static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
234 struct ip_tunnel_parm *parms)
235{
236 unsigned int h;
237 __be32 remote;
6d608f06 238 __be32 i_key = parms->i_key;
c5441932
PS
239
240 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
241 remote = parms->iph.daddr;
242 else
243 remote = 0;
244
6d608f06
SK
245 if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
246 i_key = 0;
247
248 h = ip_tunnel_hash(i_key, remote);
c5441932
PS
249 return &itn->tunnels[h];
250}
251
252static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
253{
254 struct hlist_head *head = ip_bucket(itn, &t->parms);
255
256 hlist_add_head_rcu(&t->hash_node, head);
257}
258
259static void ip_tunnel_del(struct ip_tunnel *t)
260{
261 hlist_del_init_rcu(&t->hash_node);
262}
263
264static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
265 struct ip_tunnel_parm *parms,
266 int type)
267{
268 __be32 remote = parms->iph.daddr;
269 __be32 local = parms->iph.saddr;
270 __be32 key = parms->i_key;
5ce54af1 271 __be16 flags = parms->i_flags;
c5441932
PS
272 int link = parms->link;
273 struct ip_tunnel *t = NULL;
274 struct hlist_head *head = ip_bucket(itn, parms);
275
276 hlist_for_each_entry_rcu(t, head, hash_node) {
277 if (local == t->parms.iph.saddr &&
278 remote == t->parms.iph.daddr &&
c5441932 279 link == t->parms.link &&
5ce54af1
DP
280 type == t->dev->type &&
281 ip_tunnel_key_match(&t->parms, flags, key))
c5441932
PS
282 break;
283 }
284 return t;
285}
286
287static struct net_device *__ip_tunnel_create(struct net *net,
288 const struct rtnl_link_ops *ops,
289 struct ip_tunnel_parm *parms)
290{
291 int err;
292 struct ip_tunnel *tunnel;
293 struct net_device *dev;
294 char name[IFNAMSIZ];
295
296 if (parms->name[0])
297 strlcpy(name, parms->name, IFNAMSIZ);
298 else {
54a5d382 299 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
c5441932
PS
300 err = -E2BIG;
301 goto failed;
302 }
303 strlcpy(name, ops->kind, IFNAMSIZ);
304 strncat(name, "%d", 2);
305 }
306
307 ASSERT_RTNL();
308 dev = alloc_netdev(ops->priv_size, name, ops->setup);
309 if (!dev) {
310 err = -ENOMEM;
311 goto failed;
312 }
313 dev_net_set(dev, net);
314
315 dev->rtnl_link_ops = ops;
316
317 tunnel = netdev_priv(dev);
318 tunnel->parms = *parms;
5e6700b3 319 tunnel->net = net;
c5441932
PS
320
321 err = register_netdevice(dev);
322 if (err)
323 goto failed_free;
324
325 return dev;
326
327failed_free:
328 free_netdev(dev);
329failed:
330 return ERR_PTR(err);
331}
332
7d442fab
TH
333static inline void init_tunnel_flow(struct flowi4 *fl4,
334 int proto,
335 __be32 daddr, __be32 saddr,
336 __be32 key, __u8 tos, int oif)
c5441932
PS
337{
338 memset(fl4, 0, sizeof(*fl4));
339 fl4->flowi4_oif = oif;
340 fl4->daddr = daddr;
341 fl4->saddr = saddr;
342 fl4->flowi4_tos = tos;
343 fl4->flowi4_proto = proto;
344 fl4->fl4_gre_key = key;
c5441932
PS
345}
346
347static int ip_tunnel_bind_dev(struct net_device *dev)
348{
349 struct net_device *tdev = NULL;
350 struct ip_tunnel *tunnel = netdev_priv(dev);
351 const struct iphdr *iph;
352 int hlen = LL_MAX_HEADER;
353 int mtu = ETH_DATA_LEN;
354 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
355
356 iph = &tunnel->parms.iph;
357
358 /* Guess output device to choose reasonable mtu and needed_headroom */
359 if (iph->daddr) {
360 struct flowi4 fl4;
361 struct rtable *rt;
362
7d442fab
TH
363 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
364 iph->saddr, tunnel->parms.o_key,
365 RT_TOS(iph->tos), tunnel->parms.link);
366 rt = ip_route_output_key(tunnel->net, &fl4);
367
c5441932
PS
368 if (!IS_ERR(rt)) {
369 tdev = rt->dst.dev;
6c7e7610 370 tunnel_dst_set(tunnel, &rt->dst);
c5441932
PS
371 ip_rt_put(rt);
372 }
373 if (dev->type != ARPHRD_ETHER)
374 dev->flags |= IFF_POINTOPOINT;
375 }
376
377 if (!tdev && tunnel->parms.link)
6c742e71 378 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
c5441932
PS
379
380 if (tdev) {
381 hlen = tdev->hard_header_len + tdev->needed_headroom;
382 mtu = tdev->mtu;
383 }
384 dev->iflink = tunnel->parms.link;
385
386 dev->needed_headroom = t_hlen + hlen;
387 mtu -= (dev->hard_header_len + t_hlen);
388
389 if (mtu < 68)
390 mtu = 68;
391
392 return mtu;
393}
394
395static struct ip_tunnel *ip_tunnel_create(struct net *net,
396 struct ip_tunnel_net *itn,
397 struct ip_tunnel_parm *parms)
398{
399 struct ip_tunnel *nt, *fbt;
400 struct net_device *dev;
401
402 BUG_ON(!itn->fb_tunnel_dev);
403 fbt = netdev_priv(itn->fb_tunnel_dev);
404 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
405 if (IS_ERR(dev))
6dd3c9ec 406 return ERR_CAST(dev);
c5441932
PS
407
408 dev->mtu = ip_tunnel_bind_dev(dev);
409
410 nt = netdev_priv(dev);
411 ip_tunnel_add(itn, nt);
412 return nt;
413}
414
415int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
416 const struct tnl_ptk_info *tpi, bool log_ecn_error)
417{
8f84985f 418 struct pcpu_sw_netstats *tstats;
c5441932
PS
419 const struct iphdr *iph = ip_hdr(skb);
420 int err;
421
c5441932
PS
422#ifdef CONFIG_NET_IPGRE_BROADCAST
423 if (ipv4_is_multicast(iph->daddr)) {
c5441932
PS
424 tunnel->dev->stats.multicast++;
425 skb->pkt_type = PACKET_BROADCAST;
426 }
427#endif
428
429 if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
430 ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
431 tunnel->dev->stats.rx_crc_errors++;
432 tunnel->dev->stats.rx_errors++;
433 goto drop;
434 }
435
436 if (tunnel->parms.i_flags&TUNNEL_SEQ) {
437 if (!(tpi->flags&TUNNEL_SEQ) ||
438 (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
439 tunnel->dev->stats.rx_fifo_errors++;
440 tunnel->dev->stats.rx_errors++;
441 goto drop;
442 }
443 tunnel->i_seqno = ntohl(tpi->seq) + 1;
444 }
445
e96f2e7c
YC
446 skb_reset_network_header(skb);
447
c5441932
PS
448 err = IP_ECN_decapsulate(iph, skb);
449 if (unlikely(err)) {
450 if (log_ecn_error)
451 net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
452 &iph->saddr, iph->tos);
453 if (err > 1) {
454 ++tunnel->dev->stats.rx_frame_errors;
455 ++tunnel->dev->stats.rx_errors;
456 goto drop;
457 }
458 }
459
460 tstats = this_cpu_ptr(tunnel->dev->tstats);
461 u64_stats_update_begin(&tstats->syncp);
462 tstats->rx_packets++;
463 tstats->rx_bytes += skb->len;
464 u64_stats_update_end(&tstats->syncp);
465
81b9eab5
AS
466 skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
467
3d7b46cd
PS
468 if (tunnel->dev->type == ARPHRD_ETHER) {
469 skb->protocol = eth_type_trans(skb, tunnel->dev);
470 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
471 } else {
472 skb->dev = tunnel->dev;
473 }
64261f23 474
c5441932
PS
475 gro_cells_receive(&tunnel->gro_cells, skb);
476 return 0;
477
478drop:
479 kfree_skb(skb);
480 return 0;
481}
482EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
483
23a3647b
PS
484static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
485 struct rtable *rt, __be16 df)
486{
487 struct ip_tunnel *tunnel = netdev_priv(dev);
8c91e162 488 int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
23a3647b
PS
489 int mtu;
490
491 if (df)
492 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
493 - sizeof(struct iphdr) - tunnel->hlen;
494 else
495 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
496
497 if (skb_dst(skb))
498 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
499
500 if (skb->protocol == htons(ETH_P_IP)) {
501 if (!skb_is_gso(skb) &&
502 (df & htons(IP_DF)) && mtu < pkt_size) {
503 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
504 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
505 return -E2BIG;
506 }
507 }
508#if IS_ENABLED(CONFIG_IPV6)
509 else if (skb->protocol == htons(ETH_P_IPV6)) {
510 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
511
512 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
513 mtu >= IPV6_MIN_MTU) {
514 if ((tunnel->parms.iph.daddr &&
515 !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
516 rt6->rt6i_dst.plen == 128) {
517 rt6->rt6i_flags |= RTF_MODIFIED;
518 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
519 }
520 }
521
522 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
523 mtu < pkt_size) {
524 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
525 return -E2BIG;
526 }
527 }
528#endif
529 return 0;
530}
531
c5441932 532void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
bf3d6a8f 533 const struct iphdr *tnl_params, const u8 protocol)
c5441932
PS
534{
535 struct ip_tunnel *tunnel = netdev_priv(dev);
536 const struct iphdr *inner_iph;
c5441932
PS
537 struct flowi4 fl4;
538 u8 tos, ttl;
539 __be16 df;
b045d37b 540 struct rtable *rt; /* Route to the other host */
c5441932
PS
541 unsigned int max_headroom; /* The extra header space needed */
542 __be32 dst;
0e6fbc5b 543 int err;
22fb22ea 544 bool connected;
c5441932
PS
545
546 inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
22fb22ea 547 connected = (tunnel->parms.iph.daddr != 0);
c5441932
PS
548
549 dst = tnl_params->daddr;
550 if (dst == 0) {
551 /* NBMA tunnel */
552
553 if (skb_dst(skb) == NULL) {
554 dev->stats.tx_fifo_errors++;
555 goto tx_error;
556 }
557
558 if (skb->protocol == htons(ETH_P_IP)) {
559 rt = skb_rtable(skb);
560 dst = rt_nexthop(rt, inner_iph->daddr);
561 }
562#if IS_ENABLED(CONFIG_IPV6)
563 else if (skb->protocol == htons(ETH_P_IPV6)) {
564 const struct in6_addr *addr6;
565 struct neighbour *neigh;
566 bool do_tx_error_icmp;
567 int addr_type;
568
569 neigh = dst_neigh_lookup(skb_dst(skb),
570 &ipv6_hdr(skb)->daddr);
571 if (neigh == NULL)
572 goto tx_error;
573
574 addr6 = (const struct in6_addr *)&neigh->primary_key;
575 addr_type = ipv6_addr_type(addr6);
576
577 if (addr_type == IPV6_ADDR_ANY) {
578 addr6 = &ipv6_hdr(skb)->daddr;
579 addr_type = ipv6_addr_type(addr6);
580 }
581
582 if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
583 do_tx_error_icmp = true;
584 else {
585 do_tx_error_icmp = false;
586 dst = addr6->s6_addr32[3];
587 }
588 neigh_release(neigh);
589 if (do_tx_error_icmp)
590 goto tx_error_icmp;
591 }
592#endif
593 else
594 goto tx_error;
7d442fab
TH
595
596 connected = false;
c5441932
PS
597 }
598
599 tos = tnl_params->tos;
600 if (tos & 0x1) {
601 tos &= ~0x1;
7d442fab 602 if (skb->protocol == htons(ETH_P_IP)) {
c5441932 603 tos = inner_iph->tos;
7d442fab
TH
604 connected = false;
605 } else if (skb->protocol == htons(ETH_P_IPV6)) {
c5441932 606 tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
7d442fab
TH
607 connected = false;
608 }
c5441932
PS
609 }
610
7d442fab
TH
611 init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
612 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
613
b045d37b 614 rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
7d442fab
TH
615
616 if (!rt) {
617 rt = ip_route_output_key(tunnel->net, &fl4);
618
619 if (IS_ERR(rt)) {
620 dev->stats.tx_carrier_errors++;
621 goto tx_error;
622 }
623 if (connected)
6c7e7610 624 tunnel_dst_set(tunnel, &rt->dst);
c5441932 625 }
7d442fab 626
0e6fbc5b 627 if (rt->dst.dev == dev) {
c5441932
PS
628 ip_rt_put(rt);
629 dev->stats.collisions++;
630 goto tx_error;
631 }
c5441932 632
23a3647b
PS
633 if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
634 ip_rt_put(rt);
635 goto tx_error;
c5441932 636 }
c5441932
PS
637
638 if (tunnel->err_count > 0) {
639 if (time_before(jiffies,
640 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
641 tunnel->err_count--;
642
11c21a30 643 memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
c5441932
PS
644 dst_link_failure(skb);
645 } else
646 tunnel->err_count = 0;
647 }
648
d4a71b15 649 tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
c5441932
PS
650 ttl = tnl_params->ttl;
651 if (ttl == 0) {
652 if (skb->protocol == htons(ETH_P_IP))
653 ttl = inner_iph->ttl;
654#if IS_ENABLED(CONFIG_IPV6)
655 else if (skb->protocol == htons(ETH_P_IPV6))
656 ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
657#endif
658 else
659 ttl = ip4_dst_hoplimit(&rt->dst);
660 }
661
23a3647b
PS
662 df = tnl_params->frag_off;
663 if (skb->protocol == htons(ETH_P_IP))
664 df |= (inner_iph->frag_off&htons(IP_DF));
665
0e6fbc5b
PS
666 max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
667 + rt->dst.header_len;
3e08f4a7 668 if (max_headroom > dev->needed_headroom)
c5441932 669 dev->needed_headroom = max_headroom;
3e08f4a7
SK
670
671 if (skb_cow_head(skb, dev->needed_headroom)) {
586d5fc8 672 ip_rt_put(rt);
3e08f4a7 673 dev->stats.tx_dropped++;
3acfa1e7 674 kfree_skb(skb);
3e08f4a7 675 return;
c5441932
PS
676 }
677
aad88724 678 err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
d4a71b15 679 tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
0e6fbc5b 680 iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
c5441932 681
c5441932
PS
682 return;
683
684#if IS_ENABLED(CONFIG_IPV6)
685tx_error_icmp:
686 dst_link_failure(skb);
687#endif
688tx_error:
689 dev->stats.tx_errors++;
3acfa1e7 690 kfree_skb(skb);
c5441932
PS
691}
692EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
693
694static void ip_tunnel_update(struct ip_tunnel_net *itn,
695 struct ip_tunnel *t,
696 struct net_device *dev,
697 struct ip_tunnel_parm *p,
698 bool set_mtu)
699{
700 ip_tunnel_del(t);
701 t->parms.iph.saddr = p->iph.saddr;
702 t->parms.iph.daddr = p->iph.daddr;
703 t->parms.i_key = p->i_key;
704 t->parms.o_key = p->o_key;
705 if (dev->type != ARPHRD_ETHER) {
706 memcpy(dev->dev_addr, &p->iph.saddr, 4);
707 memcpy(dev->broadcast, &p->iph.daddr, 4);
708 }
709 ip_tunnel_add(itn, t);
710
711 t->parms.iph.ttl = p->iph.ttl;
712 t->parms.iph.tos = p->iph.tos;
713 t->parms.iph.frag_off = p->iph.frag_off;
714
715 if (t->parms.link != p->link) {
716 int mtu;
717
718 t->parms.link = p->link;
719 mtu = ip_tunnel_bind_dev(dev);
720 if (set_mtu)
721 dev->mtu = mtu;
722 }
cf71d2bc 723 ip_tunnel_dst_reset_all(t);
c5441932
PS
724 netdev_state_change(dev);
725}
726
727int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
728{
729 int err = 0;
8c923ce2
ND
730 struct ip_tunnel *t = netdev_priv(dev);
731 struct net *net = t->net;
732 struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
c5441932
PS
733
734 BUG_ON(!itn->fb_tunnel_dev);
735 switch (cmd) {
736 case SIOCGETTUNNEL:
8c923ce2 737 if (dev == itn->fb_tunnel_dev) {
c5441932 738 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
8c923ce2
ND
739 if (t == NULL)
740 t = netdev_priv(dev);
741 }
c5441932
PS
742 memcpy(p, &t->parms, sizeof(*p));
743 break;
744
745 case SIOCADDTUNNEL:
746 case SIOCCHGTUNNEL:
747 err = -EPERM;
748 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
749 goto done;
750 if (p->iph.ttl)
751 p->iph.frag_off |= htons(IP_DF);
7c8e6b9c
DP
752 if (!(p->i_flags & VTI_ISVTI)) {
753 if (!(p->i_flags & TUNNEL_KEY))
754 p->i_key = 0;
755 if (!(p->o_flags & TUNNEL_KEY))
756 p->o_key = 0;
757 }
c5441932
PS
758
759 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
760
6dd3c9ec 761 if (!t && (cmd == SIOCADDTUNNEL)) {
c5441932 762 t = ip_tunnel_create(net, itn, p);
6dd3c9ec
FW
763 if (IS_ERR(t)) {
764 err = PTR_ERR(t);
765 break;
766 }
767 }
c5441932
PS
768 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
769 if (t != NULL) {
770 if (t->dev != dev) {
771 err = -EEXIST;
772 break;
773 }
774 } else {
775 unsigned int nflags = 0;
776
777 if (ipv4_is_multicast(p->iph.daddr))
778 nflags = IFF_BROADCAST;
779 else if (p->iph.daddr)
780 nflags = IFF_POINTOPOINT;
781
782 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
783 err = -EINVAL;
784 break;
785 }
786
787 t = netdev_priv(dev);
788 }
789 }
790
791 if (t) {
792 err = 0;
793 ip_tunnel_update(itn, t, dev, p, true);
6dd3c9ec
FW
794 } else {
795 err = -ENOENT;
796 }
c5441932
PS
797 break;
798
799 case SIOCDELTUNNEL:
800 err = -EPERM;
801 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
802 goto done;
803
804 if (dev == itn->fb_tunnel_dev) {
805 err = -ENOENT;
806 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
807 if (t == NULL)
808 goto done;
809 err = -EPERM;
810 if (t == netdev_priv(itn->fb_tunnel_dev))
811 goto done;
812 dev = t->dev;
813 }
814 unregister_netdevice(dev);
815 err = 0;
816 break;
817
818 default:
819 err = -EINVAL;
820 }
821
822done:
823 return err;
824}
825EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
826
827int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
828{
829 struct ip_tunnel *tunnel = netdev_priv(dev);
830 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
831
832 if (new_mtu < 68 ||
833 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
834 return -EINVAL;
835 dev->mtu = new_mtu;
836 return 0;
837}
838EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
839
840static void ip_tunnel_dev_free(struct net_device *dev)
841{
842 struct ip_tunnel *tunnel = netdev_priv(dev);
843
844 gro_cells_destroy(&tunnel->gro_cells);
9a4aa9af 845 free_percpu(tunnel->dst_cache);
c5441932
PS
846 free_percpu(dev->tstats);
847 free_netdev(dev);
848}
849
850void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
851{
c5441932
PS
852 struct ip_tunnel *tunnel = netdev_priv(dev);
853 struct ip_tunnel_net *itn;
854
6c742e71 855 itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
c5441932
PS
856
857 if (itn->fb_tunnel_dev != dev) {
858 ip_tunnel_del(netdev_priv(dev));
859 unregister_netdevice_queue(dev, head);
860 }
861}
862EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
863
d3b6f614 864int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
c5441932
PS
865 struct rtnl_link_ops *ops, char *devname)
866{
867 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
868 struct ip_tunnel_parm parms;
6261d983 869 unsigned int i;
c5441932 870
6261d983 871 for (i = 0; i < IP_TNL_HASH_SIZE; i++)
872 INIT_HLIST_HEAD(&itn->tunnels[i]);
c5441932
PS
873
874 if (!ops) {
875 itn->fb_tunnel_dev = NULL;
876 return 0;
877 }
6261d983 878
c5441932
PS
879 memset(&parms, 0, sizeof(parms));
880 if (devname)
881 strlcpy(parms.name, devname, IFNAMSIZ);
882
883 rtnl_lock();
884 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
ea857f28
DC
885 /* FB netdevice is special: we have one, and only one per netns.
886 * Allowing to move it to another netns is clearly unsafe.
887 */
67013282 888 if (!IS_ERR(itn->fb_tunnel_dev)) {
b4de77ad 889 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
78ff4be4 890 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
67013282
SK
891 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
892 }
b4de77ad 893 rtnl_unlock();
c5441932 894
27d79f3b 895 return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
c5441932
PS
896}
897EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
898
6c742e71
ND
899static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
900 struct rtnl_link_ops *ops)
c5441932 901{
6c742e71
ND
902 struct net *net = dev_net(itn->fb_tunnel_dev);
903 struct net_device *dev, *aux;
c5441932
PS
904 int h;
905
6c742e71
ND
906 for_each_netdev_safe(net, dev, aux)
907 if (dev->rtnl_link_ops == ops)
908 unregister_netdevice_queue(dev, head);
909
c5441932
PS
910 for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
911 struct ip_tunnel *t;
912 struct hlist_node *n;
913 struct hlist_head *thead = &itn->tunnels[h];
914
915 hlist_for_each_entry_safe(t, n, thead, hash_node)
6c742e71
ND
916 /* If dev is in the same netns, it has already
917 * been added to the list by the previous loop.
918 */
919 if (!net_eq(dev_net(t->dev), net))
920 unregister_netdevice_queue(t->dev, head);
c5441932 921 }
c5441932
PS
922}
923
6c742e71 924void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
c5441932
PS
925{
926 LIST_HEAD(list);
927
928 rtnl_lock();
6c742e71 929 ip_tunnel_destroy(itn, &list, ops);
c5441932
PS
930 unregister_netdevice_many(&list);
931 rtnl_unlock();
c5441932
PS
932}
933EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
934
935int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
936 struct ip_tunnel_parm *p)
937{
938 struct ip_tunnel *nt;
939 struct net *net = dev_net(dev);
940 struct ip_tunnel_net *itn;
941 int mtu;
942 int err;
943
944 nt = netdev_priv(dev);
945 itn = net_generic(net, nt->ip_tnl_net_id);
946
947 if (ip_tunnel_find(itn, p, dev->type))
948 return -EEXIST;
949
5e6700b3 950 nt->net = net;
c5441932
PS
951 nt->parms = *p;
952 err = register_netdevice(dev);
953 if (err)
954 goto out;
955
956 if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
957 eth_hw_addr_random(dev);
958
959 mtu = ip_tunnel_bind_dev(dev);
960 if (!tb[IFLA_MTU])
961 dev->mtu = mtu;
962
963 ip_tunnel_add(itn, nt);
964
965out:
966 return err;
967}
968EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
969
970int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
971 struct ip_tunnel_parm *p)
972{
6c742e71 973 struct ip_tunnel *t;
c5441932 974 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 975 struct net *net = tunnel->net;
c5441932
PS
976 struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
977
978 if (dev == itn->fb_tunnel_dev)
979 return -EINVAL;
980
c5441932
PS
981 t = ip_tunnel_find(itn, p, dev->type);
982
983 if (t) {
984 if (t->dev != dev)
985 return -EEXIST;
986 } else {
6c742e71 987 t = tunnel;
c5441932
PS
988
989 if (dev->type != ARPHRD_ETHER) {
990 unsigned int nflags = 0;
991
992 if (ipv4_is_multicast(p->iph.daddr))
993 nflags = IFF_BROADCAST;
994 else if (p->iph.daddr)
995 nflags = IFF_POINTOPOINT;
996
997 if ((dev->flags ^ nflags) &
998 (IFF_POINTOPOINT | IFF_BROADCAST))
999 return -EINVAL;
1000 }
1001 }
1002
1003 ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1004 return 0;
1005}
1006EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1007
1008int ip_tunnel_init(struct net_device *dev)
1009{
1010 struct ip_tunnel *tunnel = netdev_priv(dev);
1011 struct iphdr *iph = &tunnel->parms.iph;
1c213bd2 1012 int err;
c5441932
PS
1013
1014 dev->destructor = ip_tunnel_dev_free;
1c213bd2 1015 dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
c5441932
PS
1016 if (!dev->tstats)
1017 return -ENOMEM;
1018
9a4aa9af
TH
1019 tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1020 if (!tunnel->dst_cache) {
1021 free_percpu(dev->tstats);
1022 return -ENOMEM;
1023 }
1024
c5441932
PS
1025 err = gro_cells_init(&tunnel->gro_cells, dev);
1026 if (err) {
9a4aa9af 1027 free_percpu(tunnel->dst_cache);
c5441932
PS
1028 free_percpu(dev->tstats);
1029 return err;
1030 }
1031
1032 tunnel->dev = dev;
6c742e71 1033 tunnel->net = dev_net(dev);
c5441932
PS
1034 strcpy(tunnel->parms.name, dev->name);
1035 iph->version = 4;
1036 iph->ihl = 5;
1037
1038 return 0;
1039}
1040EXPORT_SYMBOL_GPL(ip_tunnel_init);
1041
1042void ip_tunnel_uninit(struct net_device *dev)
1043{
c5441932 1044 struct ip_tunnel *tunnel = netdev_priv(dev);
6c742e71 1045 struct net *net = tunnel->net;
c5441932
PS
1046 struct ip_tunnel_net *itn;
1047
1048 itn = net_generic(net, tunnel->ip_tnl_net_id);
1049 /* fb_tunnel_dev will be unregisted in net-exit call. */
1050 if (itn->fb_tunnel_dev != dev)
1051 ip_tunnel_del(netdev_priv(dev));
7d442fab 1052
cf71d2bc 1053 ip_tunnel_dst_reset_all(tunnel);
c5441932
PS
1054}
1055EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1056
1057/* Do least required initialization, rest of init is done in tunnel_init call */
1058void ip_tunnel_setup(struct net_device *dev, int net_id)
1059{
1060 struct ip_tunnel *tunnel = netdev_priv(dev);
1061 tunnel->ip_tnl_net_id = net_id;
1062}
1063EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1064
1065MODULE_LICENSE("GPL");