]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - net/ipv4/ipip.c
UBUNTU: [Config] CONFIG_BCM2835_THERMAL=y
[mirror_ubuntu-artful-kernel.git] / net / ipv4 / ipip.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: IP/IP protocol decoder.
1da177e4 3 *
1da177e4
LT
4 * Authors:
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
6 *
7 * Fixes:
8 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 *
25 */
26
27/* tunnel.c: an IP tunnel driver
28
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
31
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
34
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
e905a9ed 36
1da177e4
LT
37 Minor tweaks:
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
42 Added tx_dropped stat
e905a9ed 43
113aa838 44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
1da177e4
LT
45
46 Reworked:
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
51
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
e905a9ed 53
1da177e4
LT
54*/
55
56/* Things I wish I had known when writing the tunnel driver:
57
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
61
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
64
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
e905a9ed 76 find out how much more space you can allocate by calling
1da177e4
LT
77 "skb_tailroom(skb)".
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
85 */
86
87/*
88 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
89
90 For comments look at net/ipv4/ip_gre.c --ANK
91 */
92
e905a9ed 93
4fc268d2 94#include <linux/capability.h>
1da177e4
LT
95#include <linux/module.h>
96#include <linux/types.h>
1da177e4 97#include <linux/kernel.h>
5a0e3ad6 98#include <linux/slab.h>
7c0f6ba6 99#include <linux/uaccess.h>
1da177e4
LT
100#include <linux/skbuff.h>
101#include <linux/netdevice.h>
102#include <linux/in.h>
103#include <linux/tcp.h>
104#include <linux/udp.h>
105#include <linux/if_arp.h>
1da177e4
LT
106#include <linux/init.h>
107#include <linux/netfilter_ipv4.h>
46f25dff 108#include <linux/if_ether.h>
0b500f42 109#include <linux/inetdevice.h>
11cd9476 110#include <linux/rculist.h>
1da177e4
LT
111
112#include <net/sock.h>
113#include <net/ip.h>
114#include <net/icmp.h>
c5441932 115#include <net/ip_tunnels.h>
1da177e4
LT
116#include <net/inet_ecn.h>
117#include <net/xfrm.h>
10dc4c7b
PE
118#include <net/net_namespace.h>
119#include <net/netns/generic.h>
cfc7381b 120#include <net/dst_metadata.h>
1da177e4 121
eccc1bb8 122static bool log_ecn_error = true;
123module_param(log_ecn_error, bool, 0644);
124MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
125
c7d03a00 126static unsigned int ipip_net_id __read_mostly;
10dc4c7b 127
3c97af99 128static int ipip_tunnel_init(struct net_device *dev);
0974658d 129static struct rtnl_link_ops ipip_link_ops __read_mostly;
1da177e4 130
d2acc347 131static int ipip_err(struct sk_buff *skb, u32 info)
1da177e4 132{
1da177e4 133
071f92d0 134/* All the routers (except for Linux) return only
1da177e4
LT
135 8 bytes of packet payload. It means, that precise relaying of
136 ICMP in the real Internet is absolutely infeasible.
137 */
fd58156e
PS
138 struct net *net = dev_net(skb->dev);
139 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
b71d1d42 140 const struct iphdr *iph = (const struct iphdr *)skb->data;
1da177e4 141 struct ip_tunnel *t;
d2acc347 142 int err;
fd58156e
PS
143 const int type = icmp_hdr(skb)->type;
144 const int code = icmp_hdr(skb)->code;
1da177e4 145
d2acc347 146 err = -ENOENT;
fd58156e
PS
147 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
148 iph->daddr, iph->saddr, 0);
51456b29 149 if (!t)
36393395
DM
150 goto out;
151
152 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
153 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
1b69e7e6 154 t->parms.link, 0, iph->protocol, 0);
36393395
DM
155 err = 0;
156 goto out;
157 }
158
55be7a9c 159 if (type == ICMP_REDIRECT) {
2346829e 160 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
1b69e7e6 161 iph->protocol, 0);
55be7a9c
DM
162 err = 0;
163 goto out;
164 }
165
36393395 166 if (t->parms.iph.daddr == 0)
1da177e4 167 goto out;
d2acc347
HX
168
169 err = 0;
1da177e4
LT
170 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
171 goto out;
172
26d94b46 173 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
174 t->err_count++;
175 else
176 t->err_count = 1;
177 t->err_time = jiffies;
b0558ef2 178
fd58156e 179out:
d2acc347 180 return err;
1da177e4
LT
181}
182
1b69e7e6 183static const struct tnl_ptk_info ipip_tpi = {
fd58156e
PS
184 /* no tunnel info required for ipip. */
185 .proto = htons(ETH_P_IP),
186};
187
1b69e7e6
SH
188#if IS_ENABLED(CONFIG_MPLS)
189static const struct tnl_ptk_info mplsip_tpi = {
190 /* no tunnel info required for mplsip. */
191 .proto = htons(ETH_P_MPLS_UC),
192};
193#endif
194
195static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
1da177e4 196{
fd58156e
PS
197 struct net *net = dev_net(skb->dev);
198 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
cfc7381b 199 struct metadata_dst *tun_dst = NULL;
1da177e4 200 struct ip_tunnel *tunnel;
3d7b46cd 201 const struct iphdr *iph;
3c97af99 202
3d7b46cd 203 iph = ip_hdr(skb);
fd58156e
PS
204 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
205 iph->saddr, iph->daddr, 0);
206 if (tunnel) {
1b69e7e6
SH
207 const struct tnl_ptk_info *tpi;
208
209 if (tunnel->parms.iph.protocol != ipproto &&
210 tunnel->parms.iph.protocol != 0)
211 goto drop;
212
eccc1bb8 213 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
214 goto drop;
1b69e7e6
SH
215#if IS_ENABLED(CONFIG_MPLS)
216 if (ipproto == IPPROTO_MPLS)
217 tpi = &mplsip_tpi;
218 else
219#endif
220 tpi = &ipip_tpi;
221 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
737e828b 222 goto drop;
cfc7381b
AS
223 if (tunnel->collect_md) {
224 tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
225 if (!tun_dst)
226 return 0;
227 }
228 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
1da177e4 229 }
1da177e4 230
1da177e4 231 return -1;
eccc1bb8 232
233drop:
234 kfree_skb(skb);
235 return 0;
1da177e4
LT
236}
237
1b69e7e6
SH
238static int ipip_rcv(struct sk_buff *skb)
239{
240 return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
241}
242
243#if IS_ENABLED(CONFIG_MPLS)
244static int mplsip_rcv(struct sk_buff *skb)
245{
246 return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
247}
248#endif
249
11cd9476 250static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr)
0b500f42 251{
11cd9476
JV
252 struct ip_fan_map *fan_map;
253
254 rcu_read_lock();
255 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
256 if (fan_map->overlay ==
257 (daddr & inet_make_mask(fan_map->overlay_prefix))) {
258 rcu_read_unlock();
259 return fan_map;
260 }
261 }
262 rcu_read_unlock();
263
264 return NULL;
0b500f42
JV
265}
266
11cd9476
JV
267/* Determine fan tunnel endpoint to send packet to, based on the inner IP
268 * address.
269 *
270 * Given a /8 overlay and /16 underlay, for an overlay (inner) address
271 * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
272 * two octets of the underlay network (the network portion of a /16), "A"
273 * and "B" are the low order two octets of the underlay network host (the
274 * host portion of a /16), and "Y" is a configured first octet of the
275 * overlay network.
276 *
277 * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
278 * host overlay subnet 99.3.4.0/24. An overlay network datagram from
279 * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
280 * which hosts overlay network subnet 99.6.7.0/24. This transformation is
281 * described in detail further below.
282 *
283 * Using netmasks for the overlay and underlay other than /8 and /16, as
284 * shown above, can yield larger (or smaller) overlay subnets, with the
285 * trade-off of allowing fewer (or more) underlay hosts to participate.
286 *
287 * The size of each overlay network subnet is defined by the total of the
288 * network mask of the overlay plus the size of host portion of the
289 * underlay network. In the above example, /8 + /16 = /24.
290 *
291 * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
292 * this case, the network portion of the underlay is 10.99.224.0/20, and
293 * the host portion is 0.0.14.5 (12 bits). To determine the overlay
294 * network subnet, the 12 bits of host portion are left shifted 12 bits
295 * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
296 * overlay subnet of 99.224.80/20, composed of 8 bits overlay, followed by
297 * 12 bits underlay. This yields 12 bits in the overlay network portion,
298 * allowing for 4094 addresses in each overlay network subnet. The
299 * trade-off is that fewer hosts may participate in the underlay network,
300 * as its host address size has shrunk from 16 bits (65534 addresses) in
301 * the first example to 12 bits (4094 addresses) here.
302 *
303 * For fewer hosts per overlay subnet (permitting a larger number of
304 * underlay hosts to participate), the underlay netmask may be made
305 * smaller.
306 *
307 * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
308 * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
309 * the 20 bits of host by 4 (so that its highest order bit is adjacent to
310 * the lowest order bit of the /8 overlay). This yields an overlay subnet
311 * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
312 * the underlay). This provides more addresses for the underlay network
313 * (approximately 2^20), but each host's segment of the overlay provides
314 * only 4 bits of addresses (14 usable).
315 *
316 * It is also possible to adjust the overlay subnet.
317 *
318 * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
319 * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
320 * shifted 15 bits (/20 - /5), yielding an overlay network of
321 * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
322 * overlay network of 242.107.128.0/17.
323 *
324 * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
325 * underlay host 10.224.220.10, the underlay host portion (.10) is left
326 * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
327 * This would permit 254 addresses on the underlay, with each overlay
328 * segment providing approximately 2^14 - 2 addresses (16382).
329 *
330 * For packets being encapsulated, the overlay network destination IP
331 * address is deconstructed into its overlay and underlay-derived
332 * portions. The underlay portion (determined by the overlay mask and
333 * overlay subnet mask) is right shifted according to the size of the
334 * underlay network mask. This value is then ORed with the network
335 * portion of the underlay network to produce the underlay network
336 * destination for the encapsulated datagram.
337 *
338 * For example, using the initial example of underlay 10.88.3.4/16 and
339 * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
340 * subnet 99.3.4.0/24 with specific host 99.3.4.5. A datagram from
341 * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
342 * of the address extracted. This is a number of bits equal to underlay
343 * network host portion. In the destination address, the highest order of
344 * these bits is one bit lower than the lowest order bit from the overlay
345 * network mask.
346 *
347 * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
348 * underlay mask is /16 (leaving 16 bits for the host portion). The bits
349 * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
350 * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
351 * which is 1 bit lower than the lowest order overlay address bit).
0b500f42 352 *
11cd9476
JV
353 * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
354 * This value is then ORed with the underlay network portion,
355 * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
356 * the encapsulated datagram.
357 *
358 * Another transform using the final example: overlay 100.64.0.0/10 and
359 * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
360 * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
361 * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
362 * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
363 * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
364 * network portion and the underlay host portion) bits, yielding 0.0.0.11.
365 * This is ORed with the underlay network portion, 10.224.220.0/24, giving
366 * the underlay destination of 10.224.220.11 for overlay destination
367 * 100.66.200.5.
0b500f42
JV
368 */
369static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
370{
11cd9476 371 struct ip_fan_map *f_map;
0b500f42
JV
372 u32 daddr, underlay;
373
11cd9476
JV
374 f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr);
375 if (!f_map)
376 return -ENOENT;
377
0b500f42 378 daddr = ntohl(ip_hdr(skb)->daddr);
11cd9476 379 underlay = ntohl(f_map->underlay);
0b500f42
JV
380 if (!underlay)
381 return -EINVAL;
382
383 *iph = tunnel->parms.iph;
11cd9476
JV
384 iph->daddr = htonl(underlay |
385 ((daddr & ~f_map->overlay_mask) >>
386 (32 - f_map->overlay_prefix -
387 (32 - f_map->underlay_prefix))));
0b500f42
JV
388 return 0;
389}
390
1da177e4
LT
391/*
392 * This function assumes it is being called from dev_queue_xmit()
393 * and that skb is filled properly by that function.
394 */
1b69e7e6
SH
395static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
396 struct net_device *dev)
1da177e4 397{
2941a486 398 struct ip_tunnel *tunnel = netdev_priv(dev);
b71d1d42 399 const struct iphdr *tiph = &tunnel->parms.iph;
1b69e7e6 400 u8 ipproto;
0b500f42 401 struct iphdr fiph;
1b69e7e6
SH
402
403 switch (skb->protocol) {
404 case htons(ETH_P_IP):
405 ipproto = IPPROTO_IPIP;
406 break;
407#if IS_ENABLED(CONFIG_MPLS)
408 case htons(ETH_P_MPLS_UC):
409 ipproto = IPPROTO_MPLS;
410 break;
411#endif
412 default:
413 goto tx_error;
414 }
1da177e4 415
1b69e7e6 416 if (tiph->protocol != ipproto && tiph->protocol != 0)
1da177e4 417 goto tx_error;
1da177e4 418
7e13318d 419 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
aed069df 420 goto tx_error;
8344bfc6 421
11cd9476 422 if (fan_has_map(&tunnel->fan)) {
0b500f42
JV
423 if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
424 goto tx_error;
425 tiph = &fiph;
426 } else {
427 tiph = &tunnel->parms.iph;
428 }
429
1b69e7e6 430 skb_set_inner_ipproto(skb, ipproto);
077c5a09 431
cfc7381b
AS
432 if (tunnel->collect_md)
433 ip_md_tunnel_xmit(skb, dev, ipproto);
434 else
435 ip_tunnel_xmit(skb, dev, tiph, ipproto);
6ed10654 436 return NETDEV_TX_OK;
1da177e4 437
1da177e4 438tx_error:
3acfa1e7 439 kfree_skb(skb);
aed069df 440
cb32f511 441 dev->stats.tx_errors++;
6ed10654 442 return NETDEV_TX_OK;
1da177e4
LT
443}
444
1b69e7e6
SH
445static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
446{
447 switch (ipproto) {
448 case 0:
449 case IPPROTO_IPIP:
450#if IS_ENABLED(CONFIG_MPLS)
451 case IPPROTO_MPLS:
452#endif
453 return true;
454 }
455
456 return false;
457}
458
1da177e4 459static int
fd58156e 460ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1da177e4
LT
461{
462 int err = 0;
463 struct ip_tunnel_parm p;
1da177e4 464
fd58156e
PS
465 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
466 return -EFAULT;
1da177e4 467
3b7b514f 468 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
1b69e7e6
SH
469 if (p.iph.version != 4 ||
470 !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
3b7b514f
CW
471 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
472 return -EINVAL;
473 }
474
252a8fbe
ED
475 p.i_key = p.o_key = 0;
476 p.i_flags = p.o_flags = 0;
fd58156e
PS
477 err = ip_tunnel_ioctl(dev, &p, cmd);
478 if (err)
479 return err;
480
481 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
482 return -EFAULT;
483
1da177e4
LT
484 return 0;
485}
486
23a12b14 487static const struct net_device_ops ipip_netdev_ops = {
fd58156e
PS
488 .ndo_init = ipip_tunnel_init,
489 .ndo_uninit = ip_tunnel_uninit,
23a12b14
SH
490 .ndo_start_xmit = ipip_tunnel_xmit,
491 .ndo_do_ioctl = ipip_tunnel_ioctl,
fd58156e
PS
492 .ndo_change_mtu = ip_tunnel_change_mtu,
493 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 494 .ndo_get_iflink = ip_tunnel_get_iflink,
23a12b14
SH
495};
496
c3b89fbb
ED
/* Feature bits set in both dev->features and dev->hw_features at setup. */
#define IPIP_FEATURES	(NETIF_F_SG | NETIF_F_FRAGLIST |	\
			 NETIF_F_HIGHDMA | NETIF_F_GSO_SOFTWARE |	\
			 NETIF_F_HW_CSUM)
502
1da177e4
LT
503static void ipip_tunnel_setup(struct net_device *dev)
504{
11cd9476
JV
505 struct ip_tunnel *t = netdev_priv(dev);
506
23a12b14 507 dev->netdev_ops = &ipip_netdev_ops;
1da177e4
LT
508
509 dev->type = ARPHRD_TUNNEL;
1da177e4 510 dev->flags = IFF_NOARP;
1da177e4 511 dev->addr_len = 4;
153f0943 512 dev->features |= NETIF_F_LLTX;
02875878 513 netif_keep_dst(dev);
c3b89fbb
ED
514
515 dev->features |= IPIP_FEATURES;
516 dev->hw_features |= IPIP_FEATURES;
fd58156e 517 ip_tunnel_setup(dev, ipip_net_id);
11cd9476 518 INIT_LIST_HEAD(&t->fan.fan_maps);
1da177e4
LT
519}
520
3c97af99 521static int ipip_tunnel_init(struct net_device *dev)
1da177e4 522{
23a12b14 523 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 524
1da177e4
LT
525 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
526 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
527
473ab820
TH
528 tunnel->tun_hlen = 0;
529 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
fd58156e 530 return ip_tunnel_init(dev);
1da177e4
LT
531}
532
a8b8a889
MS
533static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
534 struct netlink_ext_ack *extack)
1b69e7e6
SH
535{
536 u8 proto;
537
538 if (!data || !data[IFLA_IPTUN_PROTO])
539 return 0;
540
541 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
542 if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
543 return -EINVAL;
544
545 return 0;
546}
547
be42da0e 548static void ipip_netlink_parms(struct nlattr *data[],
9830ad4c
CG
549 struct ip_tunnel_parm *parms, bool *collect_md,
550 __u32 *fwmark)
be42da0e
ND
551{
552 memset(parms, 0, sizeof(*parms));
553
554 parms->iph.version = 4;
555 parms->iph.protocol = IPPROTO_IPIP;
556 parms->iph.ihl = 5;
cfc7381b 557 *collect_md = false;
be42da0e
ND
558
559 if (!data)
560 return;
561
562 if (data[IFLA_IPTUN_LINK])
563 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
564
565 if (data[IFLA_IPTUN_LOCAL])
67b61f6c 566 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
be42da0e
ND
567
568 if (data[IFLA_IPTUN_REMOTE])
67b61f6c 569 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
be42da0e
ND
570
571 if (data[IFLA_IPTUN_TTL]) {
572 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
573 if (parms->iph.ttl)
574 parms->iph.frag_off = htons(IP_DF);
575 }
576
577 if (data[IFLA_IPTUN_TOS])
578 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
579
1b69e7e6
SH
580 if (data[IFLA_IPTUN_PROTO])
581 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
582
be42da0e
ND
583 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
584 parms->iph.frag_off = htons(IP_DF);
cfc7381b
AS
585
586 if (data[IFLA_IPTUN_COLLECT_METADATA])
587 *collect_md = true;
9830ad4c
CG
588
589 if (data[IFLA_IPTUN_FWMARK])
590 *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
be42da0e
ND
591}
592
473ab820
TH
593/* This function returns true when ENCAP attributes are present in the nl msg */
594static bool ipip_netlink_encap_parms(struct nlattr *data[],
595 struct ip_tunnel_encap *ipencap)
596{
597 bool ret = false;
598
599 memset(ipencap, 0, sizeof(*ipencap));
600
601 if (!data)
602 return ret;
603
604 if (data[IFLA_IPTUN_ENCAP_TYPE]) {
605 ret = true;
606 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
607 }
608
609 if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
610 ret = true;
611 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
612 }
613
614 if (data[IFLA_IPTUN_ENCAP_SPORT]) {
615 ret = true;
3e97fa70 616 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
473ab820
TH
617 }
618
619 if (data[IFLA_IPTUN_ENCAP_DPORT]) {
620 ret = true;
3e97fa70 621 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
473ab820
TH
622 }
623
624 return ret;
625}
626
11cd9476 627static void ipip_fan_flush_map(struct ip_tunnel *t)
0b500f42 628{
11cd9476
JV
629 struct ip_fan_map *fan_map;
630
631 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
632 list_del_rcu(&fan_map->list);
633 kfree_rcu(fan_map, rcu);
634 }
0b500f42
JV
635}
636
11cd9476 637static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay)
0b500f42 638{
11cd9476 639 struct ip_fan_map *fan_map;
0b500f42 640
11cd9476
JV
641 fan_map = ipip_fan_find_map(t, overlay);
642 if (!fan_map)
643 return -ENOENT;
644
645 list_del_rcu(&fan_map->list);
646 kfree_rcu(fan_map, rcu);
0b500f42 647
11cd9476
JV
648 return 0;
649}
0b500f42 650
11cd9476
JV
651static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map)
652{
653 __be32 overlay_mask, underlay_mask;
654 struct ip_fan_map *fan_map;
0b500f42 655
11cd9476
JV
656 overlay_mask = inet_make_mask(map->overlay_prefix);
657 underlay_mask = inet_make_mask(map->underlay_prefix);
0b500f42 658
11cd9476 659 if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
0b500f42
JV
660 return -EINVAL;
661
11cd9476
JV
662 if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
663 return -EINVAL;
0b500f42 664
11cd9476
JV
665 /* Special case: overlay 0 and underlay 0: flush all mappings */
666 if (!map->overlay && !map->underlay) {
667 ipip_fan_flush_map(t);
0b500f42
JV
668 return 0;
669 }
11cd9476
JV
670
671 /* Special case: overlay set and underlay 0: clear map for overlay */
672 if (!map->underlay)
673 return ipip_fan_del_map(t, map->overlay);
674
675 if (ipip_fan_find_map(t, map->overlay))
676 return -EEXIST;
677
678 fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
679 fan_map->underlay = map->underlay;
680 fan_map->overlay = map->overlay;
681 fan_map->underlay_prefix = map->underlay_prefix;
682 fan_map->overlay_mask = ntohl(overlay_mask);
683 fan_map->overlay_prefix = map->overlay_prefix;
0b500f42 684
11cd9476 685 list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps);
0b500f42
JV
686
687 return 0;
688}
689
690
691static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
692 struct ip_tunnel_parm *parms)
693{
11cd9476 694 struct ifla_fan_map *map;
0b500f42
JV
695 struct nlattr *attr;
696 int rem, rv;
697
698 if (!data[IFLA_IPTUN_FAN_MAP])
699 return 0;
700
701 if (parms->iph.daddr)
702 return -EINVAL;
703
704 nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
705 map = nla_data(attr);
11cd9476 706 rv = ipip_fan_add_map(t, map);
0b500f42
JV
707 if (rv)
708 return rv;
709 }
710
711 return 0;
712}
713
be42da0e 714static int ipip_newlink(struct net *src_net, struct net_device *dev,
7a3f4a18
MS
715 struct nlattr *tb[], struct nlattr *data[],
716 struct netlink_ext_ack *extack)
be42da0e 717{
cfc7381b 718 struct ip_tunnel *t = netdev_priv(dev);
fd58156e 719 struct ip_tunnel_parm p;
473ab820 720 struct ip_tunnel_encap ipencap;
9830ad4c 721 __u32 fwmark = 0;
0b500f42 722 int err;
473ab820
TH
723
724 if (ipip_netlink_encap_parms(data, &ipencap)) {
0b500f42 725 err = ip_tunnel_encap_setup(t, &ipencap);
473ab820
TH
726
727 if (err < 0)
728 return err;
729 }
be42da0e 730
9830ad4c 731 ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
0b500f42
JV
732 err = ipip_netlink_fan(data, t, &p);
733 if (err < 0)
734 return err;
9830ad4c 735 return ip_tunnel_newlink(dev, tb, &p, fwmark);
be42da0e
ND
736}
737
738static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
ad744b22
MS
739 struct nlattr *data[],
740 struct netlink_ext_ack *extack)
be42da0e 741{
9830ad4c 742 struct ip_tunnel *t = netdev_priv(dev);
be42da0e 743 struct ip_tunnel_parm p;
473ab820 744 struct ip_tunnel_encap ipencap;
cfc7381b 745 bool collect_md;
9830ad4c 746 __u32 fwmark = t->fwmark;
0b500f42 747 int err;
473ab820
TH
748
749 if (ipip_netlink_encap_parms(data, &ipencap)) {
0b500f42 750 err = ip_tunnel_encap_setup(t, &ipencap);
473ab820
TH
751
752 if (err < 0)
753 return err;
754 }
be42da0e 755
9830ad4c 756 ipip_netlink_parms(data, &p, &collect_md, &fwmark);
cfc7381b
AS
757 if (collect_md)
758 return -EINVAL;
0b500f42
JV
759 err = ipip_netlink_fan(data, t, &p);
760 if (err < 0)
761 return err;
be42da0e
ND
762
763 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
764 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
765 return -EINVAL;
766
9830ad4c 767 return ip_tunnel_changelink(dev, tb, &p, fwmark);
be42da0e
ND
768}
769
0974658d
ND
770static size_t ipip_get_size(const struct net_device *dev)
771{
772 return
773 /* IFLA_IPTUN_LINK */
774 nla_total_size(4) +
775 /* IFLA_IPTUN_LOCAL */
776 nla_total_size(4) +
777 /* IFLA_IPTUN_REMOTE */
778 nla_total_size(4) +
779 /* IFLA_IPTUN_TTL */
780 nla_total_size(1) +
781 /* IFLA_IPTUN_TOS */
782 nla_total_size(1) +
1b69e7e6
SH
783 /* IFLA_IPTUN_PROTO */
784 nla_total_size(1) +
befe2aa1
ND
785 /* IFLA_IPTUN_PMTUDISC */
786 nla_total_size(1) +
473ab820
TH
787 /* IFLA_IPTUN_ENCAP_TYPE */
788 nla_total_size(2) +
789 /* IFLA_IPTUN_ENCAP_FLAGS */
790 nla_total_size(2) +
791 /* IFLA_IPTUN_ENCAP_SPORT */
792 nla_total_size(2) +
793 /* IFLA_IPTUN_ENCAP_DPORT */
794 nla_total_size(2) +
cfc7381b
AS
795 /* IFLA_IPTUN_COLLECT_METADATA */
796 nla_total_size(0) +
9830ad4c
CG
797 /* IFLA_IPTUN_FWMARK */
798 nla_total_size(4) +
0b500f42 799 /* IFLA_IPTUN_FAN_MAP */
11cd9476 800 nla_total_size(sizeof(struct ifla_fan_map)) * 256 +
0974658d
ND
801 0;
802}
803
804static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
805{
806 struct ip_tunnel *tunnel = netdev_priv(dev);
807 struct ip_tunnel_parm *parm = &tunnel->parms;
808
809 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
930345ea
JB
810 nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
811 nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
0974658d 812 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
befe2aa1 813 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
1b69e7e6 814 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
befe2aa1 815 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
9830ad4c
CG
816 !!(parm->iph.frag_off & htons(IP_DF))) ||
817 nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
0974658d 818 goto nla_put_failure;
473ab820
TH
819
820 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
821 tunnel->encap.type) ||
3e97fa70
SD
822 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
823 tunnel->encap.sport) ||
824 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
825 tunnel->encap.dport) ||
473ab820 826 nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
e1b2cb65 827 tunnel->encap.flags))
473ab820
TH
828 goto nla_put_failure;
829
cfc7381b
AS
830 if (tunnel->collect_md)
831 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
832 goto nla_put_failure;
11cd9476 833 if (fan_has_map(&tunnel->fan)) {
0b500f42 834 struct nlattr *fan_nest;
11cd9476 835 struct ip_fan_map *fan_map;
0b500f42
JV
836
837 fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
838 if (!fan_nest)
839 goto nla_put_failure;
11cd9476
JV
840 list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) {
841 struct ifla_fan_map map;
842
843 map.underlay = fan_map->underlay;
844 map.underlay_prefix = fan_map->underlay_prefix;
845 map.overlay = fan_map->overlay;
846 map.overlay_prefix = fan_map->overlay_prefix;
847 if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
848 goto nla_put_failure;
0b500f42
JV
849 }
850 nla_nest_end(skb, fan_nest);
851 }
852
0974658d
ND
853 return 0;
854
855nla_put_failure:
856 return -EMSGSIZE;
857}
858
be42da0e
ND
859static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
860 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
861 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
862 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
863 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
864 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
1b69e7e6 865 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
be42da0e 866 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
473ab820
TH
867 [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
868 [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
869 [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
870 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
cfc7381b 871 [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
9830ad4c 872 [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
0b500f42
JV
873
874 [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY },
875 [IFLA_IPTUN_FAN_MAP] = { .type = NLA_NESTED },
be42da0e
ND
876};
877
0974658d
ND
878static struct rtnl_link_ops ipip_link_ops __read_mostly = {
879 .kind = "ipip",
880 .maxtype = IFLA_IPTUN_MAX,
be42da0e 881 .policy = ipip_policy,
0974658d 882 .priv_size = sizeof(struct ip_tunnel),
be42da0e 883 .setup = ipip_tunnel_setup,
1b69e7e6 884 .validate = ipip_tunnel_validate,
be42da0e
ND
885 .newlink = ipip_newlink,
886 .changelink = ipip_changelink,
fd58156e 887 .dellink = ip_tunnel_dellink,
0974658d
ND
888 .get_size = ipip_get_size,
889 .fill_info = ipip_fill_info,
1728d4fa 890 .get_link_net = ip_tunnel_get_link_net,
0974658d
ND
891};
892
6dcd814b 893static struct xfrm_tunnel ipip_handler __read_mostly = {
1da177e4
LT
894 .handler = ipip_rcv,
895 .err_handler = ipip_err,
d2acc347 896 .priority = 1,
1da177e4
LT
897};
898
1b69e7e6
SH
899#if IS_ENABLED(CONFIG_MPLS)
900static struct xfrm_tunnel mplsip_handler __read_mostly = {
901 .handler = mplsip_rcv,
902 .err_handler = ipip_err,
903 .priority = 1,
904};
905#endif
906
2c8c1e72 907static int __net_init ipip_init_net(struct net *net)
10dc4c7b 908{
fd58156e 909 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
10dc4c7b
PE
910}
911
2c8c1e72 912static void __net_exit ipip_exit_net(struct net *net)
10dc4c7b 913{
fd58156e 914 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
6c742e71 915 ip_tunnel_delete_net(itn, &ipip_link_ops);
10dc4c7b
PE
916}
917
918static struct pernet_operations ipip_net_ops = {
919 .init = ipip_init_net,
920 .exit = ipip_exit_net,
86de8a63 921 .id = &ipip_net_id,
fd58156e 922 .size = sizeof(struct ip_tunnel_net),
10dc4c7b
PE
923};
924
0b500f42
JV
#ifdef CONFIG_SYSCTL
/* Handle returned by register_net_sysctl(); released in ipip_fini(). */
static struct ctl_table_header *ipip_fan_header;

/* Value exposed through the read-only (0444) "version" sysctl entry. */
static unsigned int ipip_fan_version = 3;

/* Table registered under "net/fan"; the empty entry terminates it. */
static struct ctl_table ipip_fan_sysctls[] = {
	{
		.procname	= "version",
		.mode		= 0444,
		.data		= &ipip_fan_version,
		.maxlen		= sizeof(ipip_fan_version),
		.proc_handler	= proc_dointvec,
	},
	{},
};

#endif /* CONFIG_SYSCTL */
941
1da177e4
LT
942static int __init ipip_init(void)
943{
944 int err;
945
1b69e7e6 946 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
1da177e4 947
d5aa407f
AD
948 err = register_pernet_device(&ipip_net_ops);
949 if (err < 0)
950 return err;
951 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
952 if (err < 0) {
058bd4d2 953 pr_info("%s: can't register tunnel\n", __func__);
1b69e7e6
SH
954 goto xfrm_tunnel_ipip_failed;
955 }
956#if IS_ENABLED(CONFIG_MPLS)
957 err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
958 if (err < 0) {
959 pr_info("%s: can't register tunnel\n", __func__);
960 goto xfrm_tunnel_mplsip_failed;
1da177e4 961 }
1b69e7e6 962#endif
0974658d
ND
963 err = rtnl_link_register(&ipip_link_ops);
964 if (err < 0)
965 goto rtnl_link_failed;
966
0b500f42
JV
967#ifdef CONFIG_SYSCTL
968 ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
969 ipip_fan_sysctls);
970 if (!ipip_fan_header) {
971 err = -ENOMEM;
972 goto sysctl_failed;
973 }
974#endif /* CONFIG_SYSCTL */
975
0974658d 976out:
1da177e4 977 return err;
0974658d 978
0b500f42
JV
979#ifdef CONFIG_SYSCTL
980sysctl_failed:
981 rtnl_link_unregister(&ipip_link_ops);
982#endif /* CONFIG_SYSCTL */
0974658d 983rtnl_link_failed:
1b69e7e6
SH
984#if IS_ENABLED(CONFIG_MPLS)
985 xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
986xfrm_tunnel_mplsip_failed:
987
988#endif
0974658d 989 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1b69e7e6 990xfrm_tunnel_ipip_failed:
0974658d
ND
991 unregister_pernet_device(&ipip_net_ops);
992 goto out;
1da177e4
LT
993}
994
995static void __exit ipip_fini(void)
996{
0b500f42
JV
997#ifdef CONFIG_SYSCTL
998 unregister_net_sysctl_table(ipip_fan_header);
999#endif /* CONFIG_SYSCTL */
0974658d 1000 rtnl_link_unregister(&ipip_link_ops);
c0d56408 1001 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
058bd4d2 1002 pr_info("%s: can't deregister tunnel\n", __func__);
1b69e7e6
SH
1003#if IS_ENABLED(CONFIG_MPLS)
1004 if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
1005 pr_info("%s: can't deregister tunnel\n", __func__);
1006#endif
86de8a63 1007 unregister_pernet_device(&ipip_net_ops);
1da177e4
LT
1008}
1009
1010module_init(ipip_init);
1011module_exit(ipip_fini);
1012MODULE_LICENSE("GPL");
f98f89a0 1013MODULE_ALIAS_RTNL_LINK("ipip");
8909c9ad 1014MODULE_ALIAS_NETDEV("tunl0");