]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/ipv4/ipip.c
cpufreq: CPPC: Don't set transition_latency
[mirror_ubuntu-bionic-kernel.git] / net / ipv4 / ipip.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: IP/IP protocol decoder.
1da177e4 3 *
1da177e4
LT
4 * Authors:
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
6 *
7 * Fixes:
8 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 *
25 */
26
27/* tunnel.c: an IP tunnel driver
28
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
31
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
34
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
e905a9ed 36
1da177e4
LT
37 Minor tweaks:
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
42 Added tx_dropped stat
e905a9ed 43
113aa838 44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
1da177e4
LT
45
46 Reworked:
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
51
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
e905a9ed 53
1da177e4
LT
54*/
55
56/* Things I wish I had known when writing the tunnel driver:
57
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
61
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
64
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
e905a9ed 76 find out how much more space you can allocate by calling
1da177e4
LT
77 "skb_tailroom(skb)".
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
85 */
86
87/*
88 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
89
90 For comments look at net/ipv4/ip_gre.c --ANK
91 */
92
e905a9ed 93
4fc268d2 94#include <linux/capability.h>
1da177e4
LT
95#include <linux/module.h>
96#include <linux/types.h>
1da177e4 97#include <linux/kernel.h>
5a0e3ad6 98#include <linux/slab.h>
7c0f6ba6 99#include <linux/uaccess.h>
1da177e4
LT
100#include <linux/skbuff.h>
101#include <linux/netdevice.h>
102#include <linux/in.h>
103#include <linux/tcp.h>
104#include <linux/udp.h>
105#include <linux/if_arp.h>
1da177e4
LT
106#include <linux/init.h>
107#include <linux/netfilter_ipv4.h>
46f25dff 108#include <linux/if_ether.h>
55ff02b2 109#include <linux/inetdevice.h>
d57420a1 110#include <linux/rculist.h>
1da177e4
LT
111
112#include <net/sock.h>
113#include <net/ip.h>
114#include <net/icmp.h>
c5441932 115#include <net/ip_tunnels.h>
1da177e4
LT
116#include <net/inet_ecn.h>
117#include <net/xfrm.h>
10dc4c7b
PE
118#include <net/net_namespace.h>
119#include <net/netns/generic.h>
cfc7381b 120#include <net/dst_metadata.h>
1da177e4 121
/* Module parameter (mode 0644); handed to ip_tunnel_rcv() by ipip_tunnel_rcv(). */
eccc1bb8 122static bool log_ecn_error = true;
123module_param(log_ecn_error, bool, 0644);
124MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
125
/* Per-net-namespace id: used with net_generic() to find this driver's
 * struct ip_tunnel_net, and published through ipip_net_ops.id.
 */
c7d03a00 126static unsigned int ipip_net_id __read_mostly;
10dc4c7b 127
/* Forward declarations: ipip_tunnel_init() is referenced by ipip_netdev_ops
 * before its definition; ipip_link_ops is defined further down in this file.
 */
3c97af99 128static int ipip_tunnel_init(struct net_device *dev);
0974658d 129static struct rtnl_link_ops ipip_link_ops __read_mostly;
1da177e4 130
d2acc347 131static int ipip_err(struct sk_buff *skb, u32 info)
1da177e4 132{
f3594f0a
XL
133 /* All the routers (except for Linux) return only
134 * 8 bytes of packet payload. It means, that precise relaying of
135 * ICMP in the real Internet is absolutely infeasible.
136 */
fd58156e
PS
137 struct net *net = dev_net(skb->dev);
138 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
b71d1d42 139 const struct iphdr *iph = (const struct iphdr *)skb->data;
fd58156e
PS
140 const int type = icmp_hdr(skb)->type;
141 const int code = icmp_hdr(skb)->code;
f3594f0a
XL
142 struct ip_tunnel *t;
143 int err = 0;
144
145 switch (type) {
146 case ICMP_DEST_UNREACH:
147 switch (code) {
148 case ICMP_SR_FAILED:
149 /* Impossible event. */
150 goto out;
151 default:
152 /* All others are translated to HOST_UNREACH.
153 * rfc2003 contains "deep thoughts" about NET_UNREACH,
154 * I believe they are just ether pollution. --ANK
155 */
156 break;
157 }
158 break;
159
160 case ICMP_TIME_EXCEEDED:
161 if (code != ICMP_EXC_TTL)
162 goto out;
163 break;
164
165 case ICMP_REDIRECT:
166 break;
167
168 default:
169 goto out;
170 }
1da177e4 171
fd58156e
PS
172 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
173 iph->daddr, iph->saddr, 0);
f3594f0a
XL
174 if (!t) {
175 err = -ENOENT;
36393395 176 goto out;
f3594f0a 177 }
36393395
DM
178
179 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
f3594f0a
XL
180 ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
181 iph->protocol, 0);
36393395
DM
182 goto out;
183 }
184
55be7a9c 185 if (type == ICMP_REDIRECT) {
f3594f0a 186 ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
55be7a9c
DM
187 goto out;
188 }
189
f3594f0a
XL
190 if (t->parms.iph.daddr == 0) {
191 err = -ENOENT;
1da177e4 192 goto out;
f3594f0a 193 }
d2acc347 194
1da177e4
LT
195 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
196 goto out;
197
26d94b46 198 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
199 t->err_count++;
200 else
201 t->err_count = 1;
202 t->err_time = jiffies;
b0558ef2 203
fd58156e 204out:
d2acc347 205 return err;
1da177e4
LT
206}
207
/* Tunnel packet info used by ipip_tunnel_rcv() for IPv4-in-IPv4 payloads;
 * only the inner protocol (ETH_P_IP) is set.
 */
1b69e7e6 208static const struct tnl_ptk_info ipip_tpi = {
fd58156e
PS
209 /* no tunnel info required for ipip. */
210 .proto = htons(ETH_P_IP),
211};
212
1b69e7e6
SH
/* Tunnel packet info used by ipip_tunnel_rcv() for MPLS-in-IPv4 payloads;
 * only built when CONFIG_MPLS is enabled.
 */
213#if IS_ENABLED(CONFIG_MPLS)
214static const struct tnl_ptk_info mplsip_tpi = {
215 /* no tunnel info required for mplsip. */
216 .proto = htons(ETH_P_MPLS_UC),
217};
218#endif
219
220static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
1da177e4 221{
fd58156e
PS
222 struct net *net = dev_net(skb->dev);
223 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
cfc7381b 224 struct metadata_dst *tun_dst = NULL;
1da177e4 225 struct ip_tunnel *tunnel;
3d7b46cd 226 const struct iphdr *iph;
3c97af99 227
3d7b46cd 228 iph = ip_hdr(skb);
fd58156e
PS
229 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
230 iph->saddr, iph->daddr, 0);
231 if (tunnel) {
1b69e7e6
SH
232 const struct tnl_ptk_info *tpi;
233
234 if (tunnel->parms.iph.protocol != ipproto &&
235 tunnel->parms.iph.protocol != 0)
236 goto drop;
237
eccc1bb8 238 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
239 goto drop;
1b69e7e6
SH
240#if IS_ENABLED(CONFIG_MPLS)
241 if (ipproto == IPPROTO_MPLS)
242 tpi = &mplsip_tpi;
243 else
244#endif
245 tpi = &ipip_tpi;
246 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
737e828b 247 goto drop;
cfc7381b
AS
248 if (tunnel->collect_md) {
249 tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
250 if (!tun_dst)
251 return 0;
252 }
253 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
1da177e4 254 }
1da177e4 255
1da177e4 256 return -1;
eccc1bb8 257
258drop:
259 kfree_skb(skb);
260 return 0;
1da177e4
LT
261}
262
1b69e7e6
SH
/* Receive entry point installed as ipip_handler.handler: forwards to
 * ipip_tunnel_rcv() with IPPROTO_IPIP and returns its result.
 */
263static int ipip_rcv(struct sk_buff *skb)
264{
265 return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
266}
267
/* Receive entry point installed as mplsip_handler.handler (CONFIG_MPLS only):
 * forwards to ipip_tunnel_rcv() with IPPROTO_MPLS and returns its result.
 */
268#if IS_ENABLED(CONFIG_MPLS)
269static int mplsip_rcv(struct sk_buff *skb)
270{
271 return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
272}
273#endif
274
d57420a1 275static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr)
55ff02b2 276{
d57420a1
JV
277 struct ip_fan_map *fan_map;
278
279 rcu_read_lock();
280 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
281 if (fan_map->overlay ==
282 (daddr & inet_make_mask(fan_map->overlay_prefix))) {
283 rcu_read_unlock();
284 return fan_map;
285 }
286 }
287 rcu_read_unlock();
288
289 return NULL;
55ff02b2
JV
290}
291
d57420a1
JV
292/* Determine fan tunnel endpoint to send packet to, based on the inner IP
293 * address.
294 *
295 * Given a /8 overlay and /16 underlay, for an overlay (inner) address
296 * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
297 * two octets of the underlay network (the network portion of a /16), "A"
298 * and "B" are the low order two octets of the underlay network host (the
299 * host portion of a /16), and "Y" is a configured first octet of the
300 * overlay network.
301 *
302 * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
303 * host overlay subnet 99.3.4.0/24. An overlay network datagram from
304 * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
305 * which hosts overlay network subnet 99.6.7.0/24. This transformation is
306 * described in detail further below.
307 *
308 * Using netmasks for the overlay and underlay other than /8 and /16, as
309 * shown above, can yield larger (or smaller) overlay subnets, with the
310 * trade-off of allowing fewer (or more) underlay hosts to participate.
311 *
312 * The size of each overlay network subnet is defined by the total of the
313 * network mask of the overlay plus the size of host portion of the
314 * underlay network. In the above example, /8 + /16 = /24.
315 *
316 * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
317 * this case, the network portion of the underlay is 10.99.224.0/20, and
318 * the host portion is 0.0.14.5 (12 bits). To determine the overlay
319 * network subnet, the 12 bits of host portion are left shifted 12 bits
320 * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
321 * overlay subnet of 99.224.80/20, composed of 8 bits overlay, followed by
322 * 12 bits underlay. This yields 12 bits in the overlay network portion,
323 * allowing for 4094 addresses in each overlay network subnet. The
324 * trade-off is that fewer hosts may participate in the underlay network,
325 * as its host address size has shrunk from 16 bits (65534 addresses) in
326 * the first example to 12 bits (4094 addresses) here.
327 *
328 * For fewer hosts per overlay subnet (permitting a larger number of
329 * underlay hosts to participate), the underlay netmask may be made
330 * smaller.
331 *
332 * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
333 * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
334 * the 20 bits of host by 4 (so that its highest order bit is adjacent to
335 * the lowest order bit of the /8 overlay). This yields an overlay subnet
336 * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
337 * the underlay). This provides more addresses for the underlay network
338 * (approximately 2^20), but each host's segment of the overlay provides
339 * only 4 bits of addresses (14 usable).
340 *
341 * It is also possible to adjust the overlay subnet.
342 *
343 * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
344 * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
345 * shifted 15 bits (/20 - /5), yielding an overlay network of
346 * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
347 * overlay network of 242.107.128.0/17.
348 *
349 * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
350 * underlay host 10.224.220.10, the underlay host portion (.10) is left
351 * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
352 * This would permit 254 addresses on the underlay, with each overlay
353 * segment providing approximately 2^14 - 2 addresses (16382).
354 *
355 * For packets being encapsulated, the overlay network destination IP
356 * address is deconstructed into its overlay and underlay-derived
357 * portions. The underlay portion (determined by the overlay mask and
358 * overlay subnet mask) is right shifted according to the size of the
359 * underlay network mask. This value is then ORed with the network
360 * portion of the underlay network to produce the underlay network
361 * destination for the encapsulated datagram.
362 *
363 * For example, using the initial example of underlay 10.88.3.4/16 and
364 * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
365 * subnet 99.3.4.0/24 with specific host 99.3.4.5. A datagram from
366 * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
367 * of the address extracted. This is a number of bits equal to underlay
368 * network host portion. In the destination address, the highest order of
369 * these bits is one bit lower than the lowest order bit from the overlay
370 * network mask.
371 *
372 * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
373 * underlay mask is /16 (leaving 16 bits for the host portion). The bits
374 * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
375 * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
376 * which is 1 bit lower than the lowest order overlay address bit).
55ff02b2 377 *
d57420a1
JV
378 * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
379 * This value is then ORed with the underlay network portion,
380 * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
381 * the encapsulated datagram.
382 *
383 * Another transform using the final example: overlay 100.64.0.0/10 and
384 * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
385 * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
386 * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
387 * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
388 * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
389 * network portion and the underlay host portion) bits, yielding 0.0.0.11.
390 * This is ORed with the underlay network portion, 10.224.220.0/24, giving
391 * the underlay destination of 10.224.220.11 for overlay destination
392 * 100.66.200.5.
55ff02b2
JV
393 */
394static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
395{
d57420a1 396 struct ip_fan_map *f_map;
55ff02b2
JV
397 u32 daddr, underlay;
398
d57420a1
JV
399 f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr);
400 if (!f_map)
401 return -ENOENT;
402
55ff02b2 403 daddr = ntohl(ip_hdr(skb)->daddr);
d57420a1 404 underlay = ntohl(f_map->underlay);
55ff02b2
JV
405 if (!underlay)
406 return -EINVAL;
407
408 *iph = tunnel->parms.iph;
d57420a1
JV
409 iph->daddr = htonl(underlay |
410 ((daddr & ~f_map->overlay_mask) >>
411 (32 - f_map->overlay_prefix -
412 (32 - f_map->underlay_prefix))));
55ff02b2
JV
413 return 0;
414}
415
1da177e4
LT
416/*
417 * This function assumes it is being called from dev_queue_xmit()
418 * and that skb is filled properly by that function.
419 */
1b69e7e6
SH
420static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
421 struct net_device *dev)
1da177e4 422{
2941a486 423 struct ip_tunnel *tunnel = netdev_priv(dev);
b71d1d42 424 const struct iphdr *tiph = &tunnel->parms.iph;
1b69e7e6 425 u8 ipproto;
55ff02b2 426 struct iphdr fiph;
1b69e7e6
SH
427
428 switch (skb->protocol) {
429 case htons(ETH_P_IP):
430 ipproto = IPPROTO_IPIP;
431 break;
432#if IS_ENABLED(CONFIG_MPLS)
433 case htons(ETH_P_MPLS_UC):
434 ipproto = IPPROTO_MPLS;
435 break;
436#endif
437 default:
438 goto tx_error;
439 }
1da177e4 440
1b69e7e6 441 if (tiph->protocol != ipproto && tiph->protocol != 0)
1da177e4 442 goto tx_error;
1da177e4 443
7e13318d 444 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
aed069df 445 goto tx_error;
8344bfc6 446
d57420a1 447 if (fan_has_map(&tunnel->fan)) {
55ff02b2
JV
448 if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
449 goto tx_error;
450 tiph = &fiph;
451 } else {
452 tiph = &tunnel->parms.iph;
453 }
454
1b69e7e6 455 skb_set_inner_ipproto(skb, ipproto);
077c5a09 456
cfc7381b
AS
457 if (tunnel->collect_md)
458 ip_md_tunnel_xmit(skb, dev, ipproto);
459 else
460 ip_tunnel_xmit(skb, dev, tiph, ipproto);
6ed10654 461 return NETDEV_TX_OK;
1da177e4 462
1da177e4 463tx_error:
3acfa1e7 464 kfree_skb(skb);
aed069df 465
cb32f511 466 dev->stats.tx_errors++;
6ed10654 467 return NETDEV_TX_OK;
1da177e4
LT
468}
469
1b69e7e6
SH
470static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
471{
472 switch (ipproto) {
473 case 0:
474 case IPPROTO_IPIP:
475#if IS_ENABLED(CONFIG_MPLS)
476 case IPPROTO_MPLS:
477#endif
478 return true;
479 }
480
481 return false;
482}
483
1da177e4 484static int
fd58156e 485ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1da177e4
LT
486{
487 int err = 0;
488 struct ip_tunnel_parm p;
1da177e4 489
fd58156e
PS
490 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
491 return -EFAULT;
1da177e4 492
3b7b514f 493 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
1b69e7e6
SH
494 if (p.iph.version != 4 ||
495 !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
3b7b514f
CW
496 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
497 return -EINVAL;
498 }
499
252a8fbe
ED
500 p.i_key = p.o_key = 0;
501 p.i_flags = p.o_flags = 0;
fd58156e
PS
502 err = ip_tunnel_ioctl(dev, &p, cmd);
503 if (err)
504 return err;
505
506 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
507 return -EFAULT;
508
1da177e4
LT
509 return 0;
510}
511
/* Net device operations for IPIP tunnel devices (installed by
 * ipip_tunnel_setup()).  Init, transmit and ioctl are implemented in this
 * file; the remaining callbacks are the shared ip_tunnel_* helpers.
 */
23a12b14 512static const struct net_device_ops ipip_netdev_ops = {
fd58156e
PS
513 .ndo_init = ipip_tunnel_init,
514 .ndo_uninit = ip_tunnel_uninit,
23a12b14
SH
515 .ndo_start_xmit = ipip_tunnel_xmit,
516 .ndo_do_ioctl = ipip_tunnel_ioctl,
fd58156e
PS
517 .ndo_change_mtu = ip_tunnel_change_mtu,
518 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 519 .ndo_get_iflink = ip_tunnel_get_iflink,
23a12b14
SH
520};
521
c3b89fbb
ED
/* Feature flags that ipip_tunnel_setup() ORs into both dev->features and
 * dev->hw_features.
 */
522#define IPIP_FEATURES (NETIF_F_SG | \
523 NETIF_F_FRAGLIST | \
524 NETIF_F_HIGHDMA | \
cb32f511 525 NETIF_F_GSO_SOFTWARE | \
c3b89fbb
ED
526 NETIF_F_HW_CSUM)
527
1da177e4
LT
528static void ipip_tunnel_setup(struct net_device *dev)
529{
d57420a1
JV
530 struct ip_tunnel *t = netdev_priv(dev);
531
23a12b14 532 dev->netdev_ops = &ipip_netdev_ops;
1da177e4
LT
533
534 dev->type = ARPHRD_TUNNEL;
1da177e4 535 dev->flags = IFF_NOARP;
1da177e4 536 dev->addr_len = 4;
153f0943 537 dev->features |= NETIF_F_LLTX;
02875878 538 netif_keep_dst(dev);
c3b89fbb
ED
539
540 dev->features |= IPIP_FEATURES;
541 dev->hw_features |= IPIP_FEATURES;
fd58156e 542 ip_tunnel_setup(dev, ipip_net_id);
d57420a1 543 INIT_LIST_HEAD(&t->fan.fan_maps);
1da177e4
LT
544}
545
3c97af99 546static int ipip_tunnel_init(struct net_device *dev)
1da177e4 547{
23a12b14 548 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 549
1da177e4
LT
550 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
551 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
552
473ab820
TH
553 tunnel->tun_hlen = 0;
554 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
fd58156e 555 return ip_tunnel_init(dev);
1da177e4
LT
556}
557
a8b8a889
MS
558static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
559 struct netlink_ext_ack *extack)
1b69e7e6
SH
560{
561 u8 proto;
562
563 if (!data || !data[IFLA_IPTUN_PROTO])
564 return 0;
565
566 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
567 if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
568 return -EINVAL;
569
570 return 0;
571}
572
be42da0e 573static void ipip_netlink_parms(struct nlattr *data[],
9830ad4c
CG
574 struct ip_tunnel_parm *parms, bool *collect_md,
575 __u32 *fwmark)
be42da0e
ND
576{
577 memset(parms, 0, sizeof(*parms));
578
579 parms->iph.version = 4;
580 parms->iph.protocol = IPPROTO_IPIP;
581 parms->iph.ihl = 5;
cfc7381b 582 *collect_md = false;
be42da0e
ND
583
584 if (!data)
585 return;
586
587 if (data[IFLA_IPTUN_LINK])
588 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
589
590 if (data[IFLA_IPTUN_LOCAL])
67b61f6c 591 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
be42da0e
ND
592
593 if (data[IFLA_IPTUN_REMOTE])
67b61f6c 594 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
be42da0e
ND
595
596 if (data[IFLA_IPTUN_TTL]) {
597 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
598 if (parms->iph.ttl)
599 parms->iph.frag_off = htons(IP_DF);
600 }
601
602 if (data[IFLA_IPTUN_TOS])
603 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
604
1b69e7e6
SH
605 if (data[IFLA_IPTUN_PROTO])
606 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
607
be42da0e
ND
608 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
609 parms->iph.frag_off = htons(IP_DF);
cfc7381b
AS
610
611 if (data[IFLA_IPTUN_COLLECT_METADATA])
612 *collect_md = true;
9830ad4c
CG
613
614 if (data[IFLA_IPTUN_FWMARK])
615 *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
be42da0e
ND
616}
617
473ab820
TH
618/* This function returns true when ENCAP attributes are present in the nl msg */
619static bool ipip_netlink_encap_parms(struct nlattr *data[],
620 struct ip_tunnel_encap *ipencap)
621{
622 bool ret = false;
623
624 memset(ipencap, 0, sizeof(*ipencap));
625
626 if (!data)
627 return ret;
628
629 if (data[IFLA_IPTUN_ENCAP_TYPE]) {
630 ret = true;
631 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
632 }
633
634 if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
635 ret = true;
636 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
637 }
638
639 if (data[IFLA_IPTUN_ENCAP_SPORT]) {
640 ret = true;
3e97fa70 641 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
473ab820
TH
642 }
643
644 if (data[IFLA_IPTUN_ENCAP_DPORT]) {
645 ret = true;
3e97fa70 646 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
473ab820
TH
647 }
648
649 return ret;
650}
651
d57420a1 652static void ipip_fan_flush_map(struct ip_tunnel *t)
55ff02b2 653{
d57420a1
JV
654 struct ip_fan_map *fan_map;
655
656 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
657 list_del_rcu(&fan_map->list);
658 kfree_rcu(fan_map, rcu);
659 }
55ff02b2
JV
660}
661
d57420a1 662static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay)
55ff02b2 663{
d57420a1 664 struct ip_fan_map *fan_map;
55ff02b2 665
d57420a1
JV
666 fan_map = ipip_fan_find_map(t, overlay);
667 if (!fan_map)
668 return -ENOENT;
669
670 list_del_rcu(&fan_map->list);
671 kfree_rcu(fan_map, rcu);
55ff02b2 672
d57420a1
JV
673 return 0;
674}
55ff02b2 675
d57420a1
JV
676static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map)
677{
678 __be32 overlay_mask, underlay_mask;
679 struct ip_fan_map *fan_map;
55ff02b2 680
d57420a1
JV
681 overlay_mask = inet_make_mask(map->overlay_prefix);
682 underlay_mask = inet_make_mask(map->underlay_prefix);
55ff02b2 683
d57420a1 684 if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
55ff02b2
JV
685 return -EINVAL;
686
d57420a1
JV
687 if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
688 return -EINVAL;
55ff02b2 689
d57420a1
JV
690 /* Special case: overlay 0 and underlay 0: flush all mappings */
691 if (!map->overlay && !map->underlay) {
692 ipip_fan_flush_map(t);
55ff02b2
JV
693 return 0;
694 }
d57420a1
JV
695
696 /* Special case: overlay set and underlay 0: clear map for overlay */
697 if (!map->underlay)
698 return ipip_fan_del_map(t, map->overlay);
699
700 if (ipip_fan_find_map(t, map->overlay))
701 return -EEXIST;
702
703 fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
704 fan_map->underlay = map->underlay;
705 fan_map->overlay = map->overlay;
706 fan_map->underlay_prefix = map->underlay_prefix;
707 fan_map->overlay_mask = ntohl(overlay_mask);
708 fan_map->overlay_prefix = map->overlay_prefix;
55ff02b2 709
d57420a1 710 list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps);
55ff02b2
JV
711
712 return 0;
713}
714
715
716static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
717 struct ip_tunnel_parm *parms)
718{
d57420a1 719 struct ifla_fan_map *map;
55ff02b2
JV
720 struct nlattr *attr;
721 int rem, rv;
722
723 if (!data[IFLA_IPTUN_FAN_MAP])
724 return 0;
725
726 if (parms->iph.daddr)
727 return -EINVAL;
728
729 nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
730 map = nla_data(attr);
d57420a1 731 rv = ipip_fan_add_map(t, map);
55ff02b2
JV
732 if (rv)
733 return rv;
734 }
735
736 return 0;
737}
738
be42da0e 739static int ipip_newlink(struct net *src_net, struct net_device *dev,
7a3f4a18
MS
740 struct nlattr *tb[], struct nlattr *data[],
741 struct netlink_ext_ack *extack)
be42da0e 742{
cfc7381b 743 struct ip_tunnel *t = netdev_priv(dev);
fd58156e 744 struct ip_tunnel_parm p;
473ab820 745 struct ip_tunnel_encap ipencap;
9830ad4c 746 __u32 fwmark = 0;
55ff02b2 747 int err;
473ab820
TH
748
749 if (ipip_netlink_encap_parms(data, &ipencap)) {
55ff02b2 750 err = ip_tunnel_encap_setup(t, &ipencap);
473ab820
TH
751
752 if (err < 0)
753 return err;
754 }
be42da0e 755
9830ad4c 756 ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
55ff02b2
JV
757 err = ipip_netlink_fan(data, t, &p);
758 if (err < 0)
759 return err;
9830ad4c 760 return ip_tunnel_newlink(dev, tb, &p, fwmark);
be42da0e
ND
761}
762
763static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
ad744b22
MS
764 struct nlattr *data[],
765 struct netlink_ext_ack *extack)
be42da0e 766{
9830ad4c 767 struct ip_tunnel *t = netdev_priv(dev);
be42da0e 768 struct ip_tunnel_parm p;
473ab820 769 struct ip_tunnel_encap ipencap;
cfc7381b 770 bool collect_md;
9830ad4c 771 __u32 fwmark = t->fwmark;
55ff02b2 772 int err;
473ab820
TH
773
774 if (ipip_netlink_encap_parms(data, &ipencap)) {
55ff02b2 775 err = ip_tunnel_encap_setup(t, &ipencap);
473ab820
TH
776
777 if (err < 0)
778 return err;
779 }
be42da0e 780
9830ad4c 781 ipip_netlink_parms(data, &p, &collect_md, &fwmark);
cfc7381b
AS
782 if (collect_md)
783 return -EINVAL;
55ff02b2
JV
784 err = ipip_netlink_fan(data, t, &p);
785 if (err < 0)
786 return err;
be42da0e
ND
787
788 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
789 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
790 return -EINVAL;
791
9830ad4c 792 return ip_tunnel_changelink(dev, tb, &p, fwmark);
be42da0e
ND
793}
794
0974658d
ND
795static size_t ipip_get_size(const struct net_device *dev)
796{
797 return
798 /* IFLA_IPTUN_LINK */
799 nla_total_size(4) +
800 /* IFLA_IPTUN_LOCAL */
801 nla_total_size(4) +
802 /* IFLA_IPTUN_REMOTE */
803 nla_total_size(4) +
804 /* IFLA_IPTUN_TTL */
805 nla_total_size(1) +
806 /* IFLA_IPTUN_TOS */
807 nla_total_size(1) +
1b69e7e6
SH
808 /* IFLA_IPTUN_PROTO */
809 nla_total_size(1) +
befe2aa1
ND
810 /* IFLA_IPTUN_PMTUDISC */
811 nla_total_size(1) +
473ab820
TH
812 /* IFLA_IPTUN_ENCAP_TYPE */
813 nla_total_size(2) +
814 /* IFLA_IPTUN_ENCAP_FLAGS */
815 nla_total_size(2) +
816 /* IFLA_IPTUN_ENCAP_SPORT */
817 nla_total_size(2) +
818 /* IFLA_IPTUN_ENCAP_DPORT */
819 nla_total_size(2) +
cfc7381b
AS
820 /* IFLA_IPTUN_COLLECT_METADATA */
821 nla_total_size(0) +
9830ad4c
CG
822 /* IFLA_IPTUN_FWMARK */
823 nla_total_size(4) +
55ff02b2 824 /* IFLA_IPTUN_FAN_MAP */
d57420a1 825 nla_total_size(sizeof(struct ifla_fan_map)) * 256 +
0974658d
ND
826 0;
827}
828
829static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
830{
831 struct ip_tunnel *tunnel = netdev_priv(dev);
832 struct ip_tunnel_parm *parm = &tunnel->parms;
833
834 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
930345ea
JB
835 nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
836 nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
0974658d 837 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
befe2aa1 838 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
1b69e7e6 839 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
befe2aa1 840 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
9830ad4c
CG
841 !!(parm->iph.frag_off & htons(IP_DF))) ||
842 nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
0974658d 843 goto nla_put_failure;
473ab820
TH
844
845 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
846 tunnel->encap.type) ||
3e97fa70
SD
847 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
848 tunnel->encap.sport) ||
849 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
850 tunnel->encap.dport) ||
473ab820 851 nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
e1b2cb65 852 tunnel->encap.flags))
473ab820
TH
853 goto nla_put_failure;
854
cfc7381b
AS
855 if (tunnel->collect_md)
856 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
857 goto nla_put_failure;
d57420a1 858 if (fan_has_map(&tunnel->fan)) {
55ff02b2 859 struct nlattr *fan_nest;
d57420a1 860 struct ip_fan_map *fan_map;
55ff02b2
JV
861
862 fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
863 if (!fan_nest)
864 goto nla_put_failure;
d57420a1
JV
865 list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) {
866 struct ifla_fan_map map;
867
868 map.underlay = fan_map->underlay;
869 map.underlay_prefix = fan_map->underlay_prefix;
870 map.overlay = fan_map->overlay;
871 map.overlay_prefix = fan_map->overlay_prefix;
872 if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
873 goto nla_put_failure;
55ff02b2
JV
874 }
875 nla_nest_end(skb, fan_nest);
876 }
877
0974658d
ND
878 return 0;
879
880nla_put_failure:
881 return -EMSGSIZE;
882}
883
be42da0e
ND
/* Netlink attribute policy for IFLA_IPTUN_* attributes of "ipip" links
 * (referenced by ipip_link_ops.policy).
 */
884static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
885 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
886 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
887 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
888 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
889 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
1b69e7e6 890 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
be42da0e 891 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
473ab820
TH
892 [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
893 [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
894 [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
895 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
cfc7381b 896 [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
9830ad4c 897 [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
55ff02b2
JV
898
/* Range initialiser first, then the specific IFLA_IPTUN_FAN_MAP entry; the
 * later designator takes precedence if it falls inside the range.
 */
899 [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY },
900 [IFLA_IPTUN_FAN_MAP] = { .type = NLA_NESTED },
be42da0e
ND
901};
902
0974658d
ND
/* rtnetlink link operations for devices of kind "ipip".  Setup, validation,
 * create/change and dump callbacks are defined in this file; delete and
 * get_link_net use the shared ip_tunnel_* helpers.
 */
903static struct rtnl_link_ops ipip_link_ops __read_mostly = {
904 .kind = "ipip",
905 .maxtype = IFLA_IPTUN_MAX,
be42da0e 906 .policy = ipip_policy,
0974658d 907 .priv_size = sizeof(struct ip_tunnel),
be42da0e 908 .setup = ipip_tunnel_setup,
1b69e7e6 909 .validate = ipip_tunnel_validate,
be42da0e
ND
910 .newlink = ipip_newlink,
911 .changelink = ipip_changelink,
fd58156e 912 .dellink = ip_tunnel_dellink,
0974658d
ND
913 .get_size = ipip_get_size,
914 .fill_info = ipip_fill_info,
1728d4fa 915 .get_link_net = ip_tunnel_get_link_net,
0974658d
ND
916};
917
6dcd814b 918static struct xfrm_tunnel ipip_handler __read_mostly = {
1da177e4
LT
919 .handler = ipip_rcv,
920 .err_handler = ipip_err,
d2acc347 921 .priority = 1,
1da177e4
LT
922};
923
1b69e7e6
SH
924#if IS_ENABLED(CONFIG_MPLS)
925static struct xfrm_tunnel mplsip_handler __read_mostly = {
926 .handler = mplsip_rcv,
927 .err_handler = ipip_err,
928 .priority = 1,
929};
930#endif
931
2c8c1e72 932static int __net_init ipip_init_net(struct net *net)
10dc4c7b 933{
fd58156e 934 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
10dc4c7b
PE
935}
936
64bc1781 937static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
10dc4c7b 938{
64bc1781 939 ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
10dc4c7b
PE
940}
941
942static struct pernet_operations ipip_net_ops = {
943 .init = ipip_init_net,
64bc1781 944 .exit_batch = ipip_exit_batch_net,
86de8a63 945 .id = &ipip_net_id,
fd58156e 946 .size = sizeof(struct ip_tunnel_net),
10dc4c7b
PE
947};
948
55ff02b2
JV
#ifdef CONFIG_SYSCTL
/* Handle returned by register_net_sysctl(); released in ipip_fini(). */
static struct ctl_table_header *ipip_fan_header;

/* Value exported through the "version" sysctl entry below. */
static unsigned int ipip_fan_version = 3;

/*
 * Sysctl table registered under "net/fan" by ipip_init().  It holds a
 * single entry, "version", created with mode 0444 (read-only).
 */
static struct ctl_table ipip_fan_sysctls[] = {
	{
		.procname	= "version",
		.mode		= 0444,
		.data		= &ipip_fan_version,
		.maxlen		= sizeof(ipip_fan_version),
		.proc_handler	= proc_dointvec,
	},
	{ },	/* empty terminating entry */
};

#endif /* CONFIG_SYSCTL */
965
1da177e4
LT
966static int __init ipip_init(void)
967{
968 int err;
969
1b69e7e6 970 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
1da177e4 971
d5aa407f
AD
972 err = register_pernet_device(&ipip_net_ops);
973 if (err < 0)
974 return err;
975 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
976 if (err < 0) {
058bd4d2 977 pr_info("%s: can't register tunnel\n", __func__);
1b69e7e6
SH
978 goto xfrm_tunnel_ipip_failed;
979 }
980#if IS_ENABLED(CONFIG_MPLS)
981 err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
982 if (err < 0) {
983 pr_info("%s: can't register tunnel\n", __func__);
984 goto xfrm_tunnel_mplsip_failed;
1da177e4 985 }
1b69e7e6 986#endif
0974658d
ND
987 err = rtnl_link_register(&ipip_link_ops);
988 if (err < 0)
989 goto rtnl_link_failed;
990
55ff02b2
JV
991#ifdef CONFIG_SYSCTL
992 ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
993 ipip_fan_sysctls);
994 if (!ipip_fan_header) {
995 err = -ENOMEM;
996 goto sysctl_failed;
997 }
998#endif /* CONFIG_SYSCTL */
999
0974658d 1000out:
1da177e4 1001 return err;
0974658d 1002
55ff02b2
JV
1003#ifdef CONFIG_SYSCTL
1004sysctl_failed:
1005 rtnl_link_unregister(&ipip_link_ops);
1006#endif /* CONFIG_SYSCTL */
0974658d 1007rtnl_link_failed:
1b69e7e6
SH
1008#if IS_ENABLED(CONFIG_MPLS)
1009 xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
1010xfrm_tunnel_mplsip_failed:
1011
1012#endif
0974658d 1013 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1b69e7e6 1014xfrm_tunnel_ipip_failed:
0974658d
ND
1015 unregister_pernet_device(&ipip_net_ops);
1016 goto out;
1da177e4
LT
1017}
1018
1019static void __exit ipip_fini(void)
1020{
55ff02b2
JV
1021#ifdef CONFIG_SYSCTL
1022 unregister_net_sysctl_table(ipip_fan_header);
1023#endif /* CONFIG_SYSCTL */
0974658d 1024 rtnl_link_unregister(&ipip_link_ops);
c0d56408 1025 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
058bd4d2 1026 pr_info("%s: can't deregister tunnel\n", __func__);
1b69e7e6
SH
1027#if IS_ENABLED(CONFIG_MPLS)
1028 if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
1029 pr_info("%s: can't deregister tunnel\n", __func__);
1030#endif
86de8a63 1031 unregister_pernet_device(&ipip_net_ops);
1da177e4
LT
1032}
1033
1034module_init(ipip_init);
1035module_exit(ipip_fini);
1036MODULE_LICENSE("GPL");
f98f89a0 1037MODULE_ALIAS_RTNL_LINK("ipip");
8909c9ad 1038MODULE_ALIAS_NETDEV("tunl0");