]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - net/ipv4/ipip.c
UBUNTU: Ubuntu-4.15.0-96.97
[mirror_ubuntu-bionic-kernel.git] / net / ipv4 / ipip.c
CommitLineData
1da177e4 1/*
e905a9ed 2 * Linux NET3: IP/IP protocol decoder.
1da177e4 3 *
1da177e4
LT
4 * Authors:
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
6 *
7 * Fixes:
8 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 *
25 */
26
27/* tunnel.c: an IP tunnel driver
28
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
31
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
34
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
e905a9ed 36
1da177e4
LT
37 Minor tweaks:
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
42 Added tx_dropped stat
e905a9ed 43
113aa838 44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
1da177e4
LT
45
46 Reworked:
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
51
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
e905a9ed 53
1da177e4
LT
54*/
55
56/* Things I wish I had known when writing the tunnel driver:
57
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
61
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
64
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
e905a9ed 76 find out how much more space you can allocate by calling
1da177e4
LT
77 "skb_tailroom(skb)".
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
85 */
86
87/*
88 This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
89
90 For comments look at net/ipv4/ip_gre.c --ANK
91 */
92
e905a9ed 93
4fc268d2 94#include <linux/capability.h>
1da177e4
LT
95#include <linux/module.h>
96#include <linux/types.h>
1da177e4 97#include <linux/kernel.h>
5a0e3ad6 98#include <linux/slab.h>
7c0f6ba6 99#include <linux/uaccess.h>
1da177e4
LT
100#include <linux/skbuff.h>
101#include <linux/netdevice.h>
102#include <linux/in.h>
103#include <linux/tcp.h>
104#include <linux/udp.h>
105#include <linux/if_arp.h>
1da177e4
LT
106#include <linux/init.h>
107#include <linux/netfilter_ipv4.h>
46f25dff 108#include <linux/if_ether.h>
55ff02b2 109#include <linux/inetdevice.h>
d57420a1 110#include <linux/rculist.h>
1da177e4
LT
111
112#include <net/sock.h>
113#include <net/ip.h>
114#include <net/icmp.h>
c5441932 115#include <net/ip_tunnels.h>
1da177e4
LT
116#include <net/inet_ecn.h>
117#include <net/xfrm.h>
10dc4c7b
PE
118#include <net/net_namespace.h>
119#include <net/netns/generic.h>
cfc7381b 120#include <net/dst_metadata.h>
1da177e4 121
eccc1bb8 122static bool log_ecn_error = true;
123module_param(log_ecn_error, bool, 0644);
124MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
125
c7d03a00 126static unsigned int ipip_net_id __read_mostly;
10dc4c7b 127
3c97af99 128static int ipip_tunnel_init(struct net_device *dev);
0974658d 129static struct rtnl_link_ops ipip_link_ops __read_mostly;
1da177e4 130
d2acc347 131static int ipip_err(struct sk_buff *skb, u32 info)
1da177e4 132{
f3594f0a
XL
133 /* All the routers (except for Linux) return only
134 * 8 bytes of packet payload. It means, that precise relaying of
135 * ICMP in the real Internet is absolutely infeasible.
136 */
fd58156e
PS
137 struct net *net = dev_net(skb->dev);
138 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
b71d1d42 139 const struct iphdr *iph = (const struct iphdr *)skb->data;
fd58156e
PS
140 const int type = icmp_hdr(skb)->type;
141 const int code = icmp_hdr(skb)->code;
f3594f0a
XL
142 struct ip_tunnel *t;
143 int err = 0;
144
145 switch (type) {
146 case ICMP_DEST_UNREACH:
147 switch (code) {
148 case ICMP_SR_FAILED:
149 /* Impossible event. */
150 goto out;
151 default:
152 /* All others are translated to HOST_UNREACH.
153 * rfc2003 contains "deep thoughts" about NET_UNREACH,
154 * I believe they are just ether pollution. --ANK
155 */
156 break;
157 }
158 break;
159
160 case ICMP_TIME_EXCEEDED:
161 if (code != ICMP_EXC_TTL)
162 goto out;
163 break;
164
165 case ICMP_REDIRECT:
166 break;
167
168 default:
169 goto out;
170 }
1da177e4 171
fd58156e
PS
172 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
173 iph->daddr, iph->saddr, 0);
f3594f0a
XL
174 if (!t) {
175 err = -ENOENT;
36393395 176 goto out;
f3594f0a 177 }
36393395
DM
178
179 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
f3594f0a
XL
180 ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
181 iph->protocol, 0);
36393395
DM
182 goto out;
183 }
184
55be7a9c 185 if (type == ICMP_REDIRECT) {
f3594f0a 186 ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
55be7a9c
DM
187 goto out;
188 }
189
f3594f0a
XL
190 if (t->parms.iph.daddr == 0) {
191 err = -ENOENT;
1da177e4 192 goto out;
f3594f0a 193 }
d2acc347 194
1da177e4
LT
195 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
196 goto out;
197
26d94b46 198 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
1da177e4
LT
199 t->err_count++;
200 else
201 t->err_count = 1;
202 t->err_time = jiffies;
b0558ef2 203
fd58156e 204out:
d2acc347 205 return err;
1da177e4
LT
206}
207
1b69e7e6 208static const struct tnl_ptk_info ipip_tpi = {
fd58156e
PS
209 /* no tunnel info required for ipip. */
210 .proto = htons(ETH_P_IP),
211};
212
1b69e7e6
SH
213#if IS_ENABLED(CONFIG_MPLS)
214static const struct tnl_ptk_info mplsip_tpi = {
215 /* no tunnel info required for mplsip. */
216 .proto = htons(ETH_P_MPLS_UC),
217};
218#endif
219
220static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
1da177e4 221{
fd58156e
PS
222 struct net *net = dev_net(skb->dev);
223 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
cfc7381b 224 struct metadata_dst *tun_dst = NULL;
1da177e4 225 struct ip_tunnel *tunnel;
3d7b46cd 226 const struct iphdr *iph;
3c97af99 227
3d7b46cd 228 iph = ip_hdr(skb);
fd58156e
PS
229 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
230 iph->saddr, iph->daddr, 0);
231 if (tunnel) {
1b69e7e6
SH
232 const struct tnl_ptk_info *tpi;
233
234 if (tunnel->parms.iph.protocol != ipproto &&
235 tunnel->parms.iph.protocol != 0)
236 goto drop;
237
eccc1bb8 238 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
239 goto drop;
1b69e7e6
SH
240#if IS_ENABLED(CONFIG_MPLS)
241 if (ipproto == IPPROTO_MPLS)
242 tpi = &mplsip_tpi;
243 else
244#endif
245 tpi = &ipip_tpi;
246 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
737e828b 247 goto drop;
cfc7381b
AS
248 if (tunnel->collect_md) {
249 tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
250 if (!tun_dst)
251 return 0;
252 }
253 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
1da177e4 254 }
1da177e4 255
1da177e4 256 return -1;
eccc1bb8 257
258drop:
259 kfree_skb(skb);
260 return 0;
1da177e4
LT
261}
262
1b69e7e6
SH
263static int ipip_rcv(struct sk_buff *skb)
264{
265 return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
266}
267
268#if IS_ENABLED(CONFIG_MPLS)
269static int mplsip_rcv(struct sk_buff *skb)
270{
271 return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
272}
273#endif
274
d57420a1 275static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr)
55ff02b2 276{
d57420a1
JV
277 struct ip_fan_map *fan_map;
278
279 rcu_read_lock();
280 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
281 if (fan_map->overlay ==
282 (daddr & inet_make_mask(fan_map->overlay_prefix))) {
283 rcu_read_unlock();
284 return fan_map;
285 }
286 }
287 rcu_read_unlock();
288
289 return NULL;
55ff02b2
JV
290}
291
d57420a1
JV
292/* Determine fan tunnel endpoint to send packet to, based on the inner IP
293 * address.
294 *
295 * Given a /8 overlay and /16 underlay, for an overlay (inner) address
296 * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
297 * two octets of the underlay network (the network portion of a /16), "A"
298 * and "B" are the low order two octets of the underlay network host (the
299 * host portion of a /16), and "Y" is a configured first octet of the
300 * overlay network.
301 *
302 * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
303 * host overlay subnet 99.3.4.0/24. An overlay network datagram from
304 * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
305 * which hosts overlay network subnet 99.6.7.0/24. This transformation is
306 * described in detail further below.
307 *
308 * Using netmasks for the overlay and underlay other than /8 and /16, as
309 * shown above, can yield larger (or smaller) overlay subnets, with the
310 * trade-off of allowing fewer (or more) underlay hosts to participate.
311 *
312 * The size of each overlay network subnet is defined by the total of the
313 * network mask of the overlay plus the size of host portion of the
314 * underlay network. In the above example, /8 + /16 = /24.
315 *
316 * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
317 * this case, the network portion of the underlay is 10.99.224.0/20, and
318 * the host portion is 0.0.14.5 (12 bits). To determine the overlay
319 * network subnet, the 12 bits of host portion are left shifted 12 bits
320 * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
321 * overlay subnet of 99.224.80/20, composed of 8 bits overlay, followed by
322 * 12 bits underlay. This yields 12 bits in the overlay network portion,
323 * allowing for 4094 addresses in each overlay network subnet. The
324 * trade-off is that fewer hosts may participate in the underlay network,
325 * as its host address size has shrunk from 16 bits (65534 addresses) in
326 * the first example to 12 bits (4094 addresses) here.
327 *
328 * For fewer hosts per overlay subnet (permitting a larger number of
329 * underlay hosts to participate), the underlay netmask may be made
330 * smaller.
331 *
332 * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
333 * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
334 * the 20 bits of host by 4 (so that its highest order bit is adjacent to
335 * the lowest order bit of the /8 overlay). This yields an overlay subnet
336 * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
337 * the underlay). This provides more addresses for the underlay network
338 * (approximately 2^20), but each host's segment of the overlay provides
339 * only 4 bits of addresses (14 usable).
340 *
341 * It is also possible to adjust the overlay subnet.
342 *
343 * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
344 * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
345 * shifted 15 bits (/20 - /5), yielding an overlay network of
346 * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
347 * overlay network of 242.107.128.0/17.
348 *
349 * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
350 * underlay host 10.224.220.10, the underlay host portion (.10) is left
351 * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
352 * This would permit 254 addresses on the underlay, with each overlay
353 * segment providing approximately 2^14 - 2 addresses (16382).
354 *
355 * For packets being encapsulated, the overlay network destination IP
356 * address is deconstructed into its overlay and underlay-derived
357 * portions. The underlay portion (determined by the overlay mask and
358 * overlay subnet mask) is right shifted according to the size of the
359 * underlay network mask. This value is then ORed with the network
360 * portion of the underlay network to produce the underlay network
361 * destination for the encapsulated datagram.
362 *
363 * For example, using the initial example of underlay 10.88.3.4/16 and
364 * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
365 * subnet 99.3.4.0/24 with specific host 99.3.4.5. A datagram from
366 * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
367 * of the address extracted. This is a number of bits equal to underlay
368 * network host portion. In the destination address, the highest order of
369 * these bits is one bit lower than the lowest order bit from the overlay
370 * network mask.
371 *
372 * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
373 * underlay mask is /16 (leaving 16 bits for the host portion). The bits
374 * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
375 * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
376 * which is 1 bit lower than the lowest order overlay address bit).
55ff02b2 377 *
d57420a1
JV
378 * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
379 * This value is then ORed with the underlay network portion,
380 * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
381 * the encapsulated datagram.
382 *
383 * Another transform using the final example: overlay 100.64.0.0/10 and
384 * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
385 * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
386 * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
387 * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
388 * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
389 * network portion and the underlay host portion) bits, yielding 0.0.0.11.
390 * This is ORed with the underlay network portion, 10.224.220.0/24, giving
391 * the underlay destination of 10.224.220.11 for overlay destination
392 * 100.66.200.5.
55ff02b2
JV
393 */
394static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
395{
d57420a1 396 struct ip_fan_map *f_map;
55ff02b2
JV
397 u32 daddr, underlay;
398
d57420a1
JV
399 f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr);
400 if (!f_map)
401 return -ENOENT;
402
55ff02b2 403 daddr = ntohl(ip_hdr(skb)->daddr);
d57420a1 404 underlay = ntohl(f_map->underlay);
55ff02b2
JV
405 if (!underlay)
406 return -EINVAL;
407
408 *iph = tunnel->parms.iph;
d57420a1
JV
409 iph->daddr = htonl(underlay |
410 ((daddr & ~f_map->overlay_mask) >>
411 (32 - f_map->overlay_prefix -
412 (32 - f_map->underlay_prefix))));
55ff02b2
JV
413 return 0;
414}
415
1da177e4
LT
416/*
417 * This function assumes it is being called from dev_queue_xmit()
418 * and that skb is filled properly by that function.
419 */
1b69e7e6
SH
420static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
421 struct net_device *dev)
1da177e4 422{
2941a486 423 struct ip_tunnel *tunnel = netdev_priv(dev);
b71d1d42 424 const struct iphdr *tiph = &tunnel->parms.iph;
1b69e7e6 425 u8 ipproto;
55ff02b2 426 struct iphdr fiph;
1b69e7e6 427
2981cedf
HY
428 if (!pskb_inet_may_pull(skb))
429 goto tx_error;
430
1b69e7e6
SH
431 switch (skb->protocol) {
432 case htons(ETH_P_IP):
433 ipproto = IPPROTO_IPIP;
434 break;
435#if IS_ENABLED(CONFIG_MPLS)
436 case htons(ETH_P_MPLS_UC):
437 ipproto = IPPROTO_MPLS;
438 break;
439#endif
440 default:
441 goto tx_error;
442 }
1da177e4 443
1b69e7e6 444 if (tiph->protocol != ipproto && tiph->protocol != 0)
1da177e4 445 goto tx_error;
1da177e4 446
7e13318d 447 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
aed069df 448 goto tx_error;
8344bfc6 449
d57420a1 450 if (fan_has_map(&tunnel->fan)) {
55ff02b2
JV
451 if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
452 goto tx_error;
453 tiph = &fiph;
454 } else {
455 tiph = &tunnel->parms.iph;
456 }
457
1b69e7e6 458 skb_set_inner_ipproto(skb, ipproto);
077c5a09 459
cfc7381b
AS
460 if (tunnel->collect_md)
461 ip_md_tunnel_xmit(skb, dev, ipproto);
462 else
463 ip_tunnel_xmit(skb, dev, tiph, ipproto);
6ed10654 464 return NETDEV_TX_OK;
1da177e4 465
1da177e4 466tx_error:
3acfa1e7 467 kfree_skb(skb);
aed069df 468
cb32f511 469 dev->stats.tx_errors++;
6ed10654 470 return NETDEV_TX_OK;
1da177e4
LT
471}
472
1b69e7e6
SH
473static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
474{
475 switch (ipproto) {
476 case 0:
477 case IPPROTO_IPIP:
478#if IS_ENABLED(CONFIG_MPLS)
479 case IPPROTO_MPLS:
480#endif
481 return true;
482 }
483
484 return false;
485}
486
1da177e4 487static int
fd58156e 488ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
1da177e4
LT
489{
490 int err = 0;
491 struct ip_tunnel_parm p;
1da177e4 492
fd58156e
PS
493 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
494 return -EFAULT;
1da177e4 495
3b7b514f 496 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
1b69e7e6
SH
497 if (p.iph.version != 4 ||
498 !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
3b7b514f
CW
499 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
500 return -EINVAL;
501 }
502
252a8fbe
ED
503 p.i_key = p.o_key = 0;
504 p.i_flags = p.o_flags = 0;
fd58156e
PS
505 err = ip_tunnel_ioctl(dev, &p, cmd);
506 if (err)
507 return err;
508
509 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
510 return -EFAULT;
511
1da177e4
LT
512 return 0;
513}
514
23a12b14 515static const struct net_device_ops ipip_netdev_ops = {
fd58156e
PS
516 .ndo_init = ipip_tunnel_init,
517 .ndo_uninit = ip_tunnel_uninit,
23a12b14
SH
518 .ndo_start_xmit = ipip_tunnel_xmit,
519 .ndo_do_ioctl = ipip_tunnel_ioctl,
fd58156e
PS
520 .ndo_change_mtu = ip_tunnel_change_mtu,
521 .ndo_get_stats64 = ip_tunnel_get_stats64,
1e99584b 522 .ndo_get_iflink = ip_tunnel_get_iflink,
23a12b14
SH
523};
524
c3b89fbb
ED
/* Feature bits set in both dev->features and dev->hw_features at setup. */
#define IPIP_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | \
		       NETIF_F_GSO_SOFTWARE | NETIF_F_HW_CSUM)
530
1da177e4
LT
531static void ipip_tunnel_setup(struct net_device *dev)
532{
d57420a1
JV
533 struct ip_tunnel *t = netdev_priv(dev);
534
23a12b14 535 dev->netdev_ops = &ipip_netdev_ops;
1da177e4
LT
536
537 dev->type = ARPHRD_TUNNEL;
1da177e4 538 dev->flags = IFF_NOARP;
1da177e4 539 dev->addr_len = 4;
153f0943 540 dev->features |= NETIF_F_LLTX;
02875878 541 netif_keep_dst(dev);
c3b89fbb
ED
542
543 dev->features |= IPIP_FEATURES;
544 dev->hw_features |= IPIP_FEATURES;
fd58156e 545 ip_tunnel_setup(dev, ipip_net_id);
d57420a1 546 INIT_LIST_HEAD(&t->fan.fan_maps);
1da177e4
LT
547}
548
3c97af99 549static int ipip_tunnel_init(struct net_device *dev)
1da177e4 550{
23a12b14 551 struct ip_tunnel *tunnel = netdev_priv(dev);
1da177e4 552
1da177e4
LT
553 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
554 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
555
473ab820
TH
556 tunnel->tun_hlen = 0;
557 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
fd58156e 558 return ip_tunnel_init(dev);
1da177e4
LT
559}
560
a8b8a889
MS
561static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
562 struct netlink_ext_ack *extack)
1b69e7e6
SH
563{
564 u8 proto;
565
566 if (!data || !data[IFLA_IPTUN_PROTO])
567 return 0;
568
569 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
570 if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
571 return -EINVAL;
572
573 return 0;
574}
575
be42da0e 576static void ipip_netlink_parms(struct nlattr *data[],
9830ad4c
CG
577 struct ip_tunnel_parm *parms, bool *collect_md,
578 __u32 *fwmark)
be42da0e
ND
579{
580 memset(parms, 0, sizeof(*parms));
581
582 parms->iph.version = 4;
583 parms->iph.protocol = IPPROTO_IPIP;
584 parms->iph.ihl = 5;
cfc7381b 585 *collect_md = false;
be42da0e
ND
586
587 if (!data)
588 return;
589
590 if (data[IFLA_IPTUN_LINK])
591 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
592
593 if (data[IFLA_IPTUN_LOCAL])
67b61f6c 594 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
be42da0e
ND
595
596 if (data[IFLA_IPTUN_REMOTE])
67b61f6c 597 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
be42da0e
ND
598
599 if (data[IFLA_IPTUN_TTL]) {
600 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
601 if (parms->iph.ttl)
602 parms->iph.frag_off = htons(IP_DF);
603 }
604
605 if (data[IFLA_IPTUN_TOS])
606 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
607
1b69e7e6
SH
608 if (data[IFLA_IPTUN_PROTO])
609 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
610
be42da0e
ND
611 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
612 parms->iph.frag_off = htons(IP_DF);
cfc7381b
AS
613
614 if (data[IFLA_IPTUN_COLLECT_METADATA])
615 *collect_md = true;
9830ad4c
CG
616
617 if (data[IFLA_IPTUN_FWMARK])
618 *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
be42da0e
ND
619}
620
473ab820
TH
621/* This function returns true when ENCAP attributes are present in the nl msg */
622static bool ipip_netlink_encap_parms(struct nlattr *data[],
623 struct ip_tunnel_encap *ipencap)
624{
625 bool ret = false;
626
627 memset(ipencap, 0, sizeof(*ipencap));
628
629 if (!data)
630 return ret;
631
632 if (data[IFLA_IPTUN_ENCAP_TYPE]) {
633 ret = true;
634 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
635 }
636
637 if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
638 ret = true;
639 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
640 }
641
642 if (data[IFLA_IPTUN_ENCAP_SPORT]) {
643 ret = true;
3e97fa70 644 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
473ab820
TH
645 }
646
647 if (data[IFLA_IPTUN_ENCAP_DPORT]) {
648 ret = true;
3e97fa70 649 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
473ab820
TH
650 }
651
652 return ret;
653}
654
d57420a1 655static void ipip_fan_flush_map(struct ip_tunnel *t)
55ff02b2 656{
d57420a1
JV
657 struct ip_fan_map *fan_map;
658
659 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
660 list_del_rcu(&fan_map->list);
661 kfree_rcu(fan_map, rcu);
662 }
55ff02b2
JV
663}
664
d57420a1 665static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay)
55ff02b2 666{
d57420a1 667 struct ip_fan_map *fan_map;
55ff02b2 668
d57420a1
JV
669 fan_map = ipip_fan_find_map(t, overlay);
670 if (!fan_map)
671 return -ENOENT;
672
673 list_del_rcu(&fan_map->list);
674 kfree_rcu(fan_map, rcu);
55ff02b2 675
d57420a1
JV
676 return 0;
677}
55ff02b2 678
d57420a1
JV
679static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map)
680{
681 __be32 overlay_mask, underlay_mask;
682 struct ip_fan_map *fan_map;
55ff02b2 683
d57420a1
JV
684 overlay_mask = inet_make_mask(map->overlay_prefix);
685 underlay_mask = inet_make_mask(map->underlay_prefix);
55ff02b2 686
d57420a1 687 if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
55ff02b2
JV
688 return -EINVAL;
689
d57420a1
JV
690 if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
691 return -EINVAL;
55ff02b2 692
d57420a1
JV
693 /* Special case: overlay 0 and underlay 0: flush all mappings */
694 if (!map->overlay && !map->underlay) {
695 ipip_fan_flush_map(t);
55ff02b2
JV
696 return 0;
697 }
d57420a1
JV
698
699 /* Special case: overlay set and underlay 0: clear map for overlay */
700 if (!map->underlay)
701 return ipip_fan_del_map(t, map->overlay);
702
703 if (ipip_fan_find_map(t, map->overlay))
704 return -EEXIST;
705
706 fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
707 fan_map->underlay = map->underlay;
708 fan_map->overlay = map->overlay;
709 fan_map->underlay_prefix = map->underlay_prefix;
710 fan_map->overlay_mask = ntohl(overlay_mask);
711 fan_map->overlay_prefix = map->overlay_prefix;
55ff02b2 712
d57420a1 713 list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps);
55ff02b2
JV
714
715 return 0;
716}
717
718
719static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
720 struct ip_tunnel_parm *parms)
721{
d57420a1 722 struct ifla_fan_map *map;
55ff02b2
JV
723 struct nlattr *attr;
724 int rem, rv;
725
a402714c 726 if (data == NULL || !data[IFLA_IPTUN_FAN_MAP])
55ff02b2
JV
727 return 0;
728
729 if (parms->iph.daddr)
730 return -EINVAL;
731
732 nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
733 map = nla_data(attr);
d57420a1 734 rv = ipip_fan_add_map(t, map);
55ff02b2
JV
735 if (rv)
736 return rv;
737 }
738
739 return 0;
740}
741
be42da0e 742static int ipip_newlink(struct net *src_net, struct net_device *dev,
7a3f4a18
MS
743 struct nlattr *tb[], struct nlattr *data[],
744 struct netlink_ext_ack *extack)
be42da0e 745{
cfc7381b 746 struct ip_tunnel *t = netdev_priv(dev);
fd58156e 747 struct ip_tunnel_parm p;
473ab820 748 struct ip_tunnel_encap ipencap;
9830ad4c 749 __u32 fwmark = 0;
55ff02b2 750 int err;
473ab820
TH
751
752 if (ipip_netlink_encap_parms(data, &ipencap)) {
55ff02b2 753 err = ip_tunnel_encap_setup(t, &ipencap);
473ab820
TH
754
755 if (err < 0)
756 return err;
757 }
be42da0e 758
9830ad4c 759 ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
55ff02b2
JV
760 err = ipip_netlink_fan(data, t, &p);
761 if (err < 0)
762 return err;
9830ad4c 763 return ip_tunnel_newlink(dev, tb, &p, fwmark);
be42da0e
ND
764}
765
766static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
ad744b22
MS
767 struct nlattr *data[],
768 struct netlink_ext_ack *extack)
be42da0e 769{
9830ad4c 770 struct ip_tunnel *t = netdev_priv(dev);
be42da0e 771 struct ip_tunnel_parm p;
473ab820 772 struct ip_tunnel_encap ipencap;
cfc7381b 773 bool collect_md;
9830ad4c 774 __u32 fwmark = t->fwmark;
55ff02b2 775 int err;
473ab820
TH
776
777 if (ipip_netlink_encap_parms(data, &ipencap)) {
55ff02b2 778 err = ip_tunnel_encap_setup(t, &ipencap);
473ab820
TH
779
780 if (err < 0)
781 return err;
782 }
be42da0e 783
9830ad4c 784 ipip_netlink_parms(data, &p, &collect_md, &fwmark);
cfc7381b
AS
785 if (collect_md)
786 return -EINVAL;
55ff02b2
JV
787 err = ipip_netlink_fan(data, t, &p);
788 if (err < 0)
789 return err;
be42da0e
ND
790
791 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
792 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
793 return -EINVAL;
794
9830ad4c 795 return ip_tunnel_changelink(dev, tb, &p, fwmark);
be42da0e
ND
796}
797
0974658d
ND
798static size_t ipip_get_size(const struct net_device *dev)
799{
800 return
801 /* IFLA_IPTUN_LINK */
802 nla_total_size(4) +
803 /* IFLA_IPTUN_LOCAL */
804 nla_total_size(4) +
805 /* IFLA_IPTUN_REMOTE */
806 nla_total_size(4) +
807 /* IFLA_IPTUN_TTL */
808 nla_total_size(1) +
809 /* IFLA_IPTUN_TOS */
810 nla_total_size(1) +
1b69e7e6
SH
811 /* IFLA_IPTUN_PROTO */
812 nla_total_size(1) +
befe2aa1
ND
813 /* IFLA_IPTUN_PMTUDISC */
814 nla_total_size(1) +
473ab820
TH
815 /* IFLA_IPTUN_ENCAP_TYPE */
816 nla_total_size(2) +
817 /* IFLA_IPTUN_ENCAP_FLAGS */
818 nla_total_size(2) +
819 /* IFLA_IPTUN_ENCAP_SPORT */
820 nla_total_size(2) +
821 /* IFLA_IPTUN_ENCAP_DPORT */
822 nla_total_size(2) +
cfc7381b
AS
823 /* IFLA_IPTUN_COLLECT_METADATA */
824 nla_total_size(0) +
9830ad4c
CG
825 /* IFLA_IPTUN_FWMARK */
826 nla_total_size(4) +
55ff02b2 827 /* IFLA_IPTUN_FAN_MAP */
d57420a1 828 nla_total_size(sizeof(struct ifla_fan_map)) * 256 +
0974658d
ND
829 0;
830}
831
832static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
833{
834 struct ip_tunnel *tunnel = netdev_priv(dev);
835 struct ip_tunnel_parm *parm = &tunnel->parms;
836
837 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
930345ea
JB
838 nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
839 nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
0974658d 840 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
befe2aa1 841 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
1b69e7e6 842 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
befe2aa1 843 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
9830ad4c
CG
844 !!(parm->iph.frag_off & htons(IP_DF))) ||
845 nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
0974658d 846 goto nla_put_failure;
473ab820
TH
847
848 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
849 tunnel->encap.type) ||
3e97fa70
SD
850 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
851 tunnel->encap.sport) ||
852 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
853 tunnel->encap.dport) ||
473ab820 854 nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
e1b2cb65 855 tunnel->encap.flags))
473ab820
TH
856 goto nla_put_failure;
857
cfc7381b
AS
858 if (tunnel->collect_md)
859 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
860 goto nla_put_failure;
d57420a1 861 if (fan_has_map(&tunnel->fan)) {
55ff02b2 862 struct nlattr *fan_nest;
d57420a1 863 struct ip_fan_map *fan_map;
55ff02b2
JV
864
865 fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
866 if (!fan_nest)
867 goto nla_put_failure;
d57420a1
JV
868 list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) {
869 struct ifla_fan_map map;
870
871 map.underlay = fan_map->underlay;
872 map.underlay_prefix = fan_map->underlay_prefix;
873 map.overlay = fan_map->overlay;
874 map.overlay_prefix = fan_map->overlay_prefix;
875 if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
876 goto nla_put_failure;
55ff02b2
JV
877 }
878 nla_nest_end(skb, fan_nest);
879 }
880
0974658d
ND
881 return 0;
882
883nla_put_failure:
884 return -EMSGSIZE;
885}
886
be42da0e
ND
887static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
888 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
889 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
890 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
891 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
892 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
1b69e7e6 893 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
be42da0e 894 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
473ab820
TH
895 [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
896 [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
897 [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
898 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
cfc7381b 899 [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
9830ad4c 900 [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
55ff02b2
JV
901
902 [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY },
903 [IFLA_IPTUN_FAN_MAP] = { .type = NLA_NESTED },
be42da0e
ND
904};
905
0974658d
ND
906static struct rtnl_link_ops ipip_link_ops __read_mostly = {
907 .kind = "ipip",
908 .maxtype = IFLA_IPTUN_MAX,
be42da0e 909 .policy = ipip_policy,
0974658d 910 .priv_size = sizeof(struct ip_tunnel),
be42da0e 911 .setup = ipip_tunnel_setup,
1b69e7e6 912 .validate = ipip_tunnel_validate,
be42da0e
ND
913 .newlink = ipip_newlink,
914 .changelink = ipip_changelink,
fd58156e 915 .dellink = ip_tunnel_dellink,
0974658d
ND
916 .get_size = ipip_get_size,
917 .fill_info = ipip_fill_info,
1728d4fa 918 .get_link_net = ip_tunnel_get_link_net,
0974658d
ND
919};
920
6dcd814b 921static struct xfrm_tunnel ipip_handler __read_mostly = {
1da177e4
LT
922 .handler = ipip_rcv,
923 .err_handler = ipip_err,
d2acc347 924 .priority = 1,
1da177e4
LT
925};
926
1b69e7e6
SH
927#if IS_ENABLED(CONFIG_MPLS)
928static struct xfrm_tunnel mplsip_handler __read_mostly = {
929 .handler = mplsip_rcv,
930 .err_handler = ipip_err,
931 .priority = 1,
932};
933#endif
934
2c8c1e72 935static int __net_init ipip_init_net(struct net *net)
10dc4c7b 936{
fd58156e 937 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
10dc4c7b
PE
938}
939
64bc1781 940static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
10dc4c7b 941{
64bc1781 942 ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
10dc4c7b
PE
943}
944
945static struct pernet_operations ipip_net_ops = {
946 .init = ipip_init_net,
64bc1781 947 .exit_batch = ipip_exit_batch_net,
86de8a63 948 .id = &ipip_net_id,
fd58156e 949 .size = sizeof(struct ip_tunnel_net),
10dc4c7b
PE
950};
951
55ff02b2
JV
#ifdef CONFIG_SYSCTL
/* Value reported through the read-only "version" sysctl entry below. */
static unsigned int ipip_fan_version = 3;

/* Handle returned by register_net_sysctl() in ipip_init(); released in ipip_fini(). */
static struct ctl_table_header *ipip_fan_header;

/*
 * Sysctl table registered under "net/fan" by ipip_init().  It holds a
 * single read-only (0444) "version" entry backed by ipip_fan_version,
 * followed by the empty terminating entry.
 */
static struct ctl_table ipip_fan_sysctls[] = {
	{
		.procname	= "version",
		.data		= &ipip_fan_version,
		.maxlen		= sizeof(ipip_fan_version),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{},
};

#endif /* CONFIG_SYSCTL */
968
1da177e4
LT
969static int __init ipip_init(void)
970{
971 int err;
972
1b69e7e6 973 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
1da177e4 974
d5aa407f
AD
975 err = register_pernet_device(&ipip_net_ops);
976 if (err < 0)
977 return err;
978 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
979 if (err < 0) {
058bd4d2 980 pr_info("%s: can't register tunnel\n", __func__);
1b69e7e6
SH
981 goto xfrm_tunnel_ipip_failed;
982 }
983#if IS_ENABLED(CONFIG_MPLS)
984 err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
985 if (err < 0) {
986 pr_info("%s: can't register tunnel\n", __func__);
987 goto xfrm_tunnel_mplsip_failed;
1da177e4 988 }
1b69e7e6 989#endif
0974658d
ND
990 err = rtnl_link_register(&ipip_link_ops);
991 if (err < 0)
992 goto rtnl_link_failed;
993
55ff02b2
JV
994#ifdef CONFIG_SYSCTL
995 ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
996 ipip_fan_sysctls);
997 if (!ipip_fan_header) {
998 err = -ENOMEM;
999 goto sysctl_failed;
1000 }
1001#endif /* CONFIG_SYSCTL */
1002
0974658d 1003out:
1da177e4 1004 return err;
0974658d 1005
55ff02b2
JV
1006#ifdef CONFIG_SYSCTL
1007sysctl_failed:
1008 rtnl_link_unregister(&ipip_link_ops);
1009#endif /* CONFIG_SYSCTL */
0974658d 1010rtnl_link_failed:
1b69e7e6
SH
1011#if IS_ENABLED(CONFIG_MPLS)
1012 xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
1013xfrm_tunnel_mplsip_failed:
1014
1015#endif
0974658d 1016 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1b69e7e6 1017xfrm_tunnel_ipip_failed:
0974658d
ND
1018 unregister_pernet_device(&ipip_net_ops);
1019 goto out;
1da177e4
LT
1020}
1021
1022static void __exit ipip_fini(void)
1023{
55ff02b2
JV
1024#ifdef CONFIG_SYSCTL
1025 unregister_net_sysctl_table(ipip_fan_header);
1026#endif /* CONFIG_SYSCTL */
0974658d 1027 rtnl_link_unregister(&ipip_link_ops);
c0d56408 1028 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
058bd4d2 1029 pr_info("%s: can't deregister tunnel\n", __func__);
1b69e7e6
SH
1030#if IS_ENABLED(CONFIG_MPLS)
1031 if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
1032 pr_info("%s: can't deregister tunnel\n", __func__);
1033#endif
86de8a63 1034 unregister_pernet_device(&ipip_net_ops);
1da177e4
LT
1035}
1036
1037module_init(ipip_init);
1038module_exit(ipip_fini);
1039MODULE_LICENSE("GPL");
f98f89a0 1040MODULE_ALIAS_RTNL_LINK("ipip");
8909c9ad 1041MODULE_ALIAS_NETDEV("tunl0");