]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - net/ipv4/ipip.c
bcm2835-camera: Correct port_parameter_get return value
[mirror_ubuntu-zesty-kernel.git] / net / ipv4 / ipip.c
1 /*
2 * Linux NET3: IP/IP protocol decoder.
3 *
4 * Authors:
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
6 *
7 * Fixes:
8 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 *
25 */
26
27 /* tunnel.c: an IP tunnel driver
28
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
31
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
34
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
36
37 Minor tweaks:
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
42 Added tx_dropped stat
43
44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46 Reworked:
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
51
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
61
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
64
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
76 find out how much more space you can allocate by calling
77 "skb_tailroom(skb)".
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
85 */
86
87 /*
88 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
89
90 For comments look at net/ipv4/ip_gre.c --ANK
91 */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109 #include <linux/inetdevice.h>
110 #include <linux/rculist.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ip_tunnels.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 #include <net/net_namespace.h>
119 #include <net/netns/generic.h>
120 #include <net/dst_metadata.h>
121
122 static bool log_ecn_error = true;
123 module_param(log_ecn_error, bool, 0644);
124 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
125
126 static unsigned int ipip_net_id __read_mostly;
127
128 static int ipip_tunnel_init(struct net_device *dev);
129 static struct rtnl_link_ops ipip_link_ops __read_mostly;
130
131 static int ipip_err(struct sk_buff *skb, u32 info)
132 {
133
134 /* All the routers (except for Linux) return only
135 8 bytes of packet payload. It means, that precise relaying of
136 ICMP in the real Internet is absolutely infeasible.
137 */
138 struct net *net = dev_net(skb->dev);
139 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
140 const struct iphdr *iph = (const struct iphdr *)skb->data;
141 struct ip_tunnel *t;
142 int err;
143 const int type = icmp_hdr(skb)->type;
144 const int code = icmp_hdr(skb)->code;
145
146 err = -ENOENT;
147 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
148 iph->daddr, iph->saddr, 0);
149 if (!t)
150 goto out;
151
152 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
153 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
154 t->parms.link, 0, iph->protocol, 0);
155 err = 0;
156 goto out;
157 }
158
159 if (type == ICMP_REDIRECT) {
160 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
161 iph->protocol, 0);
162 err = 0;
163 goto out;
164 }
165
166 if (t->parms.iph.daddr == 0)
167 goto out;
168
169 err = 0;
170 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
171 goto out;
172
173 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
174 t->err_count++;
175 else
176 t->err_count = 1;
177 t->err_time = jiffies;
178
179 out:
180 return err;
181 }
182
183 static const struct tnl_ptk_info ipip_tpi = {
184 /* no tunnel info required for ipip. */
185 .proto = htons(ETH_P_IP),
186 };
187
188 #if IS_ENABLED(CONFIG_MPLS)
189 static const struct tnl_ptk_info mplsip_tpi = {
190 /* no tunnel info required for mplsip. */
191 .proto = htons(ETH_P_MPLS_UC),
192 };
193 #endif
194
195 static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
196 {
197 struct net *net = dev_net(skb->dev);
198 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
199 struct metadata_dst *tun_dst = NULL;
200 struct ip_tunnel *tunnel;
201 const struct iphdr *iph;
202
203 iph = ip_hdr(skb);
204 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
205 iph->saddr, iph->daddr, 0);
206 if (tunnel) {
207 const struct tnl_ptk_info *tpi;
208
209 if (tunnel->parms.iph.protocol != ipproto &&
210 tunnel->parms.iph.protocol != 0)
211 goto drop;
212
213 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
214 goto drop;
215 #if IS_ENABLED(CONFIG_MPLS)
216 if (ipproto == IPPROTO_MPLS)
217 tpi = &mplsip_tpi;
218 else
219 #endif
220 tpi = &ipip_tpi;
221 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
222 goto drop;
223 if (tunnel->collect_md) {
224 tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
225 if (!tun_dst)
226 return 0;
227 }
228 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
229 }
230
231 return -1;
232
233 drop:
234 kfree_skb(skb);
235 return 0;
236 }
237
238 static int ipip_rcv(struct sk_buff *skb)
239 {
240 return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
241 }
242
243 #if IS_ENABLED(CONFIG_MPLS)
244 static int mplsip_rcv(struct sk_buff *skb)
245 {
246 return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
247 }
248 #endif
249
250 static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr)
251 {
252 struct ip_fan_map *fan_map;
253
254 rcu_read_lock();
255 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
256 if (fan_map->overlay ==
257 (daddr & inet_make_mask(fan_map->overlay_prefix))) {
258 rcu_read_unlock();
259 return fan_map;
260 }
261 }
262 rcu_read_unlock();
263
264 return NULL;
265 }
266
267 /* Determine fan tunnel endpoint to send packet to, based on the inner IP
268 * address.
269 *
270 * Given a /8 overlay and /16 underlay, for an overlay (inner) address
271 * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
272 * two octets of the underlay network (the network portion of a /16), "A"
273 * and "B" are the low order two octets of the underlay network host (the
274 * host portion of a /16), and "Y" is a configured first octet of the
275 * overlay network.
276 *
277 * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
278 * host overlay subnet 99.3.4.0/24. An overlay network datagram from
279 * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
280 * which hosts overlay network subnet 99.6.7.0/24. This transformation is
281 * described in detail further below.
282 *
283 * Using netmasks for the overlay and underlay other than /8 and /16, as
284 * shown above, can yield larger (or smaller) overlay subnets, with the
285 * trade-off of allowing fewer (or more) underlay hosts to participate.
286 *
287 * The size of each overlay network subnet is defined by the total of the
288 * network mask of the overlay plus the size of host portion of the
289 * underlay network. In the above example, /8 + /16 = /24.
290 *
291 * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
292 * this case, the network portion of the underlay is 10.99.224.0/20, and
293 * the host portion is 0.0.14.5 (12 bits). To determine the overlay
294 * network subnet, the 12 bits of host portion are left shifted 12 bits
295 * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
296 * overlay subnet of 99.224.80/20, composed of 8 bits overlay, followed by
297 * 12 bits underlay. This yields 12 bits in the overlay network portion,
298 * allowing for 4094 addresses in each overlay network subnet. The
299 * trade-off is that fewer hosts may participate in the underlay network,
300 * as its host address size has shrunk from 16 bits (65534 addresses) in
301 * the first example to 12 bits (4094 addresses) here.
302 *
303 * For fewer hosts per overlay subnet (permitting a larger number of
304 * underlay hosts to participate), the underlay netmask may be made
305 * smaller.
306 *
307 * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
308 * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
309 * the 20 bits of host by 4 (so that its highest order bit is adjacent to
310 * the lowest order bit of the /8 overlay). This yields an overlay subnet
311 * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
312 * the underlay). This provides more addresses for the underlay network
313 * (approximately 2^20), but each host's segment of the overlay provides
314 * only 4 bits of addresses (14 usable).
315 *
316 * It is also possible to adjust the overlay subnet.
317 *
318 * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
319 * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
320 * shifted 15 bits (/20 - /5), yielding an overlay network of
321 * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
322 * overlay network of 242.107.128.0/17.
323 *
324 * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
325 * underlay host 10.224.220.10, the underlay host portion (.10) is left
326 * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
327 * This would permit 254 addresses on the underlay, with each overlay
328 * segment providing approximately 2^14 - 2 addresses (16382).
329 *
330 * For packets being encapsulated, the overlay network destination IP
331 * address is deconstructed into its overlay and underlay-derived
332 * portions. The underlay portion (determined by the overlay mask and
333 * overlay subnet mask) is right shifted according to the size of the
334 * underlay network mask. This value is then ORed with the network
335 * portion of the underlay network to produce the underlay network
336 * destination for the encapsulated datagram.
337 *
338 * For example, using the initial example of underlay 10.88.3.4/16 and
339 * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
340 * subnet 99.3.4.0/24 with specific host 99.3.4.5. A datagram from
341 * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
342 * of the address extracted. This is a number of bits equal to underlay
343 * network host portion. In the destination address, the highest order of
344 * these bits is one bit lower than the lowest order bit from the overlay
345 * network mask.
346 *
347 * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
348 * underlay mask is /16 (leaving 16 bits for the host portion). The bits
349 * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
350 * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
351 * which is 1 bit lower than the lowest order overlay address bit).
352 *
353 * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
354 * This value is then ORed with the underlay network portion,
355 * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
356 * the encapsulated datagram.
357 *
358 * Another transform using the final example: overlay 100.64.0.0/10 and
359 * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
360 * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
361 * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
362 * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
363 * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
364 * network portion and the underlay host portion) bits, yielding 0.0.0.11.
365 * This is ORed with the underlay network portion, 10.224.220.0/24, giving
366 * the underlay destination of 10.224.220.11 for overlay destination
367 * 100.66.200.5.
368 */
369 static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
370 {
371 struct ip_fan_map *f_map;
372 u32 daddr, underlay;
373
374 f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr);
375 if (!f_map)
376 return -ENOENT;
377
378 daddr = ntohl(ip_hdr(skb)->daddr);
379 underlay = ntohl(f_map->underlay);
380 if (!underlay)
381 return -EINVAL;
382
383 *iph = tunnel->parms.iph;
384 iph->daddr = htonl(underlay |
385 ((daddr & ~f_map->overlay_mask) >>
386 (32 - f_map->overlay_prefix -
387 (32 - f_map->underlay_prefix))));
388 return 0;
389 }
390
391 /*
392 * This function assumes it is being called from dev_queue_xmit()
393 * and that skb is filled properly by that function.
394 */
395 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
396 struct net_device *dev)
397 {
398 struct ip_tunnel *tunnel = netdev_priv(dev);
399 const struct iphdr *tiph = &tunnel->parms.iph;
400 u8 ipproto;
401 struct iphdr fiph;
402
403 switch (skb->protocol) {
404 case htons(ETH_P_IP):
405 ipproto = IPPROTO_IPIP;
406 break;
407 #if IS_ENABLED(CONFIG_MPLS)
408 case htons(ETH_P_MPLS_UC):
409 ipproto = IPPROTO_MPLS;
410 break;
411 #endif
412 default:
413 goto tx_error;
414 }
415
416 if (tiph->protocol != ipproto && tiph->protocol != 0)
417 goto tx_error;
418
419 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
420 goto tx_error;
421
422 if (fan_has_map(&tunnel->fan)) {
423 if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
424 goto tx_error;
425 tiph = &fiph;
426 } else {
427 tiph = &tunnel->parms.iph;
428 }
429
430 skb_set_inner_ipproto(skb, ipproto);
431
432 if (tunnel->collect_md)
433 ip_md_tunnel_xmit(skb, dev, ipproto);
434 else
435 ip_tunnel_xmit(skb, dev, tiph, ipproto);
436 return NETDEV_TX_OK;
437
438 tx_error:
439 kfree_skb(skb);
440
441 dev->stats.tx_errors++;
442 return NETDEV_TX_OK;
443 }
444
445 static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
446 {
447 switch (ipproto) {
448 case 0:
449 case IPPROTO_IPIP:
450 #if IS_ENABLED(CONFIG_MPLS)
451 case IPPROTO_MPLS:
452 #endif
453 return true;
454 }
455
456 return false;
457 }
458
459 static int
460 ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
461 {
462 int err = 0;
463 struct ip_tunnel_parm p;
464
465 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
466 return -EFAULT;
467
468 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
469 if (p.iph.version != 4 ||
470 !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
471 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
472 return -EINVAL;
473 }
474
475 p.i_key = p.o_key = 0;
476 p.i_flags = p.o_flags = 0;
477 err = ip_tunnel_ioctl(dev, &p, cmd);
478 if (err)
479 return err;
480
481 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
482 return -EFAULT;
483
484 return 0;
485 }
486
487 static const struct net_device_ops ipip_netdev_ops = {
488 .ndo_init = ipip_tunnel_init,
489 .ndo_uninit = ip_tunnel_uninit,
490 .ndo_start_xmit = ipip_tunnel_xmit,
491 .ndo_do_ioctl = ipip_tunnel_ioctl,
492 .ndo_change_mtu = ip_tunnel_change_mtu,
493 .ndo_get_stats64 = ip_tunnel_get_stats64,
494 .ndo_get_iflink = ip_tunnel_get_iflink,
495 };
496
/* Feature bits set in both dev->features and dev->hw_features at setup. */
#define IPIP_FEATURES						\
	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |	\
	 NETIF_F_GSO_SOFTWARE | NETIF_F_HW_CSUM)
502
503 static void ipip_tunnel_setup(struct net_device *dev)
504 {
505 struct ip_tunnel *t = netdev_priv(dev);
506
507 dev->netdev_ops = &ipip_netdev_ops;
508
509 dev->type = ARPHRD_TUNNEL;
510 dev->flags = IFF_NOARP;
511 dev->addr_len = 4;
512 dev->features |= NETIF_F_LLTX;
513 netif_keep_dst(dev);
514
515 dev->features |= IPIP_FEATURES;
516 dev->hw_features |= IPIP_FEATURES;
517 ip_tunnel_setup(dev, ipip_net_id);
518 INIT_LIST_HEAD(&t->fan.fan_maps);
519 }
520
521 static int ipip_tunnel_init(struct net_device *dev)
522 {
523 struct ip_tunnel *tunnel = netdev_priv(dev);
524
525 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
526 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
527
528 tunnel->tun_hlen = 0;
529 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
530 return ip_tunnel_init(dev);
531 }
532
533 static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
534 {
535 u8 proto;
536
537 if (!data || !data[IFLA_IPTUN_PROTO])
538 return 0;
539
540 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
541 if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
542 return -EINVAL;
543
544 return 0;
545 }
546
547 static void ipip_netlink_parms(struct nlattr *data[],
548 struct ip_tunnel_parm *parms, bool *collect_md)
549 {
550 memset(parms, 0, sizeof(*parms));
551
552 parms->iph.version = 4;
553 parms->iph.protocol = IPPROTO_IPIP;
554 parms->iph.ihl = 5;
555 *collect_md = false;
556
557 if (!data)
558 return;
559
560 if (data[IFLA_IPTUN_LINK])
561 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
562
563 if (data[IFLA_IPTUN_LOCAL])
564 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
565
566 if (data[IFLA_IPTUN_REMOTE])
567 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
568
569 if (data[IFLA_IPTUN_TTL]) {
570 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
571 if (parms->iph.ttl)
572 parms->iph.frag_off = htons(IP_DF);
573 }
574
575 if (data[IFLA_IPTUN_TOS])
576 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
577
578 if (data[IFLA_IPTUN_PROTO])
579 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
580
581 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
582 parms->iph.frag_off = htons(IP_DF);
583
584 if (data[IFLA_IPTUN_COLLECT_METADATA])
585 *collect_md = true;
586 }
587
588 /* This function returns true when ENCAP attributes are present in the nl msg */
589 static bool ipip_netlink_encap_parms(struct nlattr *data[],
590 struct ip_tunnel_encap *ipencap)
591 {
592 bool ret = false;
593
594 memset(ipencap, 0, sizeof(*ipencap));
595
596 if (!data)
597 return ret;
598
599 if (data[IFLA_IPTUN_ENCAP_TYPE]) {
600 ret = true;
601 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
602 }
603
604 if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
605 ret = true;
606 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
607 }
608
609 if (data[IFLA_IPTUN_ENCAP_SPORT]) {
610 ret = true;
611 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
612 }
613
614 if (data[IFLA_IPTUN_ENCAP_DPORT]) {
615 ret = true;
616 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
617 }
618
619 return ret;
620 }
621
622 static void ipip_fan_flush_map(struct ip_tunnel *t)
623 {
624 struct ip_fan_map *fan_map;
625
626 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
627 list_del_rcu(&fan_map->list);
628 kfree_rcu(fan_map, rcu);
629 }
630 }
631
632 static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay)
633 {
634 struct ip_fan_map *fan_map;
635
636 fan_map = ipip_fan_find_map(t, overlay);
637 if (!fan_map)
638 return -ENOENT;
639
640 list_del_rcu(&fan_map->list);
641 kfree_rcu(fan_map, rcu);
642
643 return 0;
644 }
645
646 static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map)
647 {
648 __be32 overlay_mask, underlay_mask;
649 struct ip_fan_map *fan_map;
650
651 overlay_mask = inet_make_mask(map->overlay_prefix);
652 underlay_mask = inet_make_mask(map->underlay_prefix);
653
654 if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
655 return -EINVAL;
656
657 if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
658 return -EINVAL;
659
660 /* Special case: overlay 0 and underlay 0: flush all mappings */
661 if (!map->overlay && !map->underlay) {
662 ipip_fan_flush_map(t);
663 return 0;
664 }
665
666 /* Special case: overlay set and underlay 0: clear map for overlay */
667 if (!map->underlay)
668 return ipip_fan_del_map(t, map->overlay);
669
670 if (ipip_fan_find_map(t, map->overlay))
671 return -EEXIST;
672
673 fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
674 fan_map->underlay = map->underlay;
675 fan_map->overlay = map->overlay;
676 fan_map->underlay_prefix = map->underlay_prefix;
677 fan_map->overlay_mask = ntohl(overlay_mask);
678 fan_map->overlay_prefix = map->overlay_prefix;
679
680 list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps);
681
682 return 0;
683 }
684
685
686 static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
687 struct ip_tunnel_parm *parms)
688 {
689 struct ifla_fan_map *map;
690 struct nlattr *attr;
691 int rem, rv;
692
693 if (!data[IFLA_IPTUN_FAN_MAP])
694 return 0;
695
696 if (parms->iph.daddr)
697 return -EINVAL;
698
699 nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
700 map = nla_data(attr);
701 rv = ipip_fan_add_map(t, map);
702 if (rv)
703 return rv;
704 }
705
706 return 0;
707 }
708
709 static int ipip_newlink(struct net *src_net, struct net_device *dev,
710 struct nlattr *tb[], struct nlattr *data[])
711 {
712 struct ip_tunnel *t = netdev_priv(dev);
713 struct ip_tunnel_parm p;
714 struct ip_tunnel_encap ipencap;
715 int err;
716
717 if (ipip_netlink_encap_parms(data, &ipencap)) {
718 err = ip_tunnel_encap_setup(t, &ipencap);
719
720 if (err < 0)
721 return err;
722 }
723
724 ipip_netlink_parms(data, &p, &t->collect_md);
725 err = ipip_netlink_fan(data, t, &p);
726 if (err < 0)
727 return err;
728 return ip_tunnel_newlink(dev, tb, &p);
729 }
730
731 static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
732 struct nlattr *data[])
733 {
734 struct ip_tunnel_parm p;
735 struct ip_tunnel_encap ipencap;
736 bool collect_md;
737 struct ip_tunnel *t = netdev_priv(dev);
738 int err;
739
740 if (ipip_netlink_encap_parms(data, &ipencap)) {
741 err = ip_tunnel_encap_setup(t, &ipencap);
742
743 if (err < 0)
744 return err;
745 }
746
747 ipip_netlink_parms(data, &p, &collect_md);
748 if (collect_md)
749 return -EINVAL;
750 err = ipip_netlink_fan(data, t, &p);
751 if (err < 0)
752 return err;
753
754 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
755 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
756 return -EINVAL;
757
758 return ip_tunnel_changelink(dev, tb, &p);
759 }
760
761 static size_t ipip_get_size(const struct net_device *dev)
762 {
763 return
764 /* IFLA_IPTUN_LINK */
765 nla_total_size(4) +
766 /* IFLA_IPTUN_LOCAL */
767 nla_total_size(4) +
768 /* IFLA_IPTUN_REMOTE */
769 nla_total_size(4) +
770 /* IFLA_IPTUN_TTL */
771 nla_total_size(1) +
772 /* IFLA_IPTUN_TOS */
773 nla_total_size(1) +
774 /* IFLA_IPTUN_PROTO */
775 nla_total_size(1) +
776 /* IFLA_IPTUN_PMTUDISC */
777 nla_total_size(1) +
778 /* IFLA_IPTUN_ENCAP_TYPE */
779 nla_total_size(2) +
780 /* IFLA_IPTUN_ENCAP_FLAGS */
781 nla_total_size(2) +
782 /* IFLA_IPTUN_ENCAP_SPORT */
783 nla_total_size(2) +
784 /* IFLA_IPTUN_ENCAP_DPORT */
785 nla_total_size(2) +
786 /* IFLA_IPTUN_COLLECT_METADATA */
787 nla_total_size(0) +
788 /* IFLA_IPTUN_FAN_MAP */
789 nla_total_size(sizeof(struct ifla_fan_map)) * 256 +
790 0;
791 }
792
793 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
794 {
795 struct ip_tunnel *tunnel = netdev_priv(dev);
796 struct ip_tunnel_parm *parm = &tunnel->parms;
797
798 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
799 nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
800 nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
801 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
802 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
803 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
804 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
805 !!(parm->iph.frag_off & htons(IP_DF))))
806 goto nla_put_failure;
807
808 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
809 tunnel->encap.type) ||
810 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
811 tunnel->encap.sport) ||
812 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
813 tunnel->encap.dport) ||
814 nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
815 tunnel->encap.flags))
816 goto nla_put_failure;
817
818 if (tunnel->collect_md)
819 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
820 goto nla_put_failure;
821 if (fan_has_map(&tunnel->fan)) {
822 struct nlattr *fan_nest;
823 struct ip_fan_map *fan_map;
824
825 fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
826 if (!fan_nest)
827 goto nla_put_failure;
828 list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) {
829 struct ifla_fan_map map;
830
831 map.underlay = fan_map->underlay;
832 map.underlay_prefix = fan_map->underlay_prefix;
833 map.overlay = fan_map->overlay;
834 map.overlay_prefix = fan_map->overlay_prefix;
835 if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
836 goto nla_put_failure;
837 }
838 nla_nest_end(skb, fan_nest);
839 }
840
841 return 0;
842
843 nla_put_failure:
844 return -EMSGSIZE;
845 }
846
847 static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
848 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
849 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
850 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
851 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
852 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
853 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
854 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
855 [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
856 [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
857 [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
858 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
859 [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
860
861 [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY },
862 [IFLA_IPTUN_FAN_MAP] = { .type = NLA_NESTED },
863 };
864
865 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
866 .kind = "ipip",
867 .maxtype = IFLA_IPTUN_MAX,
868 .policy = ipip_policy,
869 .priv_size = sizeof(struct ip_tunnel),
870 .setup = ipip_tunnel_setup,
871 .validate = ipip_tunnel_validate,
872 .newlink = ipip_newlink,
873 .changelink = ipip_changelink,
874 .dellink = ip_tunnel_dellink,
875 .get_size = ipip_get_size,
876 .fill_info = ipip_fill_info,
877 .get_link_net = ip_tunnel_get_link_net,
878 };
879
880 static struct xfrm_tunnel ipip_handler __read_mostly = {
881 .handler = ipip_rcv,
882 .err_handler = ipip_err,
883 .priority = 1,
884 };
885
886 #if IS_ENABLED(CONFIG_MPLS)
887 static struct xfrm_tunnel mplsip_handler __read_mostly = {
888 .handler = mplsip_rcv,
889 .err_handler = ipip_err,
890 .priority = 1,
891 };
892 #endif
893
894 static int __net_init ipip_init_net(struct net *net)
895 {
896 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
897 }
898
899 static void __net_exit ipip_exit_net(struct net *net)
900 {
901 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
902 ip_tunnel_delete_net(itn, &ipip_link_ops);
903 }
904
905 static struct pernet_operations ipip_net_ops = {
906 .init = ipip_init_net,
907 .exit = ipip_exit_net,
908 .id = &ipip_net_id,
909 .size = sizeof(struct ip_tunnel_net),
910 };
911
#ifdef CONFIG_SYSCTL
/* Handle returned by register_net_sysctl(); needed again at module unload. */
static struct ctl_table_header *ipip_fan_header;

/* Value exposed read-only (mode 0444) through the "version" sysctl entry. */
static unsigned int ipip_fan_version = 3;

/* Sysctl table registered under "net/fan" by ipip_init(). */
static struct ctl_table ipip_fan_sysctls[] = {
	{
		.procname	= "version",
		.data		= &ipip_fan_version,
		.maxlen		= sizeof(ipip_fan_version),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{},	/* terminating empty entry */
};

#endif /* CONFIG_SYSCTL */
928
929 static int __init ipip_init(void)
930 {
931 int err;
932
933 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
934
935 err = register_pernet_device(&ipip_net_ops);
936 if (err < 0)
937 return err;
938 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
939 if (err < 0) {
940 pr_info("%s: can't register tunnel\n", __func__);
941 goto xfrm_tunnel_ipip_failed;
942 }
943 #if IS_ENABLED(CONFIG_MPLS)
944 err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
945 if (err < 0) {
946 pr_info("%s: can't register tunnel\n", __func__);
947 goto xfrm_tunnel_mplsip_failed;
948 }
949 #endif
950 err = rtnl_link_register(&ipip_link_ops);
951 if (err < 0)
952 goto rtnl_link_failed;
953
954 #ifdef CONFIG_SYSCTL
955 ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
956 ipip_fan_sysctls);
957 if (!ipip_fan_header) {
958 err = -ENOMEM;
959 goto sysctl_failed;
960 }
961 #endif /* CONFIG_SYSCTL */
962
963 out:
964 return err;
965
966 #ifdef CONFIG_SYSCTL
967 sysctl_failed:
968 rtnl_link_unregister(&ipip_link_ops);
969 #endif /* CONFIG_SYSCTL */
970 rtnl_link_failed:
971 #if IS_ENABLED(CONFIG_MPLS)
972 xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
973 xfrm_tunnel_mplsip_failed:
974
975 #endif
976 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
977 xfrm_tunnel_ipip_failed:
978 unregister_pernet_device(&ipip_net_ops);
979 goto out;
980 }
981
982 static void __exit ipip_fini(void)
983 {
984 #ifdef CONFIG_SYSCTL
985 unregister_net_sysctl_table(ipip_fan_header);
986 #endif /* CONFIG_SYSCTL */
987 rtnl_link_unregister(&ipip_link_ops);
988 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
989 pr_info("%s: can't deregister tunnel\n", __func__);
990 #if IS_ENABLED(CONFIG_MPLS)
991 if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
992 pr_info("%s: can't deregister tunnel\n", __func__);
993 #endif
994 unregister_pernet_device(&ipip_net_ops);
995 }
996
997 module_init(ipip_init);
998 module_exit(ipip_fini);
999 MODULE_LICENSE("GPL");
1000 MODULE_ALIAS_RTNL_LINK("ipip");
1001 MODULE_ALIAS_NETDEV("tunl0");