]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - net/ipv4/ipip.c
UBUNTU: [Config] CONFIG_SND_SOC_ES8316=m
[mirror_ubuntu-artful-kernel.git] / net / ipv4 / ipip.c
1 /*
2 * Linux NET3: IP/IP protocol decoder.
3 *
4 * Authors:
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
6 *
7 * Fixes:
8 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 *
25 */
26
27 /* tunnel.c: an IP tunnel driver
28
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
31
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
34
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
36
37 Minor tweaks:
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
42 Added tx_dropped stat
43
44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46 Reworked:
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
51
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
61
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
64
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
76 find out how much more space you can allocate by calling
77 "skb_tailroom(skb)".
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
85 */
86
87 /*
88 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
89
90 For comments look at net/ipv4/ip_gre.c --ANK
91 */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109 #include <linux/inetdevice.h>
110 #include <linux/rculist.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ip_tunnels.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 #include <net/net_namespace.h>
119 #include <net/netns/generic.h>
120 #include <net/dst_metadata.h>
121
122 static bool log_ecn_error = true;
123 module_param(log_ecn_error, bool, 0644);
124 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
125
126 static unsigned int ipip_net_id __read_mostly;
127
128 static int ipip_tunnel_init(struct net_device *dev);
129 static struct rtnl_link_ops ipip_link_ops __read_mostly;
130
131 static int ipip_err(struct sk_buff *skb, u32 info)
132 {
133
134 /* All the routers (except for Linux) return only
135 8 bytes of packet payload. It means, that precise relaying of
136 ICMP in the real Internet is absolutely infeasible.
137 */
138 struct net *net = dev_net(skb->dev);
139 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
140 const struct iphdr *iph = (const struct iphdr *)skb->data;
141 struct ip_tunnel *t;
142 int err;
143 const int type = icmp_hdr(skb)->type;
144 const int code = icmp_hdr(skb)->code;
145
146 err = -ENOENT;
147 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
148 iph->daddr, iph->saddr, 0);
149 if (!t)
150 goto out;
151
152 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
153 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
154 t->parms.link, 0, iph->protocol, 0);
155 err = 0;
156 goto out;
157 }
158
159 if (type == ICMP_REDIRECT) {
160 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
161 iph->protocol, 0);
162 err = 0;
163 goto out;
164 }
165
166 if (t->parms.iph.daddr == 0)
167 goto out;
168
169 err = 0;
170 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
171 goto out;
172
173 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
174 t->err_count++;
175 else
176 t->err_count = 1;
177 t->err_time = jiffies;
178
179 out:
180 return err;
181 }
182
183 static const struct tnl_ptk_info ipip_tpi = {
184 /* no tunnel info required for ipip. */
185 .proto = htons(ETH_P_IP),
186 };
187
188 #if IS_ENABLED(CONFIG_MPLS)
189 static const struct tnl_ptk_info mplsip_tpi = {
190 /* no tunnel info required for mplsip. */
191 .proto = htons(ETH_P_MPLS_UC),
192 };
193 #endif
194
195 static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
196 {
197 struct net *net = dev_net(skb->dev);
198 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
199 struct metadata_dst *tun_dst = NULL;
200 struct ip_tunnel *tunnel;
201 const struct iphdr *iph;
202
203 iph = ip_hdr(skb);
204 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
205 iph->saddr, iph->daddr, 0);
206 if (tunnel) {
207 const struct tnl_ptk_info *tpi;
208
209 if (tunnel->parms.iph.protocol != ipproto &&
210 tunnel->parms.iph.protocol != 0)
211 goto drop;
212
213 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
214 goto drop;
215 #if IS_ENABLED(CONFIG_MPLS)
216 if (ipproto == IPPROTO_MPLS)
217 tpi = &mplsip_tpi;
218 else
219 #endif
220 tpi = &ipip_tpi;
221 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
222 goto drop;
223 if (tunnel->collect_md) {
224 tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
225 if (!tun_dst)
226 return 0;
227 }
228 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
229 }
230
231 return -1;
232
233 drop:
234 kfree_skb(skb);
235 return 0;
236 }
237
238 static int ipip_rcv(struct sk_buff *skb)
239 {
240 return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
241 }
242
243 #if IS_ENABLED(CONFIG_MPLS)
244 static int mplsip_rcv(struct sk_buff *skb)
245 {
246 return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
247 }
248 #endif
249
250 static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr)
251 {
252 struct ip_fan_map *fan_map;
253
254 rcu_read_lock();
255 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
256 if (fan_map->overlay ==
257 (daddr & inet_make_mask(fan_map->overlay_prefix))) {
258 rcu_read_unlock();
259 return fan_map;
260 }
261 }
262 rcu_read_unlock();
263
264 return NULL;
265 }
266
267 /* Determine fan tunnel endpoint to send packet to, based on the inner IP
268 * address.
269 *
270 * Given a /8 overlay and /16 underlay, for an overlay (inner) address
271 * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
272 * two octets of the underlay network (the network portion of a /16), "A"
273 * and "B" are the low order two octets of the underlay network host (the
274 * host portion of a /16), and "Y" is a configured first octet of the
275 * overlay network.
276 *
277 * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
278 * host overlay subnet 99.3.4.0/24. An overlay network datagram from
279 * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
280 * which hosts overlay network subnet 99.6.7.0/24. This transformation is
281 * described in detail further below.
282 *
283 * Using netmasks for the overlay and underlay other than /8 and /16, as
284 * shown above, can yield larger (or smaller) overlay subnets, with the
285 * trade-off of allowing fewer (or more) underlay hosts to participate.
286 *
287 * The size of each overlay network subnet is defined by the total of the
288 * network mask of the overlay plus the size of host portion of the
289 * underlay network. In the above example, /8 + /16 = /24.
290 *
291 * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
292 * this case, the network portion of the underlay is 10.99.224.0/20, and
293 * the host portion is 0.0.14.5 (12 bits). To determine the overlay
294 * network subnet, the 12 bits of host portion are left shifted 12 bits
295 * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
296 * overlay subnet of 99.224.80.0/20, composed of 8 bits overlay, followed by
297 * 12 bits underlay. This yields 12 bits in the overlay network portion,
298 * allowing for 4094 addresses in each overlay network subnet. The
299 * trade-off is that fewer hosts may participate in the underlay network,
300 * as its host address size has shrunk from 16 bits (65534 addresses) in
301 * the first example to 12 bits (4094 addresses) here.
302 *
303 * For fewer hosts per overlay subnet (permitting a larger number of
304 * underlay hosts to participate), the underlay netmask may be made
305 * smaller.
306 *
307 * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
308 * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
309 * the 20 bits of host by 4 (so that its highest order bit is adjacent to
310 * the lowest order bit of the /8 overlay). This yields an overlay subnet
311 * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
312 * the underlay). This provides more addresses for the underlay network
313 * (approximately 2^20), but each host's segment of the overlay provides
314 * only 4 bits of addresses (14 usable).
315 *
316 * It is also possible to adjust the overlay subnet.
317 *
318 * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
319 * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
320 * shifted 15 bits (/20 - /5), yielding an overlay network of
321 * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
322 * overlay network of 242.107.128.0/17.
323 *
324 * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
325 * underlay host 10.224.220.10, the underlay host portion (.10) is left
326 * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
327 * This would permit 254 addresses on the underlay, with each overlay
328 * segment providing approximately 2^14 - 2 addresses (16382).
329 *
330 * For packets being encapsulated, the overlay network destination IP
331 * address is deconstructed into its overlay and underlay-derived
332 * portions. The underlay portion (determined by the overlay mask and
333 * overlay subnet mask) is right shifted according to the size of the
334 * underlay network mask. This value is then ORed with the network
335 * portion of the underlay network to produce the underlay network
336 * destination for the encapsulated datagram.
337 *
338 * For example, using the initial example of underlay 10.88.3.4/16 and
339 * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
340 * subnet 99.3.4.0/24 with specific host 99.3.4.5. A datagram from
341 * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
342 * of the address extracted. This is a number of bits equal to underlay
343 * network host portion. In the destination address, the highest order of
344 * these bits is one bit lower than the lowest order bit from the overlay
345 * network mask.
346 *
347 * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
348 * underlay mask is /16 (leaving 16 bits for the host portion). The bits
349 * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
350 * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
351 * which is 1 bit lower than the lowest order overlay address bit).
352 *
353 * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
354 * This value is then ORed with the underlay network portion,
355 * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
356 * the encapsulated datagram.
357 *
358 * Another transform using the final example: overlay 100.64.0.0/10 and
359 * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
360 * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
361 * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
362 * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
363 * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
364 * network portion and the underlay host portion) bits, yielding 0.0.0.11.
365 * This is ORed with the underlay network portion, 10.224.220.0/24, giving
366 * the underlay destination of 10.224.220.11 for overlay destination
367 * 100.66.200.5.
368 */
369 static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
370 {
371 struct ip_fan_map *f_map;
372 u32 daddr, underlay;
373
374 f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr);
375 if (!f_map)
376 return -ENOENT;
377
378 daddr = ntohl(ip_hdr(skb)->daddr);
379 underlay = ntohl(f_map->underlay);
380 if (!underlay)
381 return -EINVAL;
382
383 *iph = tunnel->parms.iph;
384 iph->daddr = htonl(underlay |
385 ((daddr & ~f_map->overlay_mask) >>
386 (32 - f_map->overlay_prefix -
387 (32 - f_map->underlay_prefix))));
388 return 0;
389 }
390
391 /*
392 * This function assumes it is being called from dev_queue_xmit()
393 * and that skb is filled properly by that function.
394 */
395 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
396 struct net_device *dev)
397 {
398 struct ip_tunnel *tunnel = netdev_priv(dev);
399 const struct iphdr *tiph = &tunnel->parms.iph;
400 u8 ipproto;
401 struct iphdr fiph;
402
403 switch (skb->protocol) {
404 case htons(ETH_P_IP):
405 ipproto = IPPROTO_IPIP;
406 break;
407 #if IS_ENABLED(CONFIG_MPLS)
408 case htons(ETH_P_MPLS_UC):
409 ipproto = IPPROTO_MPLS;
410 break;
411 #endif
412 default:
413 goto tx_error;
414 }
415
416 if (tiph->protocol != ipproto && tiph->protocol != 0)
417 goto tx_error;
418
419 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
420 goto tx_error;
421
422 if (fan_has_map(&tunnel->fan)) {
423 if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
424 goto tx_error;
425 tiph = &fiph;
426 } else {
427 tiph = &tunnel->parms.iph;
428 }
429
430 skb_set_inner_ipproto(skb, ipproto);
431
432 if (tunnel->collect_md)
433 ip_md_tunnel_xmit(skb, dev, ipproto);
434 else
435 ip_tunnel_xmit(skb, dev, tiph, ipproto);
436 return NETDEV_TX_OK;
437
438 tx_error:
439 kfree_skb(skb);
440
441 dev->stats.tx_errors++;
442 return NETDEV_TX_OK;
443 }
444
445 static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
446 {
447 switch (ipproto) {
448 case 0:
449 case IPPROTO_IPIP:
450 #if IS_ENABLED(CONFIG_MPLS)
451 case IPPROTO_MPLS:
452 #endif
453 return true;
454 }
455
456 return false;
457 }
458
459 static int
460 ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
461 {
462 int err = 0;
463 struct ip_tunnel_parm p;
464
465 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
466 return -EFAULT;
467
468 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
469 if (p.iph.version != 4 ||
470 !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
471 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
472 return -EINVAL;
473 }
474
475 p.i_key = p.o_key = 0;
476 p.i_flags = p.o_flags = 0;
477 err = ip_tunnel_ioctl(dev, &p, cmd);
478 if (err)
479 return err;
480
481 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
482 return -EFAULT;
483
484 return 0;
485 }
486
487 static const struct net_device_ops ipip_netdev_ops = {
488 .ndo_init = ipip_tunnel_init,
489 .ndo_uninit = ip_tunnel_uninit,
490 .ndo_start_xmit = ipip_tunnel_xmit,
491 .ndo_do_ioctl = ipip_tunnel_ioctl,
492 .ndo_change_mtu = ip_tunnel_change_mtu,
493 .ndo_get_stats64 = ip_tunnel_get_stats64,
494 .ndo_get_iflink = ip_tunnel_get_iflink,
495 };
496
497 #define IPIP_FEATURES (NETIF_F_SG | \
498 NETIF_F_FRAGLIST | \
499 NETIF_F_HIGHDMA | \
500 NETIF_F_GSO_SOFTWARE | \
501 NETIF_F_HW_CSUM)
502
503 static void ipip_tunnel_setup(struct net_device *dev)
504 {
505 struct ip_tunnel *t = netdev_priv(dev);
506
507 dev->netdev_ops = &ipip_netdev_ops;
508
509 dev->type = ARPHRD_TUNNEL;
510 dev->flags = IFF_NOARP;
511 dev->addr_len = 4;
512 dev->features |= NETIF_F_LLTX;
513 netif_keep_dst(dev);
514
515 dev->features |= IPIP_FEATURES;
516 dev->hw_features |= IPIP_FEATURES;
517 ip_tunnel_setup(dev, ipip_net_id);
518 INIT_LIST_HEAD(&t->fan.fan_maps);
519 }
520
521 static int ipip_tunnel_init(struct net_device *dev)
522 {
523 struct ip_tunnel *tunnel = netdev_priv(dev);
524
525 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
526 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
527
528 tunnel->tun_hlen = 0;
529 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
530 return ip_tunnel_init(dev);
531 }
532
533 static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
534 struct netlink_ext_ack *extack)
535 {
536 u8 proto;
537
538 if (!data || !data[IFLA_IPTUN_PROTO])
539 return 0;
540
541 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
542 if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
543 return -EINVAL;
544
545 return 0;
546 }
547
548 static void ipip_netlink_parms(struct nlattr *data[],
549 struct ip_tunnel_parm *parms, bool *collect_md,
550 __u32 *fwmark)
551 {
552 memset(parms, 0, sizeof(*parms));
553
554 parms->iph.version = 4;
555 parms->iph.protocol = IPPROTO_IPIP;
556 parms->iph.ihl = 5;
557 *collect_md = false;
558
559 if (!data)
560 return;
561
562 if (data[IFLA_IPTUN_LINK])
563 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
564
565 if (data[IFLA_IPTUN_LOCAL])
566 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
567
568 if (data[IFLA_IPTUN_REMOTE])
569 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
570
571 if (data[IFLA_IPTUN_TTL]) {
572 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
573 if (parms->iph.ttl)
574 parms->iph.frag_off = htons(IP_DF);
575 }
576
577 if (data[IFLA_IPTUN_TOS])
578 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
579
580 if (data[IFLA_IPTUN_PROTO])
581 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
582
583 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
584 parms->iph.frag_off = htons(IP_DF);
585
586 if (data[IFLA_IPTUN_COLLECT_METADATA])
587 *collect_md = true;
588
589 if (data[IFLA_IPTUN_FWMARK])
590 *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
591 }
592
593 /* This function returns true when ENCAP attributes are present in the nl msg */
594 static bool ipip_netlink_encap_parms(struct nlattr *data[],
595 struct ip_tunnel_encap *ipencap)
596 {
597 bool ret = false;
598
599 memset(ipencap, 0, sizeof(*ipencap));
600
601 if (!data)
602 return ret;
603
604 if (data[IFLA_IPTUN_ENCAP_TYPE]) {
605 ret = true;
606 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
607 }
608
609 if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
610 ret = true;
611 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
612 }
613
614 if (data[IFLA_IPTUN_ENCAP_SPORT]) {
615 ret = true;
616 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
617 }
618
619 if (data[IFLA_IPTUN_ENCAP_DPORT]) {
620 ret = true;
621 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
622 }
623
624 return ret;
625 }
626
627 static void ipip_fan_flush_map(struct ip_tunnel *t)
628 {
629 struct ip_fan_map *fan_map;
630
631 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
632 list_del_rcu(&fan_map->list);
633 kfree_rcu(fan_map, rcu);
634 }
635 }
636
637 static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay)
638 {
639 struct ip_fan_map *fan_map;
640
641 fan_map = ipip_fan_find_map(t, overlay);
642 if (!fan_map)
643 return -ENOENT;
644
645 list_del_rcu(&fan_map->list);
646 kfree_rcu(fan_map, rcu);
647
648 return 0;
649 }
650
651 static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map)
652 {
653 __be32 overlay_mask, underlay_mask;
654 struct ip_fan_map *fan_map;
655
656 overlay_mask = inet_make_mask(map->overlay_prefix);
657 underlay_mask = inet_make_mask(map->underlay_prefix);
658
659 if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
660 return -EINVAL;
661
662 if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
663 return -EINVAL;
664
665 /* Special case: overlay 0 and underlay 0: flush all mappings */
666 if (!map->overlay && !map->underlay) {
667 ipip_fan_flush_map(t);
668 return 0;
669 }
670
671 /* Special case: overlay set and underlay 0: clear map for overlay */
672 if (!map->underlay)
673 return ipip_fan_del_map(t, map->overlay);
674
675 if (ipip_fan_find_map(t, map->overlay))
676 return -EEXIST;
677
678 fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
679 fan_map->underlay = map->underlay;
680 fan_map->overlay = map->overlay;
681 fan_map->underlay_prefix = map->underlay_prefix;
682 fan_map->overlay_mask = ntohl(overlay_mask);
683 fan_map->overlay_prefix = map->overlay_prefix;
684
685 list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps);
686
687 return 0;
688 }
689
690
691 static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
692 struct ip_tunnel_parm *parms)
693 {
694 struct ifla_fan_map *map;
695 struct nlattr *attr;
696 int rem, rv;
697
698 if (!data[IFLA_IPTUN_FAN_MAP])
699 return 0;
700
701 if (parms->iph.daddr)
702 return -EINVAL;
703
704 nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
705 map = nla_data(attr);
706 rv = ipip_fan_add_map(t, map);
707 if (rv)
708 return rv;
709 }
710
711 return 0;
712 }
713
714 static int ipip_newlink(struct net *src_net, struct net_device *dev,
715 struct nlattr *tb[], struct nlattr *data[],
716 struct netlink_ext_ack *extack)
717 {
718 struct ip_tunnel *t = netdev_priv(dev);
719 struct ip_tunnel_parm p;
720 struct ip_tunnel_encap ipencap;
721 __u32 fwmark = 0;
722 int err;
723
724 if (ipip_netlink_encap_parms(data, &ipencap)) {
725 err = ip_tunnel_encap_setup(t, &ipencap);
726
727 if (err < 0)
728 return err;
729 }
730
731 ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
732 err = ipip_netlink_fan(data, t, &p);
733 if (err < 0)
734 return err;
735 return ip_tunnel_newlink(dev, tb, &p, fwmark);
736 }
737
738 static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
739 struct nlattr *data[],
740 struct netlink_ext_ack *extack)
741 {
742 struct ip_tunnel *t = netdev_priv(dev);
743 struct ip_tunnel_parm p;
744 struct ip_tunnel_encap ipencap;
745 bool collect_md;
746 __u32 fwmark = t->fwmark;
747 int err;
748
749 if (ipip_netlink_encap_parms(data, &ipencap)) {
750 err = ip_tunnel_encap_setup(t, &ipencap);
751
752 if (err < 0)
753 return err;
754 }
755
756 ipip_netlink_parms(data, &p, &collect_md, &fwmark);
757 if (collect_md)
758 return -EINVAL;
759 err = ipip_netlink_fan(data, t, &p);
760 if (err < 0)
761 return err;
762
763 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
764 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
765 return -EINVAL;
766
767 return ip_tunnel_changelink(dev, tb, &p, fwmark);
768 }
769
770 static size_t ipip_get_size(const struct net_device *dev)
771 {
772 return
773 /* IFLA_IPTUN_LINK */
774 nla_total_size(4) +
775 /* IFLA_IPTUN_LOCAL */
776 nla_total_size(4) +
777 /* IFLA_IPTUN_REMOTE */
778 nla_total_size(4) +
779 /* IFLA_IPTUN_TTL */
780 nla_total_size(1) +
781 /* IFLA_IPTUN_TOS */
782 nla_total_size(1) +
783 /* IFLA_IPTUN_PROTO */
784 nla_total_size(1) +
785 /* IFLA_IPTUN_PMTUDISC */
786 nla_total_size(1) +
787 /* IFLA_IPTUN_ENCAP_TYPE */
788 nla_total_size(2) +
789 /* IFLA_IPTUN_ENCAP_FLAGS */
790 nla_total_size(2) +
791 /* IFLA_IPTUN_ENCAP_SPORT */
792 nla_total_size(2) +
793 /* IFLA_IPTUN_ENCAP_DPORT */
794 nla_total_size(2) +
795 /* IFLA_IPTUN_COLLECT_METADATA */
796 nla_total_size(0) +
797 /* IFLA_IPTUN_FWMARK */
798 nla_total_size(4) +
799 /* IFLA_IPTUN_FAN_MAP */
800 nla_total_size(sizeof(struct ifla_fan_map)) * 256 +
801 0;
802 }
803
804 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
805 {
806 struct ip_tunnel *tunnel = netdev_priv(dev);
807 struct ip_tunnel_parm *parm = &tunnel->parms;
808
809 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
810 nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
811 nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
812 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
813 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
814 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
815 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
816 !!(parm->iph.frag_off & htons(IP_DF))) ||
817 nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
818 goto nla_put_failure;
819
820 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
821 tunnel->encap.type) ||
822 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
823 tunnel->encap.sport) ||
824 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
825 tunnel->encap.dport) ||
826 nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
827 tunnel->encap.flags))
828 goto nla_put_failure;
829
830 if (tunnel->collect_md)
831 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
832 goto nla_put_failure;
833 if (fan_has_map(&tunnel->fan)) {
834 struct nlattr *fan_nest;
835 struct ip_fan_map *fan_map;
836
837 fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
838 if (!fan_nest)
839 goto nla_put_failure;
840 list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) {
841 struct ifla_fan_map map;
842
843 map.underlay = fan_map->underlay;
844 map.underlay_prefix = fan_map->underlay_prefix;
845 map.overlay = fan_map->overlay;
846 map.overlay_prefix = fan_map->overlay_prefix;
847 if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
848 goto nla_put_failure;
849 }
850 nla_nest_end(skb, fan_nest);
851 }
852
853 return 0;
854
855 nla_put_failure:
856 return -EMSGSIZE;
857 }
858
859 static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
860 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
861 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
862 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
863 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
864 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
865 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
866 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
867 [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
868 [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
869 [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
870 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
871 [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
872 [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
873
874 [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY },
875 [IFLA_IPTUN_FAN_MAP] = { .type = NLA_NESTED },
876 };
877
878 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
879 .kind = "ipip",
880 .maxtype = IFLA_IPTUN_MAX,
881 .policy = ipip_policy,
882 .priv_size = sizeof(struct ip_tunnel),
883 .setup = ipip_tunnel_setup,
884 .validate = ipip_tunnel_validate,
885 .newlink = ipip_newlink,
886 .changelink = ipip_changelink,
887 .dellink = ip_tunnel_dellink,
888 .get_size = ipip_get_size,
889 .fill_info = ipip_fill_info,
890 .get_link_net = ip_tunnel_get_link_net,
891 };
892
893 static struct xfrm_tunnel ipip_handler __read_mostly = {
894 .handler = ipip_rcv,
895 .err_handler = ipip_err,
896 .priority = 1,
897 };
898
899 #if IS_ENABLED(CONFIG_MPLS)
900 static struct xfrm_tunnel mplsip_handler __read_mostly = {
901 .handler = mplsip_rcv,
902 .err_handler = ipip_err,
903 .priority = 1,
904 };
905 #endif
906
907 static int __net_init ipip_init_net(struct net *net)
908 {
909 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
910 }
911
912 static void __net_exit ipip_exit_net(struct net *net)
913 {
914 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
915 ip_tunnel_delete_net(itn, &ipip_link_ops);
916 }
917
918 static struct pernet_operations ipip_net_ops = {
919 .init = ipip_init_net,
920 .exit = ipip_exit_net,
921 .id = &ipip_net_id,
922 .size = sizeof(struct ip_tunnel_net),
923 };
924
#ifdef CONFIG_SYSCTL
/* Read-only "version" sysctl, registered under "net/fan" in ipip_init(). */
static unsigned int ipip_fan_version = 3;
static struct ctl_table_header *ipip_fan_header;

static struct ctl_table ipip_fan_sysctls[] = {
	{
		.procname	= "version",
		.mode		= 0444,
		.data		= &ipip_fan_version,
		.maxlen		= sizeof(ipip_fan_version),
		.proc_handler	= proc_dointvec,
	},
	{},	/* table terminator */
};

#endif /* CONFIG_SYSCTL */
941
942 static int __init ipip_init(void)
943 {
944 int err;
945
946 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
947
948 err = register_pernet_device(&ipip_net_ops);
949 if (err < 0)
950 return err;
951 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
952 if (err < 0) {
953 pr_info("%s: can't register tunnel\n", __func__);
954 goto xfrm_tunnel_ipip_failed;
955 }
956 #if IS_ENABLED(CONFIG_MPLS)
957 err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
958 if (err < 0) {
959 pr_info("%s: can't register tunnel\n", __func__);
960 goto xfrm_tunnel_mplsip_failed;
961 }
962 #endif
963 err = rtnl_link_register(&ipip_link_ops);
964 if (err < 0)
965 goto rtnl_link_failed;
966
967 #ifdef CONFIG_SYSCTL
968 ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
969 ipip_fan_sysctls);
970 if (!ipip_fan_header) {
971 err = -ENOMEM;
972 goto sysctl_failed;
973 }
974 #endif /* CONFIG_SYSCTL */
975
976 out:
977 return err;
978
979 #ifdef CONFIG_SYSCTL
980 sysctl_failed:
981 rtnl_link_unregister(&ipip_link_ops);
982 #endif /* CONFIG_SYSCTL */
983 rtnl_link_failed:
984 #if IS_ENABLED(CONFIG_MPLS)
985 xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
986 xfrm_tunnel_mplsip_failed:
987
988 #endif
989 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
990 xfrm_tunnel_ipip_failed:
991 unregister_pernet_device(&ipip_net_ops);
992 goto out;
993 }
994
995 static void __exit ipip_fini(void)
996 {
997 #ifdef CONFIG_SYSCTL
998 unregister_net_sysctl_table(ipip_fan_header);
999 #endif /* CONFIG_SYSCTL */
1000 rtnl_link_unregister(&ipip_link_ops);
1001 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
1002 pr_info("%s: can't deregister tunnel\n", __func__);
1003 #if IS_ENABLED(CONFIG_MPLS)
1004 if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
1005 pr_info("%s: can't deregister tunnel\n", __func__);
1006 #endif
1007 unregister_pernet_device(&ipip_net_ops);
1008 }
1009
1010 module_init(ipip_init);
1011 module_exit(ipip_fini);
1012 MODULE_LICENSE("GPL");
1013 MODULE_ALIAS_RTNL_LINK("ipip");
1014 MODULE_ALIAS_NETDEV("tunl0");