]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - net/ipv4/ipip.c
net_sched: fix ops->bind_class() implementations
[mirror_ubuntu-bionic-kernel.git] / net / ipv4 / ipip.c
1 /*
2 * Linux NET3: IP/IP protocol decoder.
3 *
4 * Authors:
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
6 *
7 * Fixes:
8 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
19 *
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
24 *
25 */
26
27 /* tunnel.c: an IP tunnel driver
28
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
31
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
34
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
36
37 Minor tweaks:
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
42 Added tx_dropped stat
43
44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46 Reworked:
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
51
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
61
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
64
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
76 find out how much more space you can allocate by calling
77 "skb_tailroom(skb)".
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
85 */
86
87 /*
88 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
89
90 For comments look at net/ipv4/ip_gre.c --ANK
91 */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109 #include <linux/inetdevice.h>
110 #include <linux/rculist.h>
111
112 #include <net/sock.h>
113 #include <net/ip.h>
114 #include <net/icmp.h>
115 #include <net/ip_tunnels.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 #include <net/net_namespace.h>
119 #include <net/netns/generic.h>
120 #include <net/dst_metadata.h>
121
122 static bool log_ecn_error = true;
123 module_param(log_ecn_error, bool, 0644);
124 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
125
126 static unsigned int ipip_net_id __read_mostly;
127
128 static int ipip_tunnel_init(struct net_device *dev);
129 static struct rtnl_link_ops ipip_link_ops __read_mostly;
130
131 static int ipip_err(struct sk_buff *skb, u32 info)
132 {
133 /* All the routers (except for Linux) return only
134 * 8 bytes of packet payload. It means, that precise relaying of
135 * ICMP in the real Internet is absolutely infeasible.
136 */
137 struct net *net = dev_net(skb->dev);
138 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
139 const struct iphdr *iph = (const struct iphdr *)skb->data;
140 const int type = icmp_hdr(skb)->type;
141 const int code = icmp_hdr(skb)->code;
142 struct ip_tunnel *t;
143 int err = 0;
144
145 switch (type) {
146 case ICMP_DEST_UNREACH:
147 switch (code) {
148 case ICMP_SR_FAILED:
149 /* Impossible event. */
150 goto out;
151 default:
152 /* All others are translated to HOST_UNREACH.
153 * rfc2003 contains "deep thoughts" about NET_UNREACH,
154 * I believe they are just ether pollution. --ANK
155 */
156 break;
157 }
158 break;
159
160 case ICMP_TIME_EXCEEDED:
161 if (code != ICMP_EXC_TTL)
162 goto out;
163 break;
164
165 case ICMP_REDIRECT:
166 break;
167
168 default:
169 goto out;
170 }
171
172 t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
173 iph->daddr, iph->saddr, 0);
174 if (!t) {
175 err = -ENOENT;
176 goto out;
177 }
178
179 if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
180 ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
181 iph->protocol, 0);
182 goto out;
183 }
184
185 if (type == ICMP_REDIRECT) {
186 ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
187 goto out;
188 }
189
190 if (t->parms.iph.daddr == 0) {
191 err = -ENOENT;
192 goto out;
193 }
194
195 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
196 goto out;
197
198 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
199 t->err_count++;
200 else
201 t->err_count = 1;
202 t->err_time = jiffies;
203
204 out:
205 return err;
206 }
207
208 static const struct tnl_ptk_info ipip_tpi = {
209 /* no tunnel info required for ipip. */
210 .proto = htons(ETH_P_IP),
211 };
212
213 #if IS_ENABLED(CONFIG_MPLS)
214 static const struct tnl_ptk_info mplsip_tpi = {
215 /* no tunnel info required for mplsip. */
216 .proto = htons(ETH_P_MPLS_UC),
217 };
218 #endif
219
220 static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
221 {
222 struct net *net = dev_net(skb->dev);
223 struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
224 struct metadata_dst *tun_dst = NULL;
225 struct ip_tunnel *tunnel;
226 const struct iphdr *iph;
227
228 iph = ip_hdr(skb);
229 tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
230 iph->saddr, iph->daddr, 0);
231 if (tunnel) {
232 const struct tnl_ptk_info *tpi;
233
234 if (tunnel->parms.iph.protocol != ipproto &&
235 tunnel->parms.iph.protocol != 0)
236 goto drop;
237
238 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
239 goto drop;
240 #if IS_ENABLED(CONFIG_MPLS)
241 if (ipproto == IPPROTO_MPLS)
242 tpi = &mplsip_tpi;
243 else
244 #endif
245 tpi = &ipip_tpi;
246 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
247 goto drop;
248 if (tunnel->collect_md) {
249 tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
250 if (!tun_dst)
251 return 0;
252 }
253 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
254 }
255
256 return -1;
257
258 drop:
259 kfree_skb(skb);
260 return 0;
261 }
262
263 static int ipip_rcv(struct sk_buff *skb)
264 {
265 return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
266 }
267
268 #if IS_ENABLED(CONFIG_MPLS)
269 static int mplsip_rcv(struct sk_buff *skb)
270 {
271 return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
272 }
273 #endif
274
275 static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr)
276 {
277 struct ip_fan_map *fan_map;
278
279 rcu_read_lock();
280 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
281 if (fan_map->overlay ==
282 (daddr & inet_make_mask(fan_map->overlay_prefix))) {
283 rcu_read_unlock();
284 return fan_map;
285 }
286 }
287 rcu_read_unlock();
288
289 return NULL;
290 }
291
292 /* Determine fan tunnel endpoint to send packet to, based on the inner IP
293 * address.
294 *
295 * Given a /8 overlay and /16 underlay, for an overlay (inner) address
296 * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
297 * two octets of the underlay network (the network portion of a /16), "A"
298 * and "B" are the low order two octets of the underlay network host (the
299 * host portion of a /16), and "Y" is a configured first octet of the
300 * overlay network.
301 *
302 * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
303 * host overlay subnet 99.3.4.0/24. An overlay network datagram from
304 * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
305 * which hosts overlay network subnet 99.6.7.0/24. This transformation is
306 * described in detail further below.
307 *
308 * Using netmasks for the overlay and underlay other than /8 and /16, as
309 * shown above, can yield larger (or smaller) overlay subnets, with the
310 * trade-off of allowing fewer (or more) underlay hosts to participate.
311 *
312 * The size of each overlay network subnet is defined by the total of the
313 * network mask of the overlay plus the size of host portion of the
314 * underlay network. In the above example, /8 + /16 = /24.
315 *
316 * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
317 * this case, the network portion of the underlay is 10.99.224.0/20, and
318 * the host portion is 0.0.14.5 (12 bits). To determine the overlay
319 * network subnet, the 12 bits of host portion are left shifted 12 bits
320 * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
321 * overlay subnet of 99.224.80/20, composed of 8 bits overlay, followed by
322 * 12 bits underlay. This yields 12 bits in the overlay network portion,
323 * allowing for 4094 addresses in each overlay network subnet. The
324 * trade-off is that fewer hosts may participate in the underlay network,
325 * as its host address size has shrunk from 16 bits (65534 addresses) in
326 * the first example to 12 bits (4094 addresses) here.
327 *
328 * For fewer hosts per overlay subnet (permitting a larger number of
329 * underlay hosts to participate), the underlay netmask may be made
330 * smaller.
331 *
332 * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
333 * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
334 * the 20 bits of host by 4 (so that its highest order bit is adjacent to
335 * the lowest order bit of the /8 overlay). This yields an overlay subnet
336 * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
337 * the underlay). This provides more addresses for the underlay network
338 * (approximately 2^20), but each host's segment of the overlay provides
339 * only 4 bits of addresses (14 usable).
340 *
341 * It is also possible to adjust the overlay subnet.
342 *
343 * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
344 * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
345 * shifted 15 bits (/20 - /5), yielding an overlay network of
346 * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
347 * overlay network of 242.107.128.0/17.
348 *
349 * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
350 * underlay host 10.224.220.10, the underlay host portion (.10) is left
351 * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
352 * This would permit 254 addresses on the underlay, with each overlay
353 * segment providing approximately 2^14 - 2 addresses (16382).
354 *
355 * For packets being encapsulated, the overlay network destination IP
356 * address is deconstructed into its overlay and underlay-derived
357 * portions. The underlay portion (determined by the overlay mask and
358 * overlay subnet mask) is right shifted according to the size of the
359 * underlay network mask. This value is then ORed with the network
360 * portion of the underlay network to produce the underlay network
361 * destination for the encapsulated datagram.
362 *
363 * For example, using the initial example of underlay 10.88.3.4/16 and
364 * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
365 * subnet 99.3.4.0/24 with specific host 99.3.4.5. A datagram from
366 * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
367 * of the address extracted. This is a number of bits equal to underlay
368 * network host portion. In the destination address, the highest order of
369 * these bits is one bit lower than the lowest order bit from the overlay
370 * network mask.
371 *
372 * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
373 * underlay mask is /16 (leaving 16 bits for the host portion). The bits
374 * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
375 * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
376 * which is 1 bit lower than the lowest order overlay address bit).
377 *
378 * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
379 * This value is then ORed with the underlay network portion,
380 * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
381 * the encapsulated datagram.
382 *
383 * Another transform using the final example: overlay 100.64.0.0/10 and
384 * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
385 * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
386 * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
387 * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
388 * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
389 * network portion and the underlay host portion) bits, yielding 0.0.0.11.
390 * This is ORed with the underlay network portion, 10.224.220.0/24, giving
391 * the underlay destination of 10.224.220.11 for overlay destination
392 * 100.66.200.5.
393 */
394 static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
395 {
396 struct ip_fan_map *f_map;
397 u32 daddr, underlay;
398
399 f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr);
400 if (!f_map)
401 return -ENOENT;
402
403 daddr = ntohl(ip_hdr(skb)->daddr);
404 underlay = ntohl(f_map->underlay);
405 if (!underlay)
406 return -EINVAL;
407
408 *iph = tunnel->parms.iph;
409 iph->daddr = htonl(underlay |
410 ((daddr & ~f_map->overlay_mask) >>
411 (32 - f_map->overlay_prefix -
412 (32 - f_map->underlay_prefix))));
413 return 0;
414 }
415
416 /*
417 * This function assumes it is being called from dev_queue_xmit()
418 * and that skb is filled properly by that function.
419 */
420 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
421 struct net_device *dev)
422 {
423 struct ip_tunnel *tunnel = netdev_priv(dev);
424 const struct iphdr *tiph = &tunnel->parms.iph;
425 u8 ipproto;
426 struct iphdr fiph;
427
428 if (!pskb_inet_may_pull(skb))
429 goto tx_error;
430
431 switch (skb->protocol) {
432 case htons(ETH_P_IP):
433 ipproto = IPPROTO_IPIP;
434 break;
435 #if IS_ENABLED(CONFIG_MPLS)
436 case htons(ETH_P_MPLS_UC):
437 ipproto = IPPROTO_MPLS;
438 break;
439 #endif
440 default:
441 goto tx_error;
442 }
443
444 if (tiph->protocol != ipproto && tiph->protocol != 0)
445 goto tx_error;
446
447 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
448 goto tx_error;
449
450 if (fan_has_map(&tunnel->fan)) {
451 if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
452 goto tx_error;
453 tiph = &fiph;
454 } else {
455 tiph = &tunnel->parms.iph;
456 }
457
458 skb_set_inner_ipproto(skb, ipproto);
459
460 if (tunnel->collect_md)
461 ip_md_tunnel_xmit(skb, dev, ipproto);
462 else
463 ip_tunnel_xmit(skb, dev, tiph, ipproto);
464 return NETDEV_TX_OK;
465
466 tx_error:
467 kfree_skb(skb);
468
469 dev->stats.tx_errors++;
470 return NETDEV_TX_OK;
471 }
472
473 static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
474 {
475 switch (ipproto) {
476 case 0:
477 case IPPROTO_IPIP:
478 #if IS_ENABLED(CONFIG_MPLS)
479 case IPPROTO_MPLS:
480 #endif
481 return true;
482 }
483
484 return false;
485 }
486
487 static int
488 ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
489 {
490 int err = 0;
491 struct ip_tunnel_parm p;
492
493 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
494 return -EFAULT;
495
496 if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
497 if (p.iph.version != 4 ||
498 !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
499 p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
500 return -EINVAL;
501 }
502
503 p.i_key = p.o_key = 0;
504 p.i_flags = p.o_flags = 0;
505 err = ip_tunnel_ioctl(dev, &p, cmd);
506 if (err)
507 return err;
508
509 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
510 return -EFAULT;
511
512 return 0;
513 }
514
515 static const struct net_device_ops ipip_netdev_ops = {
516 .ndo_init = ipip_tunnel_init,
517 .ndo_uninit = ip_tunnel_uninit,
518 .ndo_start_xmit = ipip_tunnel_xmit,
519 .ndo_do_ioctl = ipip_tunnel_ioctl,
520 .ndo_change_mtu = ip_tunnel_change_mtu,
521 .ndo_get_stats64 = ip_tunnel_get_stats64,
522 .ndo_get_iflink = ip_tunnel_get_iflink,
523 };
524
525 #define IPIP_FEATURES (NETIF_F_SG | \
526 NETIF_F_FRAGLIST | \
527 NETIF_F_HIGHDMA | \
528 NETIF_F_GSO_SOFTWARE | \
529 NETIF_F_HW_CSUM)
530
531 static void ipip_tunnel_setup(struct net_device *dev)
532 {
533 struct ip_tunnel *t = netdev_priv(dev);
534
535 dev->netdev_ops = &ipip_netdev_ops;
536
537 dev->type = ARPHRD_TUNNEL;
538 dev->flags = IFF_NOARP;
539 dev->addr_len = 4;
540 dev->features |= NETIF_F_LLTX;
541 netif_keep_dst(dev);
542
543 dev->features |= IPIP_FEATURES;
544 dev->hw_features |= IPIP_FEATURES;
545 ip_tunnel_setup(dev, ipip_net_id);
546 INIT_LIST_HEAD(&t->fan.fan_maps);
547 }
548
549 static int ipip_tunnel_init(struct net_device *dev)
550 {
551 struct ip_tunnel *tunnel = netdev_priv(dev);
552
553 memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
554 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
555
556 tunnel->tun_hlen = 0;
557 tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
558 return ip_tunnel_init(dev);
559 }
560
561 static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
562 struct netlink_ext_ack *extack)
563 {
564 u8 proto;
565
566 if (!data || !data[IFLA_IPTUN_PROTO])
567 return 0;
568
569 proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
570 if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
571 return -EINVAL;
572
573 return 0;
574 }
575
576 static void ipip_netlink_parms(struct nlattr *data[],
577 struct ip_tunnel_parm *parms, bool *collect_md,
578 __u32 *fwmark)
579 {
580 memset(parms, 0, sizeof(*parms));
581
582 parms->iph.version = 4;
583 parms->iph.protocol = IPPROTO_IPIP;
584 parms->iph.ihl = 5;
585 *collect_md = false;
586
587 if (!data)
588 return;
589
590 if (data[IFLA_IPTUN_LINK])
591 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
592
593 if (data[IFLA_IPTUN_LOCAL])
594 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
595
596 if (data[IFLA_IPTUN_REMOTE])
597 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
598
599 if (data[IFLA_IPTUN_TTL]) {
600 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
601 if (parms->iph.ttl)
602 parms->iph.frag_off = htons(IP_DF);
603 }
604
605 if (data[IFLA_IPTUN_TOS])
606 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
607
608 if (data[IFLA_IPTUN_PROTO])
609 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
610
611 if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
612 parms->iph.frag_off = htons(IP_DF);
613
614 if (data[IFLA_IPTUN_COLLECT_METADATA])
615 *collect_md = true;
616
617 if (data[IFLA_IPTUN_FWMARK])
618 *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
619 }
620
621 /* This function returns true when ENCAP attributes are present in the nl msg */
622 static bool ipip_netlink_encap_parms(struct nlattr *data[],
623 struct ip_tunnel_encap *ipencap)
624 {
625 bool ret = false;
626
627 memset(ipencap, 0, sizeof(*ipencap));
628
629 if (!data)
630 return ret;
631
632 if (data[IFLA_IPTUN_ENCAP_TYPE]) {
633 ret = true;
634 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
635 }
636
637 if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
638 ret = true;
639 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
640 }
641
642 if (data[IFLA_IPTUN_ENCAP_SPORT]) {
643 ret = true;
644 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
645 }
646
647 if (data[IFLA_IPTUN_ENCAP_DPORT]) {
648 ret = true;
649 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
650 }
651
652 return ret;
653 }
654
655 static void ipip_fan_flush_map(struct ip_tunnel *t)
656 {
657 struct ip_fan_map *fan_map;
658
659 list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
660 list_del_rcu(&fan_map->list);
661 kfree_rcu(fan_map, rcu);
662 }
663 }
664
665 static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay)
666 {
667 struct ip_fan_map *fan_map;
668
669 fan_map = ipip_fan_find_map(t, overlay);
670 if (!fan_map)
671 return -ENOENT;
672
673 list_del_rcu(&fan_map->list);
674 kfree_rcu(fan_map, rcu);
675
676 return 0;
677 }
678
679 static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map)
680 {
681 __be32 overlay_mask, underlay_mask;
682 struct ip_fan_map *fan_map;
683
684 overlay_mask = inet_make_mask(map->overlay_prefix);
685 underlay_mask = inet_make_mask(map->underlay_prefix);
686
687 if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
688 return -EINVAL;
689
690 if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
691 return -EINVAL;
692
693 /* Special case: overlay 0 and underlay 0: flush all mappings */
694 if (!map->overlay && !map->underlay) {
695 ipip_fan_flush_map(t);
696 return 0;
697 }
698
699 /* Special case: overlay set and underlay 0: clear map for overlay */
700 if (!map->underlay)
701 return ipip_fan_del_map(t, map->overlay);
702
703 if (ipip_fan_find_map(t, map->overlay))
704 return -EEXIST;
705
706 fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
707 fan_map->underlay = map->underlay;
708 fan_map->overlay = map->overlay;
709 fan_map->underlay_prefix = map->underlay_prefix;
710 fan_map->overlay_mask = ntohl(overlay_mask);
711 fan_map->overlay_prefix = map->overlay_prefix;
712
713 list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps);
714
715 return 0;
716 }
717
718
719 static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
720 struct ip_tunnel_parm *parms)
721 {
722 struct ifla_fan_map *map;
723 struct nlattr *attr;
724 int rem, rv;
725
726 if (data == NULL || !data[IFLA_IPTUN_FAN_MAP])
727 return 0;
728
729 if (parms->iph.daddr)
730 return -EINVAL;
731
732 nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
733 map = nla_data(attr);
734 rv = ipip_fan_add_map(t, map);
735 if (rv)
736 return rv;
737 }
738
739 return 0;
740 }
741
742 static int ipip_newlink(struct net *src_net, struct net_device *dev,
743 struct nlattr *tb[], struct nlattr *data[],
744 struct netlink_ext_ack *extack)
745 {
746 struct ip_tunnel *t = netdev_priv(dev);
747 struct ip_tunnel_parm p;
748 struct ip_tunnel_encap ipencap;
749 __u32 fwmark = 0;
750 int err;
751
752 if (ipip_netlink_encap_parms(data, &ipencap)) {
753 err = ip_tunnel_encap_setup(t, &ipencap);
754
755 if (err < 0)
756 return err;
757 }
758
759 ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
760 err = ipip_netlink_fan(data, t, &p);
761 if (err < 0)
762 return err;
763 return ip_tunnel_newlink(dev, tb, &p, fwmark);
764 }
765
766 static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
767 struct nlattr *data[],
768 struct netlink_ext_ack *extack)
769 {
770 struct ip_tunnel *t = netdev_priv(dev);
771 struct ip_tunnel_parm p;
772 struct ip_tunnel_encap ipencap;
773 bool collect_md;
774 __u32 fwmark = t->fwmark;
775 int err;
776
777 if (ipip_netlink_encap_parms(data, &ipencap)) {
778 err = ip_tunnel_encap_setup(t, &ipencap);
779
780 if (err < 0)
781 return err;
782 }
783
784 ipip_netlink_parms(data, &p, &collect_md, &fwmark);
785 if (collect_md)
786 return -EINVAL;
787 err = ipip_netlink_fan(data, t, &p);
788 if (err < 0)
789 return err;
790
791 if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
792 (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
793 return -EINVAL;
794
795 return ip_tunnel_changelink(dev, tb, &p, fwmark);
796 }
797
798 static size_t ipip_get_size(const struct net_device *dev)
799 {
800 return
801 /* IFLA_IPTUN_LINK */
802 nla_total_size(4) +
803 /* IFLA_IPTUN_LOCAL */
804 nla_total_size(4) +
805 /* IFLA_IPTUN_REMOTE */
806 nla_total_size(4) +
807 /* IFLA_IPTUN_TTL */
808 nla_total_size(1) +
809 /* IFLA_IPTUN_TOS */
810 nla_total_size(1) +
811 /* IFLA_IPTUN_PROTO */
812 nla_total_size(1) +
813 /* IFLA_IPTUN_PMTUDISC */
814 nla_total_size(1) +
815 /* IFLA_IPTUN_ENCAP_TYPE */
816 nla_total_size(2) +
817 /* IFLA_IPTUN_ENCAP_FLAGS */
818 nla_total_size(2) +
819 /* IFLA_IPTUN_ENCAP_SPORT */
820 nla_total_size(2) +
821 /* IFLA_IPTUN_ENCAP_DPORT */
822 nla_total_size(2) +
823 /* IFLA_IPTUN_COLLECT_METADATA */
824 nla_total_size(0) +
825 /* IFLA_IPTUN_FWMARK */
826 nla_total_size(4) +
827 /* IFLA_IPTUN_FAN_MAP */
828 nla_total_size(sizeof(struct ifla_fan_map)) * 256 +
829 0;
830 }
831
832 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
833 {
834 struct ip_tunnel *tunnel = netdev_priv(dev);
835 struct ip_tunnel_parm *parm = &tunnel->parms;
836
837 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
838 nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
839 nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
840 nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
841 nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
842 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
843 nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
844 !!(parm->iph.frag_off & htons(IP_DF))) ||
845 nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
846 goto nla_put_failure;
847
848 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
849 tunnel->encap.type) ||
850 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
851 tunnel->encap.sport) ||
852 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
853 tunnel->encap.dport) ||
854 nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
855 tunnel->encap.flags))
856 goto nla_put_failure;
857
858 if (tunnel->collect_md)
859 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
860 goto nla_put_failure;
861 if (fan_has_map(&tunnel->fan)) {
862 struct nlattr *fan_nest;
863 struct ip_fan_map *fan_map;
864
865 fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
866 if (!fan_nest)
867 goto nla_put_failure;
868 list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) {
869 struct ifla_fan_map map;
870
871 map.underlay = fan_map->underlay;
872 map.underlay_prefix = fan_map->underlay_prefix;
873 map.overlay = fan_map->overlay;
874 map.overlay_prefix = fan_map->overlay_prefix;
875 if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
876 goto nla_put_failure;
877 }
878 nla_nest_end(skb, fan_nest);
879 }
880
881 return 0;
882
883 nla_put_failure:
884 return -EMSGSIZE;
885 }
886
887 static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
888 [IFLA_IPTUN_LINK] = { .type = NLA_U32 },
889 [IFLA_IPTUN_LOCAL] = { .type = NLA_U32 },
890 [IFLA_IPTUN_REMOTE] = { .type = NLA_U32 },
891 [IFLA_IPTUN_TTL] = { .type = NLA_U8 },
892 [IFLA_IPTUN_TOS] = { .type = NLA_U8 },
893 [IFLA_IPTUN_PROTO] = { .type = NLA_U8 },
894 [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 },
895 [IFLA_IPTUN_ENCAP_TYPE] = { .type = NLA_U16 },
896 [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 },
897 [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 },
898 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
899 [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
900 [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
901
902 [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX] = { .type = NLA_BINARY },
903 [IFLA_IPTUN_FAN_MAP] = { .type = NLA_NESTED },
904 };
905
906 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
907 .kind = "ipip",
908 .maxtype = IFLA_IPTUN_MAX,
909 .policy = ipip_policy,
910 .priv_size = sizeof(struct ip_tunnel),
911 .setup = ipip_tunnel_setup,
912 .validate = ipip_tunnel_validate,
913 .newlink = ipip_newlink,
914 .changelink = ipip_changelink,
915 .dellink = ip_tunnel_dellink,
916 .get_size = ipip_get_size,
917 .fill_info = ipip_fill_info,
918 .get_link_net = ip_tunnel_get_link_net,
919 };
920
921 static struct xfrm_tunnel ipip_handler __read_mostly = {
922 .handler = ipip_rcv,
923 .err_handler = ipip_err,
924 .priority = 1,
925 };
926
927 #if IS_ENABLED(CONFIG_MPLS)
928 static struct xfrm_tunnel mplsip_handler __read_mostly = {
929 .handler = mplsip_rcv,
930 .err_handler = ipip_err,
931 .priority = 1,
932 };
933 #endif
934
935 static int __net_init ipip_init_net(struct net *net)
936 {
937 return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
938 }
939
940 static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
941 {
942 ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
943 }
944
945 static struct pernet_operations ipip_net_ops = {
946 .init = ipip_init_net,
947 .exit_batch = ipip_exit_batch_net,
948 .id = &ipip_net_id,
949 .size = sizeof(struct ip_tunnel_net),
950 };
951
#ifdef CONFIG_SYSCTL
/* Value exported through the read-only (0444) "version" sysctl below. */
static unsigned int ipip_fan_version = 3;

/* Handle from register_net_sysctl(), used again when unregistering. */
static struct ctl_table_header *ipip_fan_header;

/* Table registered under "net/fan" by ipip_init(). */
static struct ctl_table ipip_fan_sysctls[] = {
	{
		.procname	= "version",
		.mode		= 0444,
		.data		= &ipip_fan_version,
		.maxlen		= sizeof(ipip_fan_version),
		.proc_handler	= proc_dointvec,
	},
	{},
};

#endif /* CONFIG_SYSCTL */
968
969 static int __init ipip_init(void)
970 {
971 int err;
972
973 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
974
975 err = register_pernet_device(&ipip_net_ops);
976 if (err < 0)
977 return err;
978 err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
979 if (err < 0) {
980 pr_info("%s: can't register tunnel\n", __func__);
981 goto xfrm_tunnel_ipip_failed;
982 }
983 #if IS_ENABLED(CONFIG_MPLS)
984 err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
985 if (err < 0) {
986 pr_info("%s: can't register tunnel\n", __func__);
987 goto xfrm_tunnel_mplsip_failed;
988 }
989 #endif
990 err = rtnl_link_register(&ipip_link_ops);
991 if (err < 0)
992 goto rtnl_link_failed;
993
994 #ifdef CONFIG_SYSCTL
995 ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
996 ipip_fan_sysctls);
997 if (!ipip_fan_header) {
998 err = -ENOMEM;
999 goto sysctl_failed;
1000 }
1001 #endif /* CONFIG_SYSCTL */
1002
1003 out:
1004 return err;
1005
1006 #ifdef CONFIG_SYSCTL
1007 sysctl_failed:
1008 rtnl_link_unregister(&ipip_link_ops);
1009 #endif /* CONFIG_SYSCTL */
1010 rtnl_link_failed:
1011 #if IS_ENABLED(CONFIG_MPLS)
1012 xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
1013 xfrm_tunnel_mplsip_failed:
1014
1015 #endif
1016 xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
1017 xfrm_tunnel_ipip_failed:
1018 unregister_pernet_device(&ipip_net_ops);
1019 goto out;
1020 }
1021
1022 static void __exit ipip_fini(void)
1023 {
1024 #ifdef CONFIG_SYSCTL
1025 unregister_net_sysctl_table(ipip_fan_header);
1026 #endif /* CONFIG_SYSCTL */
1027 rtnl_link_unregister(&ipip_link_ops);
1028 if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
1029 pr_info("%s: can't deregister tunnel\n", __func__);
1030 #if IS_ENABLED(CONFIG_MPLS)
1031 if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
1032 pr_info("%s: can't deregister tunnel\n", __func__);
1033 #endif
1034 unregister_pernet_device(&ipip_net_ops);
1035 }
1036
1037 module_init(ipip_init);
1038 module_exit(ipip_fini);
1039 MODULE_LICENSE("GPL");
1040 MODULE_ALIAS_RTNL_LINK("ipip");
1041 MODULE_ALIAS_NETDEV("tunl0");