net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Authors:
   5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   6  *
   7  *      Fixes:
   8  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
   9  *                                      a module taking up 2 pages).
  10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  11  *                                      to keep ip_forward happy.
  12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  14  *              David Woodhouse :       Perform some basic ICMP handling.
  15  *                                      IPIP Routing without decapsulation.
  16  *              Carlos Picoto   :       GRE over IP support
  17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  18  *                                      I do not want to merge them together.
  19  *
  20  *      This program is free software; you can redistribute it and/or
  21  *      modify it under the terms of the GNU General Public License
  22  *      as published by the Free Software Foundation; either version
  23  *      2 of the License, or (at your option) any later version.
  24  *
  25  */
  26
  27 /* tunnel.c: an IP tunnel driver
  28
  29         The purpose of this driver is to provide an IP tunnel through
  30         which you can tunnel network traffic transparently across subnets.
  31
  32         This was written by looking at Nick Holloway's dummy driver
  33         Thanks for the great code!
  34
  35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  36
  37         Minor tweaks:
  38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  39                 dev->hard_header/hard_header_len changed to use no headers.
  40                 Comments/bracketing tweaked.
  41                 Made the tunnels use dev->name not tunnel: when error reporting.
  42                 Added tx_dropped stat
  43
  44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
  45
  46         Reworked:
  47                 Changed to tunnel to destination gateway in addition to the
  48                         tunnel's pointopoint address
  49                 Almost completely rewritten
  50                 Note:  There is currently no firewall or ICMP handling done.
  51
  52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  53
  54 */
  55
  56 /* Things I wish I had known when writing the tunnel driver:
  57
  58         When the tunnel_xmit() function is called, the skb contains the
  59         packet to be sent (plus a great deal of extra info), and dev
  60         contains the tunnel device that _we_ are.
  61
  62         When we are passed a packet, we are expected to fill in the
  63         source address with our source IP address.
  64
  65         What is the proper way to allocate, copy and free a buffer?
  66         After you allocate it, it is a "0 length" chunk of memory
  67         starting at zero.  If you want to add headers to the buffer
  68         later, you'll have to call "skb_reserve(skb, amount)" with
  69         the amount of memory you want reserved.  Then, you call
  70         "skb_put(skb, amount)" with the amount of space you want in
  71         the buffer.  skb_put() returns a pointer to the top (#0) of
  72         that buffer.  skb->len is set to the amount of space you have
  73         "allocated" with skb_put().  You can then write up to skb->len
  74         bytes to that buffer.  If you need more, you can call skb_put()
  75         again with the additional amount of space you need.  You can
  76         find out how much more space you can allocate by calling
  77         "skb_tailroom(skb)".
  78         Now, to add header space, call "skb_push(skb, header_len)".
  79         This creates space at the beginning of the buffer and returns
  80         a pointer to this new space.  If later you need to strip a
  81         header from a buffer, call "skb_pull(skb, header_len)".
  82         skb_headroom() will return how much space is left at the top
  83         of the buffer (before the main data).  Remember, this headroom
  84         space must be reserved before the skb_put() function is called.
  85         */
  86
  87 /*
  88    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
  89
  90    For comments look at net/ipv4/ip_gre.c --ANK
  91  */
  92
  93
  94 #include <linux/capability.h>
  95 #include <linux/module.h>
  96 #include <linux/types.h>
  97 #include <linux/kernel.h>
  98 #include <linux/slab.h>
  99 #include <linux/uaccess.h>
 100 #include <linux/skbuff.h>
 101 #include <linux/netdevice.h>
 102 #include <linux/in.h>
 103 #include <linux/tcp.h>
 104 #include <linux/udp.h>
 105 #include <linux/if_arp.h>
 106 #include <linux/init.h>
 107 #include <linux/netfilter_ipv4.h>
 108 #include <linux/if_ether.h>
 109 #include <linux/inetdevice.h>
 110 #include <linux/rculist.h>
 111
 112 #include <net/sock.h>
 113 #include <net/ip.h>
 114 #include <net/icmp.h>
 115 #include <net/ip_tunnels.h>
 116 #include <net/inet_ecn.h>
 117 #include <net/xfrm.h>
 118 #include <net/net_namespace.h>
 119 #include <net/netns/generic.h>
 120 #include <net/dst_metadata.h>
 121
/* Module parameter (mode 0644) controlling whether packets received with
 * corrupted ECN are logged; its value is handed to ip_tunnel_rcv() on the
 * receive path.
 */
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

/* Key used with net_generic() to fetch this driver's per-namespace
 * struct ip_tunnel_net, and handed to ip_tunnel_setup() for each device.
 */
static unsigned int ipip_net_id __read_mostly;

/* Forward declarations.  ipip_tunnel_init() is referenced by
 * ipip_netdev_ops ahead of its definition; ipip_link_ops is presumably
 * defined further down the file -- TODO confirm.
 */
static int ipip_tunnel_init(struct net_device *dev);
static struct rtnl_link_ops ipip_link_ops __read_mostly;
 130
 131 static int ipip_err(struct sk_buff *skb, u32 info)
 132 {
 133
 134 /* All the routers (except for Linux) return only
 135    8 bytes of packet payload. It means, that precise relaying of
 136    ICMP in the real Internet is absolutely infeasible.
 137  */
 138         struct net *net = dev_net(skb->dev);
 139         struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 140         const struct iphdr *iph = (const struct iphdr *)skb->data;
 141         struct ip_tunnel *t;
 142         int err;
 143         const int type = icmp_hdr(skb)->type;
 144         const int code = icmp_hdr(skb)->code;
 145
 146         err = -ENOENT;
 147         t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 148                              iph->daddr, iph->saddr, 0);
 149         if (!t)
 150                 goto out;
 151
 152         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 153                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 154                                  t->parms.link, 0, iph->protocol, 0);
 155                 err = 0;
 156                 goto out;
 157         }
 158
 159         if (type == ICMP_REDIRECT) {
 160                 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
 161                               iph->protocol, 0);
 162                 err = 0;
 163                 goto out;
 164         }
 165
 166         if (t->parms.iph.daddr == 0)
 167                 goto out;
 168
 169         err = 0;
 170         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 171                 goto out;
 172
 173         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 174                 t->err_count++;
 175         else
 176                 t->err_count = 1;
 177         t->err_time = jiffies;
 178
 179 out:
 180         return err;
 181 }
 182
/* Tunnel packet info used on receive for IPv4-in-IPv4: only the inner
 * protocol (ETH_P_IP) is set.
 */
static const struct tnl_ptk_info ipip_tpi = {
        /* no tunnel info required for ipip. */
        .proto = htons(ETH_P_IP),
};

#if IS_ENABLED(CONFIG_MPLS)
/* Tunnel packet info used on receive for MPLS-in-IPv4: only the inner
 * protocol (ETH_P_MPLS_UC) is set.  Built only when MPLS is enabled.
 */
static const struct tnl_ptk_info mplsip_tpi = {
        /* no tunnel info required for mplsip. */
        .proto = htons(ETH_P_MPLS_UC),
};
#endif
 194
 195 static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 196 {
 197         struct net *net = dev_net(skb->dev);
 198         struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 199         struct metadata_dst *tun_dst = NULL;
 200         struct ip_tunnel *tunnel;
 201         const struct iphdr *iph;
 202
 203         iph = ip_hdr(skb);
 204         tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 205                         iph->saddr, iph->daddr, 0);
 206         if (tunnel) {
 207                 const struct tnl_ptk_info *tpi;
 208
 209                 if (tunnel->parms.iph.protocol != ipproto &&
 210                     tunnel->parms.iph.protocol != 0)
 211                         goto drop;
 212
 213                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 214                         goto drop;
 215 #if IS_ENABLED(CONFIG_MPLS)
 216                 if (ipproto == IPPROTO_MPLS)
 217                         tpi = &mplsip_tpi;
 218                 else
 219 #endif
 220                         tpi = &ipip_tpi;
 221                 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 222                         goto drop;
 223                 if (tunnel->collect_md) {
 224                         tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
 225                         if (!tun_dst)
 226                                 return 0;
 227                 }
 228                 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 229         }
 230
 231         return -1;
 232
 233 drop:
 234         kfree_skb(skb);
 235         return 0;
 236 }
 237
/* Receive entry point for IPv4-in-IPv4: runs the common receive path with
 * IPPROTO_IPIP as the outer protocol.
 */
static int ipip_rcv(struct sk_buff *skb)
{
        return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
}
 242
#if IS_ENABLED(CONFIG_MPLS)
/* Receive entry point for MPLS-in-IPv4: runs the common receive path with
 * IPPROTO_MPLS as the outer protocol.
 */
static int mplsip_rcv(struct sk_buff *skb)
{
        return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
}
#endif
 249
 250 static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr)
 251 {
 252         struct ip_fan_map *fan_map;
 253
 254         rcu_read_lock();
 255         list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
 256                 if (fan_map->overlay ==
 257                     (daddr & inet_make_mask(fan_map->overlay_prefix))) {
 258                         rcu_read_unlock();
 259                         return fan_map;
 260                 }
 261         }
 262         rcu_read_unlock();
 263
 264         return NULL;
 265 }
 266
 267 /* Determine fan tunnel endpoint to send packet to, based on the inner IP
 268  * address.
 269  *
 270  * Given a /8 overlay and /16 underlay, for an overlay (inner) address
 271  * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
 272  * two octets of the underlay network (the network portion of a /16), "A"
 273  * and "B" are the low order two octets of the underlay network host (the
 274  * host portion of a /16), and "Y" is a configured first octet of the
 275  * overlay network.
 276  *
 277  * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
 278  * host overlay subnet 99.3.4.0/24.  An overlay network datagram from
 279  * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
 280  * which hosts overlay network subnet 99.6.7.0/24.  This transformation is
 281  * described in detail further below.
 282  *
 283  * Using netmasks for the overlay and underlay other than /8 and /16, as
 284  * shown above, can yield larger (or smaller) overlay subnets, with the
 285  * trade-off of allowing fewer (or more) underlay hosts to participate.
 286  *
 287  * The size of each overlay network subnet is defined by the total of the
 288  * network mask of the overlay plus the size of host portion of the
 289  * underlay network. In the above example, /8 + /16 = /24.
 290  *
 291  * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
 292  * this case, the network portion of the underlay is 10.99.224.0/20, and
 293  * the host portion is 0.0.14.5 (12 bits).  To determine the overlay
 294  * network subnet, the 12 bits of host portion are left shifted 12 bits
 295  * (/20 - /8) and ORed with the overlay subnet prefix.  This yields an
 * overlay subnet of 99.224.80.0/20, composed of 8 bits overlay, followed by
 297  * 12 bits underlay.  This yields 12 bits in the overlay network portion,
 298  * allowing for 4094 addresses in each overlay network subnet.  The
 299  * trade-off is that fewer hosts may participate in the underlay network,
 300  * as its host address size has shrunk from 16 bits (65534 addresses) in
 301  * the first example to 12 bits (4094 addresses) here.
 302  *
 303  * For fewer hosts per overlay subnet (permitting a larger number of
 304  * underlay hosts to participate), the underlay netmask may be made
 305  * smaller.
 306  *
 307  * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
 308  * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
 * the 20 bits of host by 4 (so that its highest order bit is adjacent to
 310  * the lowest order bit of the /8 overlay).  This yields an overlay subnet
 311  * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
 312  * the underlay).  This provides more addresses for the underlay network
 313  * (approximately 2^20), but each host's segment of the overlay provides
 314  * only 4 bits of addresses (14 usable).
 315  *
 316  * It is also possible to adjust the overlay subnet.
 317  *
 318  * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
 319  * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
 320  * shifted 15 bits (/20 - /5), yielding an overlay network of
 321  * 240.129.0.0/17.  An underlay host of 10.88.244.215 would yield an
 322  * overlay network of 242.107.128.0/17.
 323  *
 324  * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
 325  * underlay host 10.224.220.10, the underlay host portion (.10) is left
 326  * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
 327  * This would permit 254 addresses on the underlay, with each overlay
 328  * segment providing approximately 2^14 - 2 addresses (16382).
 329  *
 330  * For packets being encapsulated, the overlay network destination IP
 331  * address is deconstructed into its overlay and underlay-derived
 332  * portions.  The underlay portion (determined by the overlay mask and
 333  * overlay subnet mask) is right shifted according to the size of the
 334  * underlay network mask.  This value is then ORed with the network
 335  * portion of the underlay network to produce the underlay network
 336  * destination for the encapsulated datagram.
 337  *
 338  * For example, using the initial example of underlay 10.88.3.4/16 and
 339  * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
 * subnet 99.3.4.0/24 with specific host 99.3.4.5.  A datagram from
 341  * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
 342  * of the address extracted.  This is a number of bits equal to underlay
 343  * network host portion.  In the destination address, the highest order of
 344  * these bits is one bit lower than the lowest order bit from the overlay
 345  * network mask.
 346  *
 347  * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
 348  * underlay mask is /16 (leaving 16 bits for the host portion).  The bits
 349  * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
 350  * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
 351  * which is 1 bit lower than the lowest order overlay address bit).
 352  *
 353  * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
 354  * This value is then ORed with the underlay network portion,
 355  * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
 * the encapsulated datagram.
 357  *
 358  * Another transform using the final example: overlay 100.64.0.0/10 and
 359  * underlay 10.224.220.0/24.  Consider overlay address 100.66.128.1
 360  * sending a datagram to 100.66.200.5.  In this case, 8 bits (the host
 361  * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
 362  * prefix are masked off, yielding 0.2.192.0.  This is right shifted 14
 363  * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
 364  * network portion and the underlay host portion) bits, yielding 0.0.0.11.
 365  * This is ORed with the underlay network portion, 10.224.220.0/24, giving
 366  * the underlay destination of 10.224.220.11 for overlay destination
 367  * 100.66.200.5.
 368  */
 369 static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
 370 {
 371         struct ip_fan_map *f_map;
 372         u32 daddr, underlay;
 373
 374         f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr);
 375         if (!f_map)
 376                 return -ENOENT;
 377
 378         daddr = ntohl(ip_hdr(skb)->daddr);
 379         underlay = ntohl(f_map->underlay);
 380         if (!underlay)
 381                 return -EINVAL;
 382
 383         *iph = tunnel->parms.iph;
 384         iph->daddr = htonl(underlay |
 385                            ((daddr & ~f_map->overlay_mask) >>
 386                             (32 - f_map->overlay_prefix -
 387                              (32 - f_map->underlay_prefix))));
 388         return 0;
 389 }
 390
 391 /*
 392  *      This function assumes it is being called from dev_queue_xmit()
 393  *      and that skb is filled properly by that function.
 394  */
 395 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
 396                                     struct net_device *dev)
 397 {
 398         struct ip_tunnel *tunnel = netdev_priv(dev);
 399         const struct iphdr  *tiph = &tunnel->parms.iph;
 400         u8 ipproto;
 401         struct iphdr fiph;
 402
 403         switch (skb->protocol) {
 404         case htons(ETH_P_IP):
 405                 ipproto = IPPROTO_IPIP;
 406                 break;
 407 #if IS_ENABLED(CONFIG_MPLS)
 408         case htons(ETH_P_MPLS_UC):
 409                 ipproto = IPPROTO_MPLS;
 410                 break;
 411 #endif
 412         default:
 413                 goto tx_error;
 414         }
 415
 416         if (tiph->protocol != ipproto && tiph->protocol != 0)
 417                 goto tx_error;
 418
 419         if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
 420                 goto tx_error;
 421
 422         if (fan_has_map(&tunnel->fan)) {
 423                 if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
 424                         goto tx_error;
 425                 tiph = &fiph;
 426         } else {
 427                 tiph = &tunnel->parms.iph;
 428         }
 429
 430         skb_set_inner_ipproto(skb, ipproto);
 431
 432         if (tunnel->collect_md)
 433                 ip_md_tunnel_xmit(skb, dev, ipproto);
 434         else
 435                 ip_tunnel_xmit(skb, dev, tiph, ipproto);
 436         return NETDEV_TX_OK;
 437
 438 tx_error:
 439         kfree_skb(skb);
 440
 441         dev->stats.tx_errors++;
 442         return NETDEV_TX_OK;
 443 }
 444
 445 static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
 446 {
 447         switch (ipproto) {
 448         case 0:
 449         case IPPROTO_IPIP:
 450 #if IS_ENABLED(CONFIG_MPLS)
 451         case IPPROTO_MPLS:
 452 #endif
 453                 return true;
 454         }
 455
 456         return false;
 457 }
 458
 459 static int
 460 ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 461 {
 462         int err = 0;
 463         struct ip_tunnel_parm p;
 464
 465         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 466                 return -EFAULT;
 467
 468         if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 469                 if (p.iph.version != 4 ||
 470                     !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
 471                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 472                         return -EINVAL;
 473         }
 474
 475         p.i_key = p.o_key = 0;
 476         p.i_flags = p.o_flags = 0;
 477         err = ip_tunnel_ioctl(dev, &p, cmd);
 478         if (err)
 479                 return err;
 480
 481         if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 482                 return -EFAULT;
 483
 484         return 0;
 485 }
 486
/* Device operations installed by ipip_tunnel_setup().  Init, transmit and
 * ioctl are implemented in this file; the remaining callbacks are the
 * ip_tunnel_* helpers.
 */
static const struct net_device_ops ipip_netdev_ops = {
        .ndo_init       = ipip_tunnel_init,
        .ndo_uninit     = ip_tunnel_uninit,
        .ndo_start_xmit = ipip_tunnel_xmit,
        .ndo_do_ioctl   = ipip_tunnel_ioctl,
        .ndo_change_mtu = ip_tunnel_change_mtu,
        .ndo_get_stats64 = ip_tunnel_get_stats64,
        .ndo_get_iflink = ip_tunnel_get_iflink,
};
 496
/* Feature flags that ipip_tunnel_setup() ORs into both dev->features and
 * dev->hw_features.
 */
#define IPIP_FEATURES (NETIF_F_SG |             \
                       NETIF_F_FRAGLIST |       \
                       NETIF_F_HIGHDMA |        \
                       NETIF_F_GSO_SOFTWARE |   \
                       NETIF_F_HW_CSUM)
 502
 503 static void ipip_tunnel_setup(struct net_device *dev)
 504 {
 505         struct ip_tunnel *t = netdev_priv(dev);
 506
 507         dev->netdev_ops         = &ipip_netdev_ops;
 508
 509         dev->type               = ARPHRD_TUNNEL;
 510         dev->flags              = IFF_NOARP;
 511         dev->addr_len           = 4;
 512         dev->features           |= NETIF_F_LLTX;
 513         netif_keep_dst(dev);
 514
 515         dev->features           |= IPIP_FEATURES;
 516         dev->hw_features        |= IPIP_FEATURES;
 517         ip_tunnel_setup(dev, ipip_net_id);
 518         INIT_LIST_HEAD(&t->fan.fan_maps);
 519 }
 520
 521 static int ipip_tunnel_init(struct net_device *dev)
 522 {
 523         struct ip_tunnel *tunnel = netdev_priv(dev);
 524
 525         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 526         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 527
 528         tunnel->tun_hlen = 0;
 529         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
 530         return ip_tunnel_init(dev);
 531 }
 532
 533 static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
 534 {
 535         u8 proto;
 536
 537         if (!data || !data[IFLA_IPTUN_PROTO])
 538                 return 0;
 539
 540         proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
 541         if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
 542                 return -EINVAL;
 543
 544         return 0;
 545 }
 546
 547 static void ipip_netlink_parms(struct nlattr *data[],
 548                                struct ip_tunnel_parm *parms, bool *collect_md)
 549 {
 550         memset(parms, 0, sizeof(*parms));
 551
 552         parms->iph.version = 4;
 553         parms->iph.protocol = IPPROTO_IPIP;
 554         parms->iph.ihl = 5;
 555         *collect_md = false;
 556
 557         if (!data)
 558                 return;
 559
 560         if (data[IFLA_IPTUN_LINK])
 561                 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
 562
 563         if (data[IFLA_IPTUN_LOCAL])
 564                 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
 565
 566         if (data[IFLA_IPTUN_REMOTE])
 567                 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
 568
 569         if (data[IFLA_IPTUN_TTL]) {
 570                 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
 571                 if (parms->iph.ttl)
 572                         parms->iph.frag_off = htons(IP_DF);
 573         }
 574
 575         if (data[IFLA_IPTUN_TOS])
 576                 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
 577
 578         if (data[IFLA_IPTUN_PROTO])
 579                 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
 580
 581         if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
 582                 parms->iph.frag_off = htons(IP_DF);
 583
 584         if (data[IFLA_IPTUN_COLLECT_METADATA])
 585                 *collect_md = true;
 586 }
 587
 588 /* This function returns true when ENCAP attributes are present in the nl msg */
 589 static bool ipip_netlink_encap_parms(struct nlattr *data[],
 590                                      struct ip_tunnel_encap *ipencap)
 591 {
 592         bool ret = false;
 593
 594         memset(ipencap, 0, sizeof(*ipencap));
 595
 596         if (!data)
 597                 return ret;
 598
 599         if (data[IFLA_IPTUN_ENCAP_TYPE]) {
 600                 ret = true;
 601                 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
 602         }
 603
 604         if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
 605                 ret = true;
 606                 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
 607         }
 608
 609         if (data[IFLA_IPTUN_ENCAP_SPORT]) {
 610                 ret = true;
 611                 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
 612         }
 613
 614         if (data[IFLA_IPTUN_ENCAP_DPORT]) {
 615                 ret = true;
 616                 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
 617         }
 618
 619         return ret;
 620 }
 621
 622 static void ipip_fan_flush_map(struct ip_tunnel *t)
 623 {
 624         struct ip_fan_map *fan_map;
 625
 626         list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
 627                 list_del_rcu(&fan_map->list);
 628                 kfree_rcu(fan_map, rcu);
 629         }
 630 }
 631
 632 static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay)
 633 {
 634         struct ip_fan_map *fan_map;
 635
 636         fan_map = ipip_fan_find_map(t, overlay);
 637         if (!fan_map)
 638                 return -ENOENT;
 639
 640         list_del_rcu(&fan_map->list);
 641         kfree_rcu(fan_map, rcu);
 642
 643         return 0;
 644 }
 645
 646 static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map)
 647 {
 648         __be32 overlay_mask, underlay_mask;
 649         struct ip_fan_map *fan_map;
 650
 651         overlay_mask = inet_make_mask(map->overlay_prefix);
 652         underlay_mask = inet_make_mask(map->underlay_prefix);
 653
 654         if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
 655                 return -EINVAL;
 656
 657         if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
 658                 return -EINVAL;
 659
 660         /* Special case: overlay 0 and underlay 0: flush all mappings */
 661         if (!map->overlay && !map->underlay) {
 662                 ipip_fan_flush_map(t);
 663                 return 0;
 664         }
 665
 666         /* Special case: overlay set and underlay 0: clear map for overlay */
 667         if (!map->underlay)
 668                 return ipip_fan_del_map(t, map->overlay);
 669
 670         if (ipip_fan_find_map(t, map->overlay))
 671                 return -EEXIST;
 672
 673         fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
 674         fan_map->underlay = map->underlay;
 675         fan_map->overlay = map->overlay;
 676         fan_map->underlay_prefix = map->underlay_prefix;
 677         fan_map->overlay_mask = ntohl(overlay_mask);
 678         fan_map->overlay_prefix = map->overlay_prefix;
 679
 680         list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps);
 681
 682         return 0;
 683 }
 684
 685
 686 static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
 687                             struct ip_tunnel_parm *parms)
 688 {
 689         struct ifla_fan_map *map;
 690         struct nlattr *attr;
 691         int rem, rv;
 692
 693         if (!data[IFLA_IPTUN_FAN_MAP])
 694                 return 0;
 695
 696         if (parms->iph.daddr)
 697                 return -EINVAL;
 698
 699         nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
 700                 map = nla_data(attr);
 701                 rv = ipip_fan_add_map(t, map);
 702                 if (rv)
 703                         return rv;
 704         }
 705
 706         return 0;
 707 }
 708
 709 static int ipip_newlink(struct net *src_net, struct net_device *dev,
 710                         struct nlattr *tb[], struct nlattr *data[])
 711 {
 712         struct ip_tunnel *t = netdev_priv(dev);
 713         struct ip_tunnel_parm p;
 714         struct ip_tunnel_encap ipencap;
 715         int err;
 716
 717         if (ipip_netlink_encap_parms(data, &ipencap)) {
 718                 err = ip_tunnel_encap_setup(t, &ipencap);
 719
 720                 if (err < 0)
 721                         return err;
 722         }
 723
 724         ipip_netlink_parms(data, &p, &t->collect_md);
 725         err = ipip_netlink_fan(data, t, &p);
 726         if (err < 0)
 727                 return err;
 728         return ip_tunnel_newlink(dev, tb, &p);
 729 }
 730
 731 static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
 732                            struct nlattr *data[])
 733 {
 734         struct ip_tunnel_parm p;
 735         struct ip_tunnel_encap ipencap;
 736         bool collect_md;
 737         struct ip_tunnel *t = netdev_priv(dev);
 738         int err;
 739
 740         if (ipip_netlink_encap_parms(data, &ipencap)) {
 741                 err = ip_tunnel_encap_setup(t, &ipencap);
 742
 743                 if (err < 0)
 744                         return err;
 745         }
 746
 747         ipip_netlink_parms(data, &p, &collect_md);
 748         if (collect_md)
 749                 return -EINVAL;
 750         err = ipip_netlink_fan(data, t, &p);
 751         if (err < 0)
 752                 return err;
 753
 754         if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
 755             (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
 756                 return -EINVAL;
 757
 758         return ip_tunnel_changelink(dev, tb, &p);
 759 }
 760
 761 static size_t ipip_get_size(const struct net_device *dev)
 762 {
 763         return
 764                 /* IFLA_IPTUN_LINK */
 765                 nla_total_size(4) +
 766                 /* IFLA_IPTUN_LOCAL */
 767                 nla_total_size(4) +
 768                 /* IFLA_IPTUN_REMOTE */
 769                 nla_total_size(4) +
 770                 /* IFLA_IPTUN_TTL */
 771                 nla_total_size(1) +
 772                 /* IFLA_IPTUN_TOS */
 773                 nla_total_size(1) +
 774                 /* IFLA_IPTUN_PROTO */
 775                 nla_total_size(1) +
 776                 /* IFLA_IPTUN_PMTUDISC */
 777                 nla_total_size(1) +
 778                 /* IFLA_IPTUN_ENCAP_TYPE */
 779                 nla_total_size(2) +
 780                 /* IFLA_IPTUN_ENCAP_FLAGS */
 781                 nla_total_size(2) +
 782                 /* IFLA_IPTUN_ENCAP_SPORT */
 783                 nla_total_size(2) +
 784                 /* IFLA_IPTUN_ENCAP_DPORT */
 785                 nla_total_size(2) +
 786                 /* IFLA_IPTUN_COLLECT_METADATA */
 787                 nla_total_size(0) +
 788                 /* IFLA_IPTUN_FAN_MAP */
 789                 nla_total_size(sizeof(struct ifla_fan_map)) * 256 +
 790                 0;
 791 }
 792
 793 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
 794 {
 795         struct ip_tunnel *tunnel = netdev_priv(dev);
 796         struct ip_tunnel_parm *parm = &tunnel->parms;
 797
 798         if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
 799             nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
 800             nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
 801             nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
 802             nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
 803             nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
 804             nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
 805                        !!(parm->iph.frag_off & htons(IP_DF))))
 806                 goto nla_put_failure;
 807
 808         if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
 809                         tunnel->encap.type) ||
 810             nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
 811                          tunnel->encap.sport) ||
 812             nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
 813                          tunnel->encap.dport) ||
 814             nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
 815                         tunnel->encap.flags))
 816                 goto nla_put_failure;
 817
 818         if (tunnel->collect_md)
 819                 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
 820                         goto nla_put_failure;
 821         if (fan_has_map(&tunnel->fan)) {
 822                 struct nlattr *fan_nest;
 823                 struct ip_fan_map *fan_map;
 824
 825                 fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
 826                 if (!fan_nest)
 827                         goto nla_put_failure;
 828                 list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) {
 829                         struct ifla_fan_map map;
 830
 831                         map.underlay = fan_map->underlay;
 832                         map.underlay_prefix = fan_map->underlay_prefix;
 833                         map.overlay = fan_map->overlay;
 834                         map.overlay_prefix = fan_map->overlay_prefix;
 835                         if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
 836                                 goto nla_put_failure;
 837                 }
 838                 nla_nest_end(skb, fan_nest);
 839         }
 840
 841         return 0;
 842
 843 nla_put_failure:
 844         return -EMSGSIZE;
 845 }
 846
 847 static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
 848         [IFLA_IPTUN_LINK]               = { .type = NLA_U32 },
 849         [IFLA_IPTUN_LOCAL]              = { .type = NLA_U32 },
 850         [IFLA_IPTUN_REMOTE]             = { .type = NLA_U32 },
 851         [IFLA_IPTUN_TTL]                = { .type = NLA_U8 },
 852         [IFLA_IPTUN_TOS]                = { .type = NLA_U8 },
 853         [IFLA_IPTUN_PROTO]              = { .type = NLA_U8 },
 854         [IFLA_IPTUN_PMTUDISC]           = { .type = NLA_U8 },
 855         [IFLA_IPTUN_ENCAP_TYPE]         = { .type = NLA_U16 },
 856         [IFLA_IPTUN_ENCAP_FLAGS]        = { .type = NLA_U16 },
 857         [IFLA_IPTUN_ENCAP_SPORT]        = { .type = NLA_U16 },
 858         [IFLA_IPTUN_ENCAP_DPORT]        = { .type = NLA_U16 },
 859         [IFLA_IPTUN_COLLECT_METADATA]   = { .type = NLA_FLAG },
 860
 861         [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX]  = { .type = NLA_BINARY },
 862         [IFLA_IPTUN_FAN_MAP]            = { .type = NLA_NESTED },
 863 };
 864
 865 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
 866         .kind           = "ipip",
 867         .maxtype        = IFLA_IPTUN_MAX,
 868         .policy         = ipip_policy,
 869         .priv_size      = sizeof(struct ip_tunnel),
 870         .setup          = ipip_tunnel_setup,
 871         .validate       = ipip_tunnel_validate,
 872         .newlink        = ipip_newlink,
 873         .changelink     = ipip_changelink,
 874         .dellink        = ip_tunnel_dellink,
 875         .get_size       = ipip_get_size,
 876         .fill_info      = ipip_fill_info,
 877         .get_link_net   = ip_tunnel_get_link_net,
 878 };
 879
 880 static struct xfrm_tunnel ipip_handler __read_mostly = {
 881         .handler        =       ipip_rcv,
 882         .err_handler    =       ipip_err,
 883         .priority       =       1,
 884 };
 885
 886 #if IS_ENABLED(CONFIG_MPLS)
 887 static struct xfrm_tunnel mplsip_handler __read_mostly = {
 888         .handler        =       mplsip_rcv,
 889         .err_handler    =       ipip_err,
 890         .priority       =       1,
 891 };
 892 #endif
 893
 894 static int __net_init ipip_init_net(struct net *net)
 895 {
 896         return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
 897 }
 898
 899 static void __net_exit ipip_exit_net(struct net *net)
 900 {
 901         struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 902         ip_tunnel_delete_net(itn, &ipip_link_ops);
 903 }
 904
 905 static struct pernet_operations ipip_net_ops = {
 906         .init = ipip_init_net,
 907         .exit = ipip_exit_net,
 908         .id   = &ipip_net_id,
 909         .size = sizeof(struct ip_tunnel_net),
 910 };
 911
 912 #ifdef CONFIG_SYSCTL
 913 static struct ctl_table_header *ipip_fan_header;
 914 static unsigned int ipip_fan_version = 3;
 915
 916 static struct ctl_table ipip_fan_sysctls[] = {
 917         {
 918                 .procname       = "version",
 919                 .data           = &ipip_fan_version,
 920                 .maxlen         = sizeof(ipip_fan_version),
 921                 .mode           = 0444,
 922                 .proc_handler   = proc_dointvec,
 923         },
 924         {},
 925 };
 926
 927 #endif /* CONFIG_SYSCTL */
 928
 929 static int __init ipip_init(void)
 930 {
 931         int err;
 932
 933         pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
 934
 935         err = register_pernet_device(&ipip_net_ops);
 936         if (err < 0)
 937                 return err;
 938         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 939         if (err < 0) {
 940                 pr_info("%s: can't register tunnel\n", __func__);
 941                 goto xfrm_tunnel_ipip_failed;
 942         }
 943 #if IS_ENABLED(CONFIG_MPLS)
 944         err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
 945         if (err < 0) {
 946                 pr_info("%s: can't register tunnel\n", __func__);
 947                 goto xfrm_tunnel_mplsip_failed;
 948         }
 949 #endif
 950         err = rtnl_link_register(&ipip_link_ops);
 951         if (err < 0)
 952                 goto rtnl_link_failed;
 953
 954 #ifdef CONFIG_SYSCTL
 955         ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
 956                                               ipip_fan_sysctls);
 957         if (!ipip_fan_header) {
 958                 err = -ENOMEM;
 959                 goto sysctl_failed;
 960         }
 961 #endif /* CONFIG_SYSCTL */
 962
 963 out:
 964         return err;
 965
 966 #ifdef CONFIG_SYSCTL
 967 sysctl_failed:
 968         rtnl_link_unregister(&ipip_link_ops);
 969 #endif /* CONFIG_SYSCTL */
 970 rtnl_link_failed:
 971 #if IS_ENABLED(CONFIG_MPLS)
 972         xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
 973 xfrm_tunnel_mplsip_failed:
 974
 975 #endif
 976         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 977 xfrm_tunnel_ipip_failed:
 978         unregister_pernet_device(&ipip_net_ops);
 979         goto out;
 980 }
 981
 982 static void __exit ipip_fini(void)
 983 {
 984 #ifdef CONFIG_SYSCTL
 985         unregister_net_sysctl_table(ipip_fan_header);
 986 #endif /* CONFIG_SYSCTL */
 987         rtnl_link_unregister(&ipip_link_ops);
 988         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
 989                 pr_info("%s: can't deregister tunnel\n", __func__);
 990 #if IS_ENABLED(CONFIG_MPLS)
 991         if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
 992                 pr_info("%s: can't deregister tunnel\n", __func__);
 993 #endif
 994         unregister_pernet_device(&ipip_net_ops);
 995 }
 996
 997 module_init(ipip_init);
 998 module_exit(ipip_fini);
 999 MODULE_LICENSE("GPL");
1000 MODULE_ALIAS_RTNL_LINK("ipip");
1001 MODULE_ALIAS_NETDEV("tunl0");