net/ipv4/ipip.c

   1 /*
   2  *      Linux NET3:     IP/IP protocol decoder.
   3  *
   4  *      Authors:
   5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
   6  *
   7  *      Fixes:
   8  *              Alan Cox        :       Merged and made usable non modular (its so tiny its silly as
   9  *                                      a module taking up 2 pages).
  10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
  11  *                                      to keep ip_forward happy.
  12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
  13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
  14  *              David Woodhouse :       Perform some basic ICMP handling.
  15  *                                      IPIP Routing without decapsulation.
  16  *              Carlos Picoto   :       GRE over IP support
  17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
  18  *                                      I do not want to merge them together.
  19  *
  20  *      This program is free software; you can redistribute it and/or
  21  *      modify it under the terms of the GNU General Public License
  22  *      as published by the Free Software Foundation; either version
  23  *      2 of the License, or (at your option) any later version.
  24  *
  25  */
  26
  27 /* tunnel.c: an IP tunnel driver
  28
  29         The purpose of this driver is to provide an IP tunnel through
  30         which you can tunnel network traffic transparently across subnets.
  31
  32         This was written by looking at Nick Holloway's dummy driver
  33         Thanks for the great code!
  34
  35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
  36
  37         Minor tweaks:
  38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
  39                 dev->hard_header/hard_header_len changed to use no headers.
  40                 Comments/bracketing tweaked.
  41                 Made the tunnels use dev->name not tunnel: when error reporting.
  42                 Added tx_dropped stat
  43
  44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
  45
  46         Reworked:
  47                 Changed to tunnel to destination gateway in addition to the
  48                         tunnel's pointopoint address
  49                 Almost completely rewritten
  50                 Note:  There is currently no firewall or ICMP handling done.
  51
  52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
  53
  54 */
  55
  56 /* Things I wish I had known when writing the tunnel driver:
  57
  58         When the tunnel_xmit() function is called, the skb contains the
  59         packet to be sent (plus a great deal of extra info), and dev
  60         contains the tunnel device that _we_ are.
  61
  62         When we are passed a packet, we are expected to fill in the
  63         source address with our source IP address.
  64
  65         What is the proper way to allocate, copy and free a buffer?
  66         After you allocate it, it is a "0 length" chunk of memory
  67         starting at zero.  If you want to add headers to the buffer
  68         later, you'll have to call "skb_reserve(skb, amount)" with
  69         the amount of memory you want reserved.  Then, you call
  70         "skb_put(skb, amount)" with the amount of space you want in
  71         the buffer.  skb_put() returns a pointer to the top (#0) of
  72         that buffer.  skb->len is set to the amount of space you have
  73         "allocated" with skb_put().  You can then write up to skb->len
  74         bytes to that buffer.  If you need more, you can call skb_put()
  75         again with the additional amount of space you need.  You can
  76         find out how much more space you can allocate by calling
  77         "skb_tailroom(skb)".
  78         Now, to add header space, call "skb_push(skb, header_len)".
  79         This creates space at the beginning of the buffer and returns
  80         a pointer to this new space.  If later you need to strip a
  81         header from a buffer, call "skb_pull(skb, header_len)".
  82         skb_headroom() will return how much space is left at the top
  83         of the buffer (before the main data).  Remember, this headroom
  84         space must be reserved before the skb_put() function is called.
  85         */
  86
  87 /*
  88    This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
  89
  90    For comments look at net/ipv4/ip_gre.c --ANK
  91  */
  92
  93
  94 #include <linux/capability.h>
  95 #include <linux/module.h>
  96 #include <linux/types.h>
  97 #include <linux/kernel.h>
  98 #include <linux/slab.h>
  99 #include <linux/uaccess.h>
 100 #include <linux/skbuff.h>
 101 #include <linux/netdevice.h>
 102 #include <linux/in.h>
 103 #include <linux/tcp.h>
 104 #include <linux/udp.h>
 105 #include <linux/if_arp.h>
 106 #include <linux/init.h>
 107 #include <linux/netfilter_ipv4.h>
 108 #include <linux/if_ether.h>
 109 #include <linux/inetdevice.h>
 110 #include <linux/rculist.h>
 111
 112 #include <net/sock.h>
 113 #include <net/ip.h>
 114 #include <net/icmp.h>
 115 #include <net/ip_tunnels.h>
 116 #include <net/inet_ecn.h>
 117 #include <net/xfrm.h>
 118 #include <net/net_namespace.h>
 119 #include <net/netns/generic.h>
 120 #include <net/dst_metadata.h>
 121
      /* Module parameter (mode 0644); ipip_tunnel_rcv() passes it to
       * ip_tunnel_rcv() for every packet it accepts.
       */
  122 static bool log_ecn_error = true;
  123 module_param(log_ecn_error, bool, 0644);
  124 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
  125
      /* Key used with net_generic() to fetch this driver's struct ip_tunnel_net. */
  126 static unsigned int ipip_net_id __read_mostly;
  127
      /* Forward declarations; ipip_tunnel_init() is referenced by
       * ipip_netdev_ops ahead of its definition.
       */
  128 static int ipip_tunnel_init(struct net_device *dev);
  129 static struct rtnl_link_ops ipip_link_ops __read_mostly;
 130
 131 static int ipip_err(struct sk_buff *skb, u32 info)
 132 {
 133
 134 /* All the routers (except for Linux) return only
 135    8 bytes of packet payload. It means, that precise relaying of
 136    ICMP in the real Internet is absolutely infeasible.
 137  */
 138         struct net *net = dev_net(skb->dev);
 139         struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 140         const struct iphdr *iph = (const struct iphdr *)skb->data;
 141         struct ip_tunnel *t;
 142         int err;
 143         const int type = icmp_hdr(skb)->type;
 144         const int code = icmp_hdr(skb)->code;
 145
 146         err = -ENOENT;
 147         t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 148                              iph->daddr, iph->saddr, 0);
 149         if (!t)
 150                 goto out;
 151
 152         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
 153                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
 154                                  t->parms.link, 0, iph->protocol, 0);
 155                 err = 0;
 156                 goto out;
 157         }
 158
 159         if (type == ICMP_REDIRECT) {
 160                 ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
 161                               iph->protocol, 0);
 162                 err = 0;
 163                 goto out;
 164         }
 165
 166         if (t->parms.iph.daddr == 0)
 167                 goto out;
 168
 169         err = 0;
 170         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
 171                 goto out;
 172
 173         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
 174                 t->err_count++;
 175         else
 176                 t->err_count = 1;
 177         t->err_time = jiffies;
 178
 179 out:
 180         return err;
 181 }
 182
      /* Packet info used by ipip_tunnel_rcv() for IPv4 payloads: its .proto is
       * given to iptunnel_pull_header() and the structure to ip_tunnel_rcv().
       */
  183 static const struct tnl_ptk_info ipip_tpi = {
  184         /* no tunnel info required for ipip. */
  185         .proto = htons(ETH_P_IP),
  186 };
 187
  188 #if IS_ENABLED(CONFIG_MPLS)
      /* MPLS counterpart of ipip_tpi; selected by ipip_tunnel_rcv() when the
       * outer protocol is IPPROTO_MPLS.  Only built with CONFIG_MPLS.
       */
  189 static const struct tnl_ptk_info mplsip_tpi = {
  190         /* no tunnel info required for mplsip. */
  191         .proto = htons(ETH_P_MPLS_UC),
  192 };
  193 #endif
 194
 195 static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto)
 196 {
 197         struct net *net = dev_net(skb->dev);
 198         struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 199         struct metadata_dst *tun_dst = NULL;
 200         struct ip_tunnel *tunnel;
 201         const struct iphdr *iph;
 202
 203         iph = ip_hdr(skb);
 204         tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 205                         iph->saddr, iph->daddr, 0);
 206         if (tunnel) {
 207                 const struct tnl_ptk_info *tpi;
 208
 209                 if (tunnel->parms.iph.protocol != ipproto &&
 210                     tunnel->parms.iph.protocol != 0)
 211                         goto drop;
 212
 213                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 214                         goto drop;
 215 #if IS_ENABLED(CONFIG_MPLS)
 216                 if (ipproto == IPPROTO_MPLS)
 217                         tpi = &mplsip_tpi;
 218                 else
 219 #endif
 220                         tpi = &ipip_tpi;
 221                 if (iptunnel_pull_header(skb, 0, tpi->proto, false))
 222                         goto drop;
 223                 if (tunnel->collect_md) {
 224                         tun_dst = ip_tun_rx_dst(skb, 0, 0, 0);
 225                         if (!tun_dst)
 226                                 return 0;
 227                 }
 228                 return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
 229         }
 230
 231         return -1;
 232
 233 drop:
 234         kfree_skb(skb);
 235         return 0;
 236 }
 237
 238 static int ipip_rcv(struct sk_buff *skb)
 239 {
 240         return ipip_tunnel_rcv(skb, IPPROTO_IPIP);
 241 }
 242
 243 #if IS_ENABLED(CONFIG_MPLS)
 244 static int mplsip_rcv(struct sk_buff *skb)
 245 {
 246         return ipip_tunnel_rcv(skb, IPPROTO_MPLS);
 247 }
 248 #endif
 249
 250 static struct ip_fan_map *ipip_fan_find_map(struct ip_tunnel *t, __be32 daddr)
 251 {
 252         struct ip_fan_map *fan_map;
 253
 254         rcu_read_lock();
 255         list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
 256                 if (fan_map->overlay ==
 257                     (daddr & inet_make_mask(fan_map->overlay_prefix))) {
 258                         rcu_read_unlock();
 259                         return fan_map;
 260                 }
 261         }
 262         rcu_read_unlock();
 263
 264         return NULL;
 265 }
 266
 267 /* Determine fan tunnel endpoint to send packet to, based on the inner IP
 268  * address.
 269  *
 270  * Given a /8 overlay and /16 underlay, for an overlay (inner) address
 271  * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
 272  * two octets of the underlay network (the network portion of a /16), "A"
 273  * and "B" are the low order two octets of the underlay network host (the
 274  * host portion of a /16), and "Y" is a configured first octet of the
 275  * overlay network.
 276  *
 277  * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
 278  * host overlay subnet 99.3.4.0/24.  An overlay network datagram from
 279  * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
 280  * which hosts overlay network subnet 99.6.7.0/24.  This transformation is
 281  * described in detail further below.
 282  *
 283  * Using netmasks for the overlay and underlay other than /8 and /16, as
 284  * shown above, can yield larger (or smaller) overlay subnets, with the
 285  * trade-off of allowing fewer (or more) underlay hosts to participate.
 286  *
 287  * The size of each overlay network subnet is defined by the total of the
 288  * network mask of the overlay plus the size of host portion of the
 289  * underlay network. In the above example, /8 + /16 = /24.
 290  *
 291  * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
 292  * this case, the network portion of the underlay is 10.99.224.0/20, and
 293  * the host portion is 0.0.14.5 (12 bits).  To determine the overlay
 294  * network subnet, the 12 bits of host portion are left shifted 12 bits
 295  * (/20 - /8) and ORed with the overlay subnet prefix.  This yields an
  296  * overlay subnet of 99.224.80.0/20, composed of 8 bits overlay, followed by
 297  * 12 bits underlay.  This yields 12 bits in the overlay network portion,
 298  * allowing for 4094 addresses in each overlay network subnet.  The
 299  * trade-off is that fewer hosts may participate in the underlay network,
 300  * as its host address size has shrunk from 16 bits (65534 addresses) in
 301  * the first example to 12 bits (4094 addresses) here.
 302  *
 303  * For fewer hosts per overlay subnet (permitting a larger number of
 304  * underlay hosts to participate), the underlay netmask may be made
 305  * smaller.
 306  *
 307  * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
 308  * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
  309  * the 20 bits of host by 4 (so that its highest order bit is adjacent to
 310  * the lowest order bit of the /8 overlay).  This yields an overlay subnet
 311  * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
 312  * the underlay).  This provides more addresses for the underlay network
 313  * (approximately 2^20), but each host's segment of the overlay provides
 314  * only 4 bits of addresses (14 usable).
 315  *
 316  * It is also possible to adjust the overlay subnet.
 317  *
 318  * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
 319  * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
 320  * shifted 15 bits (/20 - /5), yielding an overlay network of
 321  * 240.129.0.0/17.  An underlay host of 10.88.244.215 would yield an
 322  * overlay network of 242.107.128.0/17.
 323  *
 324  * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
 325  * underlay host 10.224.220.10, the underlay host portion (.10) is left
 326  * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
 327  * This would permit 254 addresses on the underlay, with each overlay
 328  * segment providing approximately 2^14 - 2 addresses (16382).
 329  *
 330  * For packets being encapsulated, the overlay network destination IP
 331  * address is deconstructed into its overlay and underlay-derived
 332  * portions.  The underlay portion (determined by the overlay mask and
 333  * overlay subnet mask) is right shifted according to the size of the
 334  * underlay network mask.  This value is then ORed with the network
 335  * portion of the underlay network to produce the underlay network
 336  * destination for the encapsulated datagram.
 337  *
 338  * For example, using the initial example of underlay 10.88.3.4/16 and
 339  * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
  340  * subnet 99.3.4.0/24 with specific host 99.3.4.5.  A datagram from
 341  * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
 342  * of the address extracted.  This is a number of bits equal to underlay
 343  * network host portion.  In the destination address, the highest order of
 344  * these bits is one bit lower than the lowest order bit from the overlay
 345  * network mask.
 346  *
 347  * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
 348  * underlay mask is /16 (leaving 16 bits for the host portion).  The bits
 349  * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
 350  * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
 351  * which is 1 bit lower than the lowest order overlay address bit).
 352  *
 353  * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
 354  * This value is then ORed with the underlay network portion,
 355  * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
  356  * the encapsulated datagram.
 357  *
 358  * Another transform using the final example: overlay 100.64.0.0/10 and
 359  * underlay 10.224.220.0/24.  Consider overlay address 100.66.128.1
 360  * sending a datagram to 100.66.200.5.  In this case, 8 bits (the host
 361  * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
 362  * prefix are masked off, yielding 0.2.192.0.  This is right shifted 14
 363  * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
 364  * network portion and the underlay host portion) bits, yielding 0.0.0.11.
 365  * This is ORed with the underlay network portion, 10.224.220.0/24, giving
 366  * the underlay destination of 10.224.220.11 for overlay destination
 367  * 100.66.200.5.
 368  */
 369 static int ipip_build_fan_iphdr(struct ip_tunnel *tunnel, struct sk_buff *skb, struct iphdr *iph)
 370 {
 371         struct ip_fan_map *f_map;
 372         u32 daddr, underlay;
 373
 374         f_map = ipip_fan_find_map(tunnel, ip_hdr(skb)->daddr);
 375         if (!f_map)
 376                 return -ENOENT;
 377
 378         daddr = ntohl(ip_hdr(skb)->daddr);
 379         underlay = ntohl(f_map->underlay);
 380         if (!underlay)
 381                 return -EINVAL;
 382
 383         *iph = tunnel->parms.iph;
 384         iph->daddr = htonl(underlay |
 385                            ((daddr & ~f_map->overlay_mask) >>
 386                             (32 - f_map->overlay_prefix -
 387                              (32 - f_map->underlay_prefix))));
 388         return 0;
 389 }
 390
 391 /*
 392  *      This function assumes it is being called from dev_queue_xmit()
 393  *      and that skb is filled properly by that function.
 394  */
 395 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb,
 396                                     struct net_device *dev)
 397 {
 398         struct ip_tunnel *tunnel = netdev_priv(dev);
 399         const struct iphdr  *tiph = &tunnel->parms.iph;
 400         u8 ipproto;
 401         struct iphdr fiph;
 402
 403         switch (skb->protocol) {
 404         case htons(ETH_P_IP):
 405                 ipproto = IPPROTO_IPIP;
 406                 break;
 407 #if IS_ENABLED(CONFIG_MPLS)
 408         case htons(ETH_P_MPLS_UC):
 409                 ipproto = IPPROTO_MPLS;
 410                 break;
 411 #endif
 412         default:
 413                 goto tx_error;
 414         }
 415
 416         if (tiph->protocol != ipproto && tiph->protocol != 0)
 417                 goto tx_error;
 418
 419         if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP4))
 420                 goto tx_error;
 421
 422         if (fan_has_map(&tunnel->fan)) {
 423                 if (ipip_build_fan_iphdr(tunnel, skb, &fiph))
 424                         goto tx_error;
 425                 tiph = &fiph;
 426         } else {
 427                 tiph = &tunnel->parms.iph;
 428         }
 429
 430         skb_set_inner_ipproto(skb, ipproto);
 431
 432         if (tunnel->collect_md)
 433                 ip_md_tunnel_xmit(skb, dev, ipproto);
 434         else
 435                 ip_tunnel_xmit(skb, dev, tiph, ipproto);
 436         return NETDEV_TX_OK;
 437
 438 tx_error:
 439         kfree_skb(skb);
 440
 441         dev->stats.tx_errors++;
 442         return NETDEV_TX_OK;
 443 }
 444
 445 static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
 446 {
 447         switch (ipproto) {
 448         case 0:
 449         case IPPROTO_IPIP:
 450 #if IS_ENABLED(CONFIG_MPLS)
 451         case IPPROTO_MPLS:
 452 #endif
 453                 return true;
 454         }
 455
 456         return false;
 457 }
 458
 459 static int
 460 ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 461 {
 462         int err = 0;
 463         struct ip_tunnel_parm p;
 464
 465         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
 466                 return -EFAULT;
 467
 468         if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
 469                 if (p.iph.version != 4 ||
 470                     !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
 471                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
 472                         return -EINVAL;
 473         }
 474
 475         p.i_key = p.o_key = 0;
 476         p.i_flags = p.o_flags = 0;
 477         err = ip_tunnel_ioctl(dev, &p, cmd);
 478         if (err)
 479                 return err;
 480
 481         if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
 482                 return -EFAULT;
 483
 484         return 0;
 485 }
 486
      /* net_device callbacks for IPIP devices; installed by ipip_tunnel_setup().
       * Init, transmit and ioctl are implemented in this file; the remaining
       * hooks point at ip_tunnel_* helpers.
       */
  487 static const struct net_device_ops ipip_netdev_ops = {
  488         .ndo_init       = ipip_tunnel_init,
  489         .ndo_uninit     = ip_tunnel_uninit,
  490         .ndo_start_xmit = ipip_tunnel_xmit,
  491         .ndo_do_ioctl   = ipip_tunnel_ioctl,
  492         .ndo_change_mtu = ip_tunnel_change_mtu,
  493         .ndo_get_stats64 = ip_tunnel_get_stats64,
  494         .ndo_get_iflink = ip_tunnel_get_iflink,
  495 };
 496
      /* Feature bits that ipip_tunnel_setup() ORs into both dev->features and
       * dev->hw_features.
       */
  497 #define IPIP_FEATURES (NETIF_F_SG |             \
  498                        NETIF_F_FRAGLIST |       \
  499                        NETIF_F_HIGHDMA |        \
  500                        NETIF_F_GSO_SOFTWARE |   \
  501                        NETIF_F_HW_CSUM)
 502
 503 static void ipip_tunnel_setup(struct net_device *dev)
 504 {
 505         struct ip_tunnel *t = netdev_priv(dev);
 506
 507         dev->netdev_ops         = &ipip_netdev_ops;
 508
 509         dev->type               = ARPHRD_TUNNEL;
 510         dev->flags              = IFF_NOARP;
 511         dev->addr_len           = 4;
 512         dev->features           |= NETIF_F_LLTX;
 513         netif_keep_dst(dev);
 514
 515         dev->features           |= IPIP_FEATURES;
 516         dev->hw_features        |= IPIP_FEATURES;
 517         ip_tunnel_setup(dev, ipip_net_id);
 518         INIT_LIST_HEAD(&t->fan.fan_maps);
 519 }
 520
 521 static int ipip_tunnel_init(struct net_device *dev)
 522 {
 523         struct ip_tunnel *tunnel = netdev_priv(dev);
 524
 525         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
 526         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 527
 528         tunnel->tun_hlen = 0;
 529         tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
 530         return ip_tunnel_init(dev);
 531 }
 532
 533 static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
 534                                 struct netlink_ext_ack *extack)
 535 {
 536         u8 proto;
 537
 538         if (!data || !data[IFLA_IPTUN_PROTO])
 539                 return 0;
 540
 541         proto = nla_get_u8(data[IFLA_IPTUN_PROTO]);
 542         if (proto != IPPROTO_IPIP && proto != IPPROTO_MPLS && proto != 0)
 543                 return -EINVAL;
 544
 545         return 0;
 546 }
 547
 548 static void ipip_netlink_parms(struct nlattr *data[],
 549                                struct ip_tunnel_parm *parms, bool *collect_md,
 550                                __u32 *fwmark)
 551 {
 552         memset(parms, 0, sizeof(*parms));
 553
 554         parms->iph.version = 4;
 555         parms->iph.protocol = IPPROTO_IPIP;
 556         parms->iph.ihl = 5;
 557         *collect_md = false;
 558
 559         if (!data)
 560                 return;
 561
 562         if (data[IFLA_IPTUN_LINK])
 563                 parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]);
 564
 565         if (data[IFLA_IPTUN_LOCAL])
 566                 parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]);
 567
 568         if (data[IFLA_IPTUN_REMOTE])
 569                 parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]);
 570
 571         if (data[IFLA_IPTUN_TTL]) {
 572                 parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]);
 573                 if (parms->iph.ttl)
 574                         parms->iph.frag_off = htons(IP_DF);
 575         }
 576
 577         if (data[IFLA_IPTUN_TOS])
 578                 parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]);
 579
 580         if (data[IFLA_IPTUN_PROTO])
 581                 parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]);
 582
 583         if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC]))
 584                 parms->iph.frag_off = htons(IP_DF);
 585
 586         if (data[IFLA_IPTUN_COLLECT_METADATA])
 587                 *collect_md = true;
 588
 589         if (data[IFLA_IPTUN_FWMARK])
 590                 *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
 591 }
 592
 593 /* This function returns true when ENCAP attributes are present in the nl msg */
 594 static bool ipip_netlink_encap_parms(struct nlattr *data[],
 595                                      struct ip_tunnel_encap *ipencap)
 596 {
 597         bool ret = false;
 598
 599         memset(ipencap, 0, sizeof(*ipencap));
 600
 601         if (!data)
 602                 return ret;
 603
 604         if (data[IFLA_IPTUN_ENCAP_TYPE]) {
 605                 ret = true;
 606                 ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]);
 607         }
 608
 609         if (data[IFLA_IPTUN_ENCAP_FLAGS]) {
 610                 ret = true;
 611                 ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]);
 612         }
 613
 614         if (data[IFLA_IPTUN_ENCAP_SPORT]) {
 615                 ret = true;
 616                 ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]);
 617         }
 618
 619         if (data[IFLA_IPTUN_ENCAP_DPORT]) {
 620                 ret = true;
 621                 ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]);
 622         }
 623
 624         return ret;
 625 }
 626
 627 static void ipip_fan_flush_map(struct ip_tunnel *t)
 628 {
 629         struct ip_fan_map *fan_map;
 630
 631         list_for_each_entry_rcu(fan_map, &t->fan.fan_maps, list) {
 632                 list_del_rcu(&fan_map->list);
 633                 kfree_rcu(fan_map, rcu);
 634         }
 635 }
 636
 637 static int ipip_fan_del_map(struct ip_tunnel *t, __be32 overlay)
 638 {
 639         struct ip_fan_map *fan_map;
 640
 641         fan_map = ipip_fan_find_map(t, overlay);
 642         if (!fan_map)
 643                 return -ENOENT;
 644
 645         list_del_rcu(&fan_map->list);
 646         kfree_rcu(fan_map, rcu);
 647
 648         return 0;
 649 }
 650
 651 static int ipip_fan_add_map(struct ip_tunnel *t, struct ifla_fan_map *map)
 652 {
 653         __be32 overlay_mask, underlay_mask;
 654         struct ip_fan_map *fan_map;
 655
 656         overlay_mask = inet_make_mask(map->overlay_prefix);
 657         underlay_mask = inet_make_mask(map->underlay_prefix);
 658
 659         if ((map->overlay & ~overlay_mask) || (map->underlay & ~underlay_mask))
 660                 return -EINVAL;
 661
 662         if (!(map->overlay & overlay_mask) && (map->underlay & underlay_mask))
 663                 return -EINVAL;
 664
 665         /* Special case: overlay 0 and underlay 0: flush all mappings */
 666         if (!map->overlay && !map->underlay) {
 667                 ipip_fan_flush_map(t);
 668                 return 0;
 669         }
 670
 671         /* Special case: overlay set and underlay 0: clear map for overlay */
 672         if (!map->underlay)
 673                 return ipip_fan_del_map(t, map->overlay);
 674
 675         if (ipip_fan_find_map(t, map->overlay))
 676                 return -EEXIST;
 677
 678         fan_map = kmalloc(sizeof(*fan_map), GFP_KERNEL);
 679         fan_map->underlay = map->underlay;
 680         fan_map->overlay = map->overlay;
 681         fan_map->underlay_prefix = map->underlay_prefix;
 682         fan_map->overlay_mask = ntohl(overlay_mask);
 683         fan_map->overlay_prefix = map->overlay_prefix;
 684
 685         list_add_tail_rcu(&fan_map->list, &t->fan.fan_maps);
 686
 687         return 0;
 688 }
 689
 690
 691 static int ipip_netlink_fan(struct nlattr *data[], struct ip_tunnel *t,
 692                             struct ip_tunnel_parm *parms)
 693 {
 694         struct ifla_fan_map *map;
 695         struct nlattr *attr;
 696         int rem, rv;
 697
 698         if (!data[IFLA_IPTUN_FAN_MAP])
 699                 return 0;
 700
 701         if (parms->iph.daddr)
 702                 return -EINVAL;
 703
 704         nla_for_each_nested(attr, data[IFLA_IPTUN_FAN_MAP], rem) {
 705                 map = nla_data(attr);
 706                 rv = ipip_fan_add_map(t, map);
 707                 if (rv)
 708                         return rv;
 709         }
 710
 711         return 0;
 712 }
 713
 714 static int ipip_newlink(struct net *src_net, struct net_device *dev,
 715                         struct nlattr *tb[], struct nlattr *data[],
 716                         struct netlink_ext_ack *extack)
 717 {
 718         struct ip_tunnel *t = netdev_priv(dev);
 719         struct ip_tunnel_parm p;
 720         struct ip_tunnel_encap ipencap;
 721         __u32 fwmark = 0;
 722         int err;
 723
 724         if (ipip_netlink_encap_parms(data, &ipencap)) {
 725                 err = ip_tunnel_encap_setup(t, &ipencap);
 726
 727                 if (err < 0)
 728                         return err;
 729         }
 730
 731         ipip_netlink_parms(data, &p, &t->collect_md, &fwmark);
 732         err = ipip_netlink_fan(data, t, &p);
 733         if (err < 0)
 734                 return err;
 735         return ip_tunnel_newlink(dev, tb, &p, fwmark);
 736 }
 737
 738 static int ipip_changelink(struct net_device *dev, struct nlattr *tb[],
 739                            struct nlattr *data[],
 740                            struct netlink_ext_ack *extack)
 741 {
 742         struct ip_tunnel *t = netdev_priv(dev);
 743         struct ip_tunnel_parm p;
 744         struct ip_tunnel_encap ipencap;
 745         bool collect_md;
 746         __u32 fwmark = t->fwmark;
 747         int err;
 748
 749         if (ipip_netlink_encap_parms(data, &ipencap)) {
 750                 err = ip_tunnel_encap_setup(t, &ipencap);
 751
 752                 if (err < 0)
 753                         return err;
 754         }
 755
 756         ipip_netlink_parms(data, &p, &collect_md, &fwmark);
 757         if (collect_md)
 758                 return -EINVAL;
 759         err = ipip_netlink_fan(data, t, &p);
 760         if (err < 0)
 761                 return err;
 762
 763         if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
 764             (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
 765                 return -EINVAL;
 766
 767         return ip_tunnel_changelink(dev, tb, &p, fwmark);
 768 }
 769
 770 static size_t ipip_get_size(const struct net_device *dev)
 771 {
 772         return
 773                 /* IFLA_IPTUN_LINK */
 774                 nla_total_size(4) +
 775                 /* IFLA_IPTUN_LOCAL */
 776                 nla_total_size(4) +
 777                 /* IFLA_IPTUN_REMOTE */
 778                 nla_total_size(4) +
 779                 /* IFLA_IPTUN_TTL */
 780                 nla_total_size(1) +
 781                 /* IFLA_IPTUN_TOS */
 782                 nla_total_size(1) +
 783                 /* IFLA_IPTUN_PROTO */
 784                 nla_total_size(1) +
 785                 /* IFLA_IPTUN_PMTUDISC */
 786                 nla_total_size(1) +
 787                 /* IFLA_IPTUN_ENCAP_TYPE */
 788                 nla_total_size(2) +
 789                 /* IFLA_IPTUN_ENCAP_FLAGS */
 790                 nla_total_size(2) +
 791                 /* IFLA_IPTUN_ENCAP_SPORT */
 792                 nla_total_size(2) +
 793                 /* IFLA_IPTUN_ENCAP_DPORT */
 794                 nla_total_size(2) +
 795                 /* IFLA_IPTUN_COLLECT_METADATA */
 796                 nla_total_size(0) +
 797                 /* IFLA_IPTUN_FWMARK */
 798                 nla_total_size(4) +
 799                 /* IFLA_IPTUN_FAN_MAP */
 800                 nla_total_size(sizeof(struct ifla_fan_map)) * 256 +
 801                 0;
 802 }
 803
 804 static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
 805 {
 806         struct ip_tunnel *tunnel = netdev_priv(dev);
 807         struct ip_tunnel_parm *parm = &tunnel->parms;
 808
 809         if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
 810             nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) ||
 811             nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) ||
 812             nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) ||
 813             nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) ||
 814             nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->iph.protocol) ||
 815             nla_put_u8(skb, IFLA_IPTUN_PMTUDISC,
 816                        !!(parm->iph.frag_off & htons(IP_DF))) ||
 817             nla_put_u32(skb, IFLA_IPTUN_FWMARK, tunnel->fwmark))
 818                 goto nla_put_failure;
 819
 820         if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE,
 821                         tunnel->encap.type) ||
 822             nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT,
 823                          tunnel->encap.sport) ||
 824             nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT,
 825                          tunnel->encap.dport) ||
 826             nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
 827                         tunnel->encap.flags))
 828                 goto nla_put_failure;
 829
 830         if (tunnel->collect_md)
 831                 if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA))
 832                         goto nla_put_failure;
 833         if (fan_has_map(&tunnel->fan)) {
 834                 struct nlattr *fan_nest;
 835                 struct ip_fan_map *fan_map;
 836
 837                 fan_nest = nla_nest_start(skb, IFLA_IPTUN_FAN_MAP);
 838                 if (!fan_nest)
 839                         goto nla_put_failure;
 840                 list_for_each_entry_rcu(fan_map, &tunnel->fan.fan_maps, list) {
 841                         struct ifla_fan_map map;
 842
 843                         map.underlay = fan_map->underlay;
 844                         map.underlay_prefix = fan_map->underlay_prefix;
 845                         map.overlay = fan_map->overlay;
 846                         map.overlay_prefix = fan_map->overlay_prefix;
 847                         if (nla_put(skb, IFLA_FAN_MAPPING, sizeof(map), &map))
 848                                 goto nla_put_failure;
 849                 }
 850                 nla_nest_end(skb, fan_nest);
 851         }
 852
 853         return 0;
 854
 855 nla_put_failure:
 856         return -EMSGSIZE;
 857 }
 858
/*
 * Netlink attribute policy for the IFLA_IPTUN_* attributes, indexed by
 * attribute type.  Referenced from ipip_link_ops.policy.
 */
static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = {
        [IFLA_IPTUN_LINK]               = { .type = NLA_U32 },
        [IFLA_IPTUN_LOCAL]              = { .type = NLA_U32 },
        [IFLA_IPTUN_REMOTE]             = { .type = NLA_U32 },
        [IFLA_IPTUN_TTL]                = { .type = NLA_U8 },
        [IFLA_IPTUN_TOS]                = { .type = NLA_U8 },
        [IFLA_IPTUN_PROTO]              = { .type = NLA_U8 },
        [IFLA_IPTUN_PMTUDISC]           = { .type = NLA_U8 },
        [IFLA_IPTUN_ENCAP_TYPE]         = { .type = NLA_U16 },
        [IFLA_IPTUN_ENCAP_FLAGS]        = { .type = NLA_U16 },
        [IFLA_IPTUN_ENCAP_SPORT]        = { .type = NLA_U16 },
        [IFLA_IPTUN_ENCAP_DPORT]        = { .type = NLA_U16 },
        [IFLA_IPTUN_COLLECT_METADATA]   = { .type = NLA_FLAG },
        [IFLA_IPTUN_FWMARK]             = { .type = NLA_U32 },

        /*
         * Every attribute from __IFLA_IPTUN_VENDOR_BREAK up to
         * IFLA_IPTUN_MAX defaults to NLA_BINARY.  If IFLA_IPTUN_FAN_MAP
         * lies inside that range (presumably it does - confirm against
         * the uapi header), its entry must stay AFTER the range
         * initializer: the last initializer for an element wins.
         */
        [__IFLA_IPTUN_VENDOR_BREAK ... IFLA_IPTUN_MAX]  = { .type = NLA_BINARY },
        [IFLA_IPTUN_FAN_MAP]            = { .type = NLA_NESTED },
};
 877
 878 static struct rtnl_link_ops ipip_link_ops __read_mostly = {
 879         .kind           = "ipip",
 880         .maxtype        = IFLA_IPTUN_MAX,
 881         .policy         = ipip_policy,
 882         .priv_size      = sizeof(struct ip_tunnel),
 883         .setup          = ipip_tunnel_setup,
 884         .validate       = ipip_tunnel_validate,
 885         .newlink        = ipip_newlink,
 886         .changelink     = ipip_changelink,
 887         .dellink        = ip_tunnel_dellink,
 888         .get_size       = ipip_get_size,
 889         .fill_info      = ipip_fill_info,
 890         .get_link_net   = ip_tunnel_get_link_net,
 891 };
 892
 893 static struct xfrm_tunnel ipip_handler __read_mostly = {
 894         .handler        =       ipip_rcv,
 895         .err_handler    =       ipip_err,
 896         .priority       =       1,
 897 };
 898
 899 #if IS_ENABLED(CONFIG_MPLS)
 900 static struct xfrm_tunnel mplsip_handler __read_mostly = {
 901         .handler        =       mplsip_rcv,
 902         .err_handler    =       ipip_err,
 903         .priority       =       1,
 904 };
 905 #endif
 906
 907 static int __net_init ipip_init_net(struct net *net)
 908 {
 909         return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
 910 }
 911
 912 static void __net_exit ipip_exit_net(struct net *net)
 913 {
 914         struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
 915         ip_tunnel_delete_net(itn, &ipip_link_ops);
 916 }
 917
 918 static struct pernet_operations ipip_net_ops = {
 919         .init = ipip_init_net,
 920         .exit = ipip_exit_net,
 921         .id   = &ipip_net_id,
 922         .size = sizeof(struct ip_tunnel_net),
 923 };
 924
 925 #ifdef CONFIG_SYSCTL
 926 static struct ctl_table_header *ipip_fan_header;
 927 static unsigned int ipip_fan_version = 3;
 928
 929 static struct ctl_table ipip_fan_sysctls[] = {
 930         {
 931                 .procname       = "version",
 932                 .data           = &ipip_fan_version,
 933                 .maxlen         = sizeof(ipip_fan_version),
 934                 .mode           = 0444,
 935                 .proc_handler   = proc_dointvec,
 936         },
 937         {},
 938 };
 939
 940 #endif /* CONFIG_SYSCTL */
 941
 942 static int __init ipip_init(void)
 943 {
 944         int err;
 945
 946         pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
 947
 948         err = register_pernet_device(&ipip_net_ops);
 949         if (err < 0)
 950                 return err;
 951         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
 952         if (err < 0) {
 953                 pr_info("%s: can't register tunnel\n", __func__);
 954                 goto xfrm_tunnel_ipip_failed;
 955         }
 956 #if IS_ENABLED(CONFIG_MPLS)
 957         err = xfrm4_tunnel_register(&mplsip_handler, AF_MPLS);
 958         if (err < 0) {
 959                 pr_info("%s: can't register tunnel\n", __func__);
 960                 goto xfrm_tunnel_mplsip_failed;
 961         }
 962 #endif
 963         err = rtnl_link_register(&ipip_link_ops);
 964         if (err < 0)
 965                 goto rtnl_link_failed;
 966
 967 #ifdef CONFIG_SYSCTL
 968         ipip_fan_header = register_net_sysctl(&init_net, "net/fan",
 969                                               ipip_fan_sysctls);
 970         if (!ipip_fan_header) {
 971                 err = -ENOMEM;
 972                 goto sysctl_failed;
 973         }
 974 #endif /* CONFIG_SYSCTL */
 975
 976 out:
 977         return err;
 978
 979 #ifdef CONFIG_SYSCTL
 980 sysctl_failed:
 981         rtnl_link_unregister(&ipip_link_ops);
 982 #endif /* CONFIG_SYSCTL */
 983 rtnl_link_failed:
 984 #if IS_ENABLED(CONFIG_MPLS)
 985         xfrm4_tunnel_deregister(&mplsip_handler, AF_INET);
 986 xfrm_tunnel_mplsip_failed:
 987
 988 #endif
 989         xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
 990 xfrm_tunnel_ipip_failed:
 991         unregister_pernet_device(&ipip_net_ops);
 992         goto out;
 993 }
 994
 995 static void __exit ipip_fini(void)
 996 {
 997 #ifdef CONFIG_SYSCTL
 998         unregister_net_sysctl_table(ipip_fan_header);
 999 #endif /* CONFIG_SYSCTL */
1000         rtnl_link_unregister(&ipip_link_ops);
1001         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
1002                 pr_info("%s: can't deregister tunnel\n", __func__);
1003 #if IS_ENABLED(CONFIG_MPLS)
1004         if (xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS))
1005                 pr_info("%s: can't deregister tunnel\n", __func__);
1006 #endif
1007         unregister_pernet_device(&ipip_net_ops);
1008 }
1009
/* Module entry and exit points. */
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
/* Aliases matching the rtnl link kind "ipip" and the device name "tunl0". */
MODULE_ALIAS_RTNL_LINK("ipip");
MODULE_ALIAS_NETDEV("tunl0");