2 * Linux NET3: IP/IP protocol decoder.
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 * Alan Cox : Merged and made usable non modular (it's so tiny it's silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
27 /* tunnel.c: an IP tunnel driver
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
56 /* Things I wish I had known when writing the tunnel driver:
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
76 find out how much more space you can allocate by calling
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
88 This version of net/ipv4/ipip.c is a clone of net/ipv4/ip_gre.c
90 For comments look at net/ipv4/ip_gre.c --ANK
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109 #include <linux/inetdevice.h>
110 #include <linux/rculist.h>
112 #include <net/sock.h>
114 #include <net/icmp.h>
115 #include <net/ip_tunnels.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 #include <net/net_namespace.h>
119 #include <net/netns/generic.h>
120 #include <net/dst_metadata.h>
/*
 * log_ecn_error: boolean module parameter (mode 0644, default true).
 * It is handed to ip_tunnel_rcv() as the last argument by ipip_tunnel_rcv().
 */
122 static bool log_ecn_error
= true;
123 module_param(log_ecn_error
, bool, 0644);
124 MODULE_PARM_DESC(log_ecn_error
, "Log packets received with corrupted ECN");
/*
 * ipip_net_id is the key passed to net_generic() throughout this file to
 * obtain the struct ip_tunnel_net of a namespace. The two declarations
 * that follow are forward declarations for ipip_tunnel_init() and
 * ipip_link_ops, which are defined further down.
 */
126 static unsigned int ipip_net_id __read_mostly
;
128 static int ipip_tunnel_init(struct net_device
*dev
);
129 static struct rtnl_link_ops ipip_link_ops __read_mostly
;
/*
 * ipip_err - ICMP error handler for IPIP tunnels.
 * @skb:  buffer whose data starts with the IP header of the offending packet
 * @info: value forwarded unchanged to ipv4_update_pmtu()
 *
 * Looks up the tunnel with ip_tunnel_lookup() using the daddr/saddr of the
 * header found at skb->data. For ICMP_DEST_UNREACH/ICMP_FRAG_NEEDED it calls
 * ipv4_update_pmtu(); for ICMP_REDIRECT it calls ipv4_redirect(). It also
 * tests the tunnel's configured daddr and ttl, compares jiffies against
 * t->err_time + IPTUNNEL_ERR_TIMEO and finally stores jiffies in t->err_time.
 * NOTE(review): the statements taken on each of those tests and the return
 * value are not visible in this listing.
 */
131 static int ipip_err(struct sk_buff
*skb
, u32 info
)
134 /* All the routers (except for Linux) return only
135 8 bytes of packet payload. It means, that precise relaying of
136 ICMP in the real Internet is absolutely infeasible.
138 struct net
*net
= dev_net(skb
->dev
);
139 struct ip_tunnel_net
*itn
= net_generic(net
, ipip_net_id
);
140 const struct iphdr
*iph
= (const struct iphdr
*)skb
->data
;
143 const int type
= icmp_hdr(skb
)->type
;
144 const int code
= icmp_hdr(skb
)->code
;
147 t
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
148 iph
->daddr
, iph
->saddr
, 0);
152 if (type
== ICMP_DEST_UNREACH
&& code
== ICMP_FRAG_NEEDED
) {
153 ipv4_update_pmtu(skb
, dev_net(skb
->dev
), info
,
154 t
->parms
.link
, 0, iph
->protocol
, 0);
159 if (type
== ICMP_REDIRECT
) {
160 ipv4_redirect(skb
, dev_net(skb
->dev
), t
->parms
.link
, 0,
166 if (t
->parms
.iph
.daddr
== 0)
170 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
173 if (time_before(jiffies
, t
->err_time
+ IPTUNNEL_ERR_TIMEO
))
177 t
->err_time
= jiffies
;
/*
 * Tunnel packet info for IPv4-in-IPv4: only .proto is set, to ETH_P_IP in
 * network byte order.
 */
183 static const struct tnl_ptk_info ipip_tpi
= {
184 /* no tunnel info required for ipip. */
185 .proto
= htons(ETH_P_IP
),
/*
 * Tunnel packet info for MPLS-in-IPv4, compiled only when CONFIG_MPLS is
 * enabled: only .proto is set, to ETH_P_MPLS_UC in network byte order.
 */
188 #if IS_ENABLED(CONFIG_MPLS)
189 static const struct tnl_ptk_info mplsip_tpi
= {
190 /* no tunnel info required for mplsip. */
191 .proto
= htons(ETH_P_MPLS_UC
),
/*
 * ipip_tunnel_rcv - common receive path for IPIP and MPLS-over-IP packets.
 * @skb:     received buffer
 * @ipproto: outer IP protocol the caller was registered for
 *           (IPPROTO_IPIP from ipip_rcv(), IPPROTO_MPLS from mplsip_rcv())
 *
 * Looks up the tunnel by the outer saddr/daddr, checks that the tunnel's
 * configured protocol is either 0 or equal to @ipproto, runs
 * xfrm4_policy_check(), strips the outer header with iptunnel_pull_header(),
 * builds a metadata dst with ip_tun_rx_dst() when the tunnel is in
 * collect_md mode, and hands the packet to ip_tunnel_rcv().
 * NOTE(review): the assignment of iph, the selection of tpi and the
 * failure branches are not visible in this listing.
 */
195 static int ipip_tunnel_rcv(struct sk_buff
*skb
, u8 ipproto
)
197 struct net
*net
= dev_net(skb
->dev
);
198 struct ip_tunnel_net
*itn
= net_generic(net
, ipip_net_id
);
199 struct metadata_dst
*tun_dst
= NULL
;
200 struct ip_tunnel
*tunnel
;
201 const struct iphdr
*iph
;
204 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
205 iph
->saddr
, iph
->daddr
, 0);
207 const struct tnl_ptk_info
*tpi
;
209 if (tunnel
->parms
.iph
.protocol
!= ipproto
&&
210 tunnel
->parms
.iph
.protocol
!= 0)
213 if (!xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
215 #if IS_ENABLED(CONFIG_MPLS)
216 if (ipproto
== IPPROTO_MPLS
)
221 if (iptunnel_pull_header(skb
, 0, tpi
->proto
, false))
223 if (tunnel
->collect_md
) {
224 tun_dst
= ip_tun_rx_dst(skb
, 0, 0, 0);
228 return ip_tunnel_rcv(tunnel
, skb
, tpi
, tun_dst
, log_ecn_error
);
/* Receive hook for IPv4-in-IPv4: defers to ipip_tunnel_rcv() with IPPROTO_IPIP. */
238 static int ipip_rcv(struct sk_buff
*skb
)
240 return ipip_tunnel_rcv(skb
, IPPROTO_IPIP
);
/*
 * Receive hook for MPLS-over-IPv4 (built only with CONFIG_MPLS): defers to
 * ipip_tunnel_rcv() with IPPROTO_MPLS.
 */
243 #if IS_ENABLED(CONFIG_MPLS)
244 static int mplsip_rcv(struct sk_buff
*skb
)
246 return ipip_tunnel_rcv(skb
, IPPROTO_MPLS
);
/*
 * ipip_fan_find_map - search a tunnel's fan maps for one covering @daddr.
 * @t:     tunnel whose t->fan.fan_maps list is walked
 * @daddr: address in network byte order
 *
 * Walks the list with list_for_each_entry_rcu() and compares each entry's
 * overlay with @daddr masked by inet_make_mask(overlay_prefix).
 * NOTE(review): the return statements are not visible in this listing;
 * callers test the result for NULL.
 */
250 static struct ip_fan_map
*ipip_fan_find_map(struct ip_tunnel
*t
, __be32 daddr
)
252 struct ip_fan_map
*fan_map
;
255 list_for_each_entry_rcu(fan_map
, &t
->fan
.fan_maps
, list
) {
256 if (fan_map
->overlay
==
257 (daddr
& inet_make_mask(fan_map
->overlay_prefix
))) {
267 /* Determine fan tunnel endpoint to send packet to, based on the inner IP
270 * Given a /8 overlay and /16 underlay, for an overlay (inner) address
271 * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
272 * two octets of the underlay network (the network portion of a /16), "A"
273 * and "B" are the low order two octets of the underlay network host (the
274 * host portion of a /16), and "Y" is a configured first octet of the
277 * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
278 * host overlay subnet 99.3.4.0/24. An overlay network datagram from
279 * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
280 * which hosts overlay network subnet 99.6.7.0/24. This transformation is
281 * described in detail further below.
283 * Using netmasks for the overlay and underlay other than /8 and /16, as
284 * shown above, can yield larger (or smaller) overlay subnets, with the
285 * trade-off of allowing fewer (or more) underlay hosts to participate.
287 * The size of each overlay network subnet is defined by the total of the
288 * network mask of the overlay plus the size of host portion of the
289 * underlay network. In the above example, /8 + /16 = /24.
291 * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
292 * this case, the network portion of the underlay is 10.99.224.0/20, and
293 * the host portion is 0.0.14.5 (12 bits). To determine the overlay
294 * network subnet, the 12 bits of host portion are left shifted 12 bits
295 * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
296 * overlay subnet of 99.224.80.0/20, composed of 8 bits overlay, followed by
297 * 12 bits underlay. This leaves 12 host bits in each overlay subnet,
298 * allowing for 4094 addresses in each overlay network subnet. The
299 * trade-off is that fewer hosts may participate in the underlay network,
300 * as its host address size has shrunk from 16 bits (65534 addresses) in
301 * the first example to 12 bits (4094 addresses) here.
303 * For fewer hosts per overlay subnet (permitting a larger number of
304 * underlay hosts to participate), the underlay netmask may be made
307 * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
308 * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
309 * the 20 bits of host by 4 (so that its highest order bit is adjacent to
310 * the lowest order bit of the /8 overlay). This yields an overlay subnet
311 * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
312 * the underlay). This provides more addresses for the underlay network
313 * (approximately 2^20), but each host's segment of the overlay provides
314 * only 4 bits of addresses (14 usable).
316 * It is also possible to adjust the overlay subnet.
318 * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
319 * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
320 * shifted 15 bits (/20 - /5), yielding an overlay network of
321 * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
322 * overlay network of 242.107.128.0/17.
324 * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
325 * underlay host 10.224.220.10, the underlay host portion (.10) is left
326 * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
327 * This would permit 254 addresses on the underlay, with each overlay
328 * segment providing approximately 2^14 - 2 addresses (16382).
330 * For packets being encapsulated, the overlay network destination IP
331 * address is deconstructed into its overlay and underlay-derived
332 * portions. The underlay portion (determined by the overlay mask and
333 * overlay subnet mask) is right shifted according to the size of the
334 * underlay network mask. This value is then ORed with the network
335 * portion of the underlay network to produce the underlay network
336 * destination for the encapsulated datagram.
338 * For example, using the initial example of underlay 10.88.3.4/16 and
339 * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
340 * subnet 99.3.4.0/24 with specific host 99.3.4.5. A datagram from
341 * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
342 * of the address extracted. This is a number of bits equal to underlay
343 * network host portion. In the destination address, the highest order of
344 * these bits is one bit lower than the lowest order bit from the overlay
347 * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
348 * underlay mask is /16 (leaving 16 bits for the host portion). The bits
349 * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
350 * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
351 * which is 1 bit lower than the lowest order overlay address bit).
353 * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
354 * This value is then ORed with the underlay network portion,
355 * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
356 * the encapsulated datagram.
358 * Another transform using the final example: overlay 100.64.0.0/10 and
359 * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
360 * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
361 * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
362 * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
363 * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
364 * network portion and the underlay host portion) bits, yielding 0.0.0.11.
365 * This is ORed with the underlay network portion, 10.224.220.0/24, giving
366 * the underlay destination of 10.224.220.11 for overlay destination
/*
 * ipip_build_fan_iphdr - build the outer IP header for a fan-mapped packet.
 * @tunnel: tunnel whose fan maps and template header (parms.iph) are used
 * @skb:    packet being sent; its inner destination address selects the map
 * @iph:    output header, first copied from tunnel->parms.iph
 *
 * The inner destination (host order) has its overlay bits cleared with
 * ~overlay_mask, is shifted right by
 *   32 - overlay_prefix - (32 - underlay_prefix)
 * and is ORed with the map's underlay network to form the outer daddr.
 * NOTE(review): the handling of a failed map lookup and the return values
 * are not visible in this listing.
 */
369 static int ipip_build_fan_iphdr(struct ip_tunnel
*tunnel
, struct sk_buff
*skb
, struct iphdr
*iph
)
371 struct ip_fan_map
*f_map
;
374 f_map
= ipip_fan_find_map(tunnel
, ip_hdr(skb
)->daddr
);
378 daddr
= ntohl(ip_hdr(skb
)->daddr
);
379 underlay
= ntohl(f_map
->underlay
);
383 *iph
= tunnel
->parms
.iph
;
384 iph
->daddr
= htonl(underlay
|
385 ((daddr
& ~f_map
->overlay_mask
) >>
386 (32 - f_map
->overlay_prefix
-
387 (32 - f_map
->underlay_prefix
))));
392 * This function assumes it is being called from dev_queue_xmit()
393 * and that skb is filled properly by that function.
/*
 * ipip_tunnel_xmit - transmit hook (ndo_start_xmit) of the tunnel device.
 * @skb: packet to encapsulate
 * @dev: the tunnel device
 *
 * Maps skb->protocol to the outer IP protocol (ETH_P_IP -> IPPROTO_IPIP,
 * ETH_P_MPLS_UC -> IPPROTO_MPLS when CONFIG_MPLS is enabled), checks it
 * against the tunnel's configured protocol (0 accepts either), prepares
 * offloads with iptunnel_handle_offloads(), and, when the tunnel has fan
 * maps, builds a per-packet outer header with ipip_build_fan_iphdr().
 * The packet then goes out through ip_md_tunnel_xmit() in collect_md mode
 * or ip_tunnel_xmit() otherwise. dev->stats.tx_errors is incremented on a
 * path whose label is not visible in this listing.
 */
395 static netdev_tx_t
ipip_tunnel_xmit(struct sk_buff
*skb
,
396 struct net_device
*dev
)
398 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
399 const struct iphdr
*tiph
= &tunnel
->parms
.iph
;
403 switch (skb
->protocol
) {
404 case htons(ETH_P_IP
):
405 ipproto
= IPPROTO_IPIP
;
407 #if IS_ENABLED(CONFIG_MPLS)
408 case htons(ETH_P_MPLS_UC
):
409 ipproto
= IPPROTO_MPLS
;
416 if (tiph
->protocol
!= ipproto
&& tiph
->protocol
!= 0)
419 if (iptunnel_handle_offloads(skb
, SKB_GSO_IPXIP4
))
422 if (fan_has_map(&tunnel
->fan
)) {
423 if (ipip_build_fan_iphdr(tunnel
, skb
, &fiph
))
427 tiph
= &tunnel
->parms
.iph
;
430 skb_set_inner_ipproto(skb
, ipproto
);
432 if (tunnel
->collect_md
)
433 ip_md_tunnel_xmit(skb
, dev
, ipproto
);
435 ip_tunnel_xmit(skb
, dev
, tiph
, ipproto
);
441 dev
->stats
.tx_errors
++;
/*
 * ipip_tunnel_ioctl_verify_protocol - predicate used by ipip_tunnel_ioctl()
 * to vet the protocol field of a user-supplied tunnel header.
 * NOTE(review): the body is not visible in this listing; only a
 * CONFIG_MPLS-conditional section can be seen.
 */
445 static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto
)
450 #if IS_ENABLED(CONFIG_MPLS)
/*
 * ipip_tunnel_ioctl - ndo_do_ioctl handler for the tunnel device.
 * @dev: tunnel device
 * @ifr: request; ifr->ifr_ifru.ifru_data points at a struct ip_tunnel_parm
 * @cmd: ioctl command
 *
 * Copies the parameters in from user space. For SIOCADDTUNNEL and
 * SIOCCHGTUNNEL it checks version == 4, the protocol via
 * ipip_tunnel_ioctl_verify_protocol(), ihl == 5 and that no frag_off bit
 * other than IP_DF is set. Keys and flags are cleared before
 * ip_tunnel_ioctl() is called, and the parameters are copied back out.
 * NOTE(review): the error returns are not visible in this listing.
 */
460 ipip_tunnel_ioctl(struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
463 struct ip_tunnel_parm p
;
465 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
468 if (cmd
== SIOCADDTUNNEL
|| cmd
== SIOCCHGTUNNEL
) {
469 if (p
.iph
.version
!= 4 ||
470 !ipip_tunnel_ioctl_verify_protocol(p
.iph
.protocol
) ||
471 p
.iph
.ihl
!= 5 || (p
.iph
.frag_off
&htons(~IP_DF
)))
475 p
.i_key
= p
.o_key
= 0;
476 p
.i_flags
= p
.o_flags
= 0;
477 err
= ip_tunnel_ioctl(dev
, &p
, cmd
);
481 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
/*
 * Net device operations of an IPIP tunnel device. init, xmit and ioctl are
 * implemented in this file; the remaining hooks are the generic ip_tunnel_*
 * helpers. Installed on the device by ipip_tunnel_setup().
 */
487 static const struct net_device_ops ipip_netdev_ops
= {
488 .ndo_init
= ipip_tunnel_init
,
489 .ndo_uninit
= ip_tunnel_uninit
,
490 .ndo_start_xmit
= ipip_tunnel_xmit
,
491 .ndo_do_ioctl
= ipip_tunnel_ioctl
,
492 .ndo_change_mtu
= ip_tunnel_change_mtu
,
493 .ndo_get_stats64
= ip_tunnel_get_stats64
,
494 .ndo_get_iflink
= ip_tunnel_get_iflink
,
/*
 * Feature flags ORed into both dev->features and dev->hw_features by
 * ipip_tunnel_setup().
 * NOTE(review): only part of the flag list is visible in this listing.
 */
497 #define IPIP_FEATURES (NETIF_F_SG | \
500 NETIF_F_GSO_SOFTWARE | \
/*
 * ipip_tunnel_setup - rtnl_link_ops .setup callback.
 * @dev: freshly allocated tunnel device
 *
 * Installs ipip_netdev_ops, marks the device ARPHRD_TUNNEL with IFF_NOARP,
 * enables NETIF_F_LLTX plus IPIP_FEATURES, runs the generic
 * ip_tunnel_setup() with this driver's net id, and initialises the empty
 * list of fan maps.
 */
503 static void ipip_tunnel_setup(struct net_device
*dev
)
505 struct ip_tunnel
*t
= netdev_priv(dev
);
507 dev
->netdev_ops
= &ipip_netdev_ops
;
509 dev
->type
= ARPHRD_TUNNEL
;
510 dev
->flags
= IFF_NOARP
;
512 dev
->features
|= NETIF_F_LLTX
;
515 dev
->features
|= IPIP_FEATURES
;
516 dev
->hw_features
|= IPIP_FEATURES
;
517 ip_tunnel_setup(dev
, ipip_net_id
);
518 INIT_LIST_HEAD(&t
->fan
.fan_maps
);
/*
 * ipip_tunnel_init - ndo_init callback.
 * @dev: tunnel device being registered
 *
 * Copies the 4-byte tunnel source and destination addresses into
 * dev->dev_addr and dev->broadcast, sets tun_hlen to 0 so that hlen equals
 * encap_hlen, and returns the result of the generic ip_tunnel_init().
 */
521 static int ipip_tunnel_init(struct net_device
*dev
)
523 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
525 memcpy(dev
->dev_addr
, &tunnel
->parms
.iph
.saddr
, 4);
526 memcpy(dev
->broadcast
, &tunnel
->parms
.iph
.daddr
, 4);
528 tunnel
->tun_hlen
= 0;
529 tunnel
->hlen
= tunnel
->tun_hlen
+ tunnel
->encap_hlen
;
530 return ip_tunnel_init(dev
);
/*
 * ipip_tunnel_validate - rtnl_link_ops .validate callback.
 * @tb:   generic link attributes (not referenced in the visible lines)
 * @data: IFLA_IPTUN_* attributes, may be NULL
 *
 * Tests for a missing @data or missing IFLA_IPTUN_PROTO attribute first;
 * otherwise reads the protocol and compares it against IPPROTO_IPIP,
 * IPPROTO_MPLS and 0.
 * NOTE(review): the values returned for each case are not visible in this
 * listing.
 */
533 static int ipip_tunnel_validate(struct nlattr
*tb
[], struct nlattr
*data
[])
537 if (!data
|| !data
[IFLA_IPTUN_PROTO
])
540 proto
= nla_get_u8(data
[IFLA_IPTUN_PROTO
]);
541 if (proto
!= IPPROTO_IPIP
&& proto
!= IPPROTO_MPLS
&& proto
!= 0)
/*
 * ipip_netlink_parms - translate IFLA_IPTUN_* attributes into tunnel parms.
 * @data:       netlink attribute table
 * @parms:      output; zeroed first, then version = 4 and
 *              protocol = IPPROTO_IPIP are set as defaults
 * @collect_md: output flag associated with IFLA_IPTUN_COLLECT_METADATA
 *
 * LINK, LOCAL, REMOTE, TTL, TOS and PROTO override the defaults when
 * present. frag_off is set to htons(IP_DF) when IFLA_IPTUN_PMTUDISC is
 * absent or non-zero; it is also set inside the TTL branch.
 * NOTE(review): the guard inside the TTL branch and the statement executed
 * for COLLECT_METADATA are not visible in this listing.
 */
547 static void ipip_netlink_parms(struct nlattr
*data
[],
548 struct ip_tunnel_parm
*parms
, bool *collect_md
)
550 memset(parms
, 0, sizeof(*parms
));
552 parms
->iph
.version
= 4;
553 parms
->iph
.protocol
= IPPROTO_IPIP
;
560 if (data
[IFLA_IPTUN_LINK
])
561 parms
->link
= nla_get_u32(data
[IFLA_IPTUN_LINK
]);
563 if (data
[IFLA_IPTUN_LOCAL
])
564 parms
->iph
.saddr
= nla_get_in_addr(data
[IFLA_IPTUN_LOCAL
]);
566 if (data
[IFLA_IPTUN_REMOTE
])
567 parms
->iph
.daddr
= nla_get_in_addr(data
[IFLA_IPTUN_REMOTE
]);
569 if (data
[IFLA_IPTUN_TTL
]) {
570 parms
->iph
.ttl
= nla_get_u8(data
[IFLA_IPTUN_TTL
]);
572 parms
->iph
.frag_off
= htons(IP_DF
);
575 if (data
[IFLA_IPTUN_TOS
])
576 parms
->iph
.tos
= nla_get_u8(data
[IFLA_IPTUN_TOS
]);
578 if (data
[IFLA_IPTUN_PROTO
])
579 parms
->iph
.protocol
= nla_get_u8(data
[IFLA_IPTUN_PROTO
]);
581 if (!data
[IFLA_IPTUN_PMTUDISC
] || nla_get_u8(data
[IFLA_IPTUN_PMTUDISC
]))
582 parms
->iph
.frag_off
= htons(IP_DF
);
584 if (data
[IFLA_IPTUN_COLLECT_METADATA
])
/*
 * ipip_netlink_encap_parms - extract encapsulation settings from netlink.
 * @data:    netlink attribute table
 * @ipencap: output; zeroed, then type, flags, sport and dport are filled
 *           from IFLA_IPTUN_ENCAP_TYPE/FLAGS/SPORT/DPORT when present
 *           (type and flags as u16, ports as be16)
 */
588 /* This function returns true when ENCAP attributes are present in the nl msg */
589 static bool ipip_netlink_encap_parms(struct nlattr
*data
[],
590 struct ip_tunnel_encap
*ipencap
)
594 memset(ipencap
, 0, sizeof(*ipencap
));
599 if (data
[IFLA_IPTUN_ENCAP_TYPE
]) {
601 ipencap
->type
= nla_get_u16(data
[IFLA_IPTUN_ENCAP_TYPE
]);
604 if (data
[IFLA_IPTUN_ENCAP_FLAGS
]) {
606 ipencap
->flags
= nla_get_u16(data
[IFLA_IPTUN_ENCAP_FLAGS
]);
609 if (data
[IFLA_IPTUN_ENCAP_SPORT
]) {
611 ipencap
->sport
= nla_get_be16(data
[IFLA_IPTUN_ENCAP_SPORT
]);
614 if (data
[IFLA_IPTUN_ENCAP_DPORT
]) {
616 ipencap
->dport
= nla_get_be16(data
[IFLA_IPTUN_ENCAP_DPORT
]);
622 static void ipip_fan_flush_map(struct ip_tunnel
*t
)
624 struct ip_fan_map
*fan_map
;
626 list_for_each_entry_rcu(fan_map
, &t
->fan
.fan_maps
, list
) {
627 list_del_rcu(&fan_map
->list
);
628 kfree_rcu(fan_map
, rcu
);
/*
 * ipip_fan_del_map - remove the fan mapping that covers @overlay.
 * @t:       tunnel owning the mappings
 * @overlay: overlay address (network byte order) used for the lookup
 *
 * Finds the entry with ipip_fan_find_map(), unlinks it with list_del_rcu()
 * and frees it after a grace period with kfree_rcu().
 * NOTE(review): the not-found branch and the return values are not visible
 * in this listing.
 */
632 static int ipip_fan_del_map(struct ip_tunnel
*t
, __be32 overlay
)
634 struct ip_fan_map
*fan_map
;
636 fan_map
= ipip_fan_find_map(t
, overlay
);
640 list_del_rcu(&fan_map
->list
);
641 kfree_rcu(fan_map
, rcu
);
646 static int ipip_fan_add_map(struct ip_tunnel
*t
, struct ifla_fan_map
*map
)
648 __be32 overlay_mask
, underlay_mask
;
649 struct ip_fan_map
*fan_map
;
651 overlay_mask
= inet_make_mask(map
->overlay_prefix
);
652 underlay_mask
= inet_make_mask(map
->underlay_prefix
);
654 if ((map
->overlay
& ~overlay_mask
) || (map
->underlay
& ~underlay_mask
))
657 if (!(map
->overlay
& overlay_mask
) && (map
->underlay
& underlay_mask
))
660 /* Special case: overlay 0 and underlay 0: flush all mappings */
661 if (!map
->overlay
&& !map
->underlay
) {
662 ipip_fan_flush_map(t
);
666 /* Special case: overlay set and underlay 0: clear map for overlay */
668 return ipip_fan_del_map(t
, map
->overlay
);
670 if (ipip_fan_find_map(t
, map
->overlay
))
673 fan_map
= kmalloc(sizeof(*fan_map
), GFP_KERNEL
);
674 fan_map
->underlay
= map
->underlay
;
675 fan_map
->overlay
= map
->overlay
;
676 fan_map
->underlay_prefix
= map
->underlay_prefix
;
677 fan_map
->overlay_mask
= ntohl(overlay_mask
);
678 fan_map
->overlay_prefix
= map
->overlay_prefix
;
680 list_add_tail_rcu(&fan_map
->list
, &t
->fan
.fan_maps
);
686 static int ipip_netlink_fan(struct nlattr
*data
[], struct ip_tunnel
*t
,
687 struct ip_tunnel_parm
*parms
)
689 struct ifla_fan_map
*map
;
693 if (!data
[IFLA_IPTUN_FAN_MAP
])
696 if (parms
->iph
.daddr
)
699 nla_for_each_nested(attr
, data
[IFLA_IPTUN_FAN_MAP
], rem
) {
700 map
= nla_data(attr
);
701 rv
= ipip_fan_add_map(t
, map
);
/*
 * ipip_newlink - rtnl_link_ops .newlink callback.
 * @src_net: namespace of the request (not referenced in the visible lines)
 * @dev:     device being created
 * @tb:      generic link attributes, passed through to ip_tunnel_newlink()
 * @data:    IFLA_IPTUN_* attributes
 *
 * Applies encapsulation settings with ip_tunnel_encap_setup() when
 * ipip_netlink_encap_parms() reports them, parses the tunnel parameters
 * (storing the collect_md flag directly in the tunnel), applies fan map
 * requests and finally calls ip_tunnel_newlink().
 * NOTE(review): the checks on err after each step are not visible in this
 * listing.
 */
709 static int ipip_newlink(struct net
*src_net
, struct net_device
*dev
,
710 struct nlattr
*tb
[], struct nlattr
*data
[])
712 struct ip_tunnel
*t
= netdev_priv(dev
);
713 struct ip_tunnel_parm p
;
714 struct ip_tunnel_encap ipencap
;
717 if (ipip_netlink_encap_parms(data
, &ipencap
)) {
718 err
= ip_tunnel_encap_setup(t
, &ipencap
);
724 ipip_netlink_parms(data
, &p
, &t
->collect_md
);
725 err
= ipip_netlink_fan(data
, t
, &p
);
728 return ip_tunnel_newlink(dev
, tb
, &p
);
/*
 * ipip_changelink - rtnl_link_ops .changelink callback.
 * @dev:  existing tunnel device
 * @tb:   generic link attributes, passed through to ip_tunnel_changelink()
 * @data: IFLA_IPTUN_* attributes
 *
 * Same sequence as ipip_newlink(), except that collect_md is parsed into a
 * local variable rather than into the tunnel. Before calling
 * ip_tunnel_changelink() it tests that IFF_POINTOPOINT on the device and
 * the presence of a remote address in the new parameters agree.
 * NOTE(review): the error returns are not visible in this listing.
 */
731 static int ipip_changelink(struct net_device
*dev
, struct nlattr
*tb
[],
732 struct nlattr
*data
[])
734 struct ip_tunnel_parm p
;
735 struct ip_tunnel_encap ipencap
;
737 struct ip_tunnel
*t
= netdev_priv(dev
);
740 if (ipip_netlink_encap_parms(data
, &ipencap
)) {
741 err
= ip_tunnel_encap_setup(t
, &ipencap
);
747 ipip_netlink_parms(data
, &p
, &collect_md
);
750 err
= ipip_netlink_fan(data
, t
, &p
);
754 if (((dev
->flags
& IFF_POINTOPOINT
) && !p
.iph
.daddr
) ||
755 (!(dev
->flags
& IFF_POINTOPOINT
) && p
.iph
.daddr
))
758 return ip_tunnel_changelink(dev
, tb
, &p
);
/*
 * ipip_get_size - rtnl_link_ops .get_size callback; sizes the netlink
 * payload for the attributes named in the comments below.
 * NOTE(review): most of the summed terms are not visible in this listing.
 * The one visible term reserves room for 256 struct ifla_fan_map attributes.
 */
761 static size_t ipip_get_size(const struct net_device
*dev
)
764 /* IFLA_IPTUN_LINK */
766 /* IFLA_IPTUN_LOCAL */
768 /* IFLA_IPTUN_REMOTE */
774 /* IFLA_IPTUN_PROTO */
776 /* IFLA_IPTUN_PMTUDISC */
778 /* IFLA_IPTUN_ENCAP_TYPE */
780 /* IFLA_IPTUN_ENCAP_FLAGS */
782 /* IFLA_IPTUN_ENCAP_SPORT */
784 /* IFLA_IPTUN_ENCAP_DPORT */
786 /* IFLA_IPTUN_COLLECT_METADATA */
788 /* IFLA_IPTUN_FAN_MAP */
789 nla_total_size(sizeof(struct ifla_fan_map
)) * 256 +
/*
 * ipip_fill_info - rtnl_link_ops .fill_info callback.
 * @skb: netlink message under construction
 * @dev: tunnel device being dumped
 *
 * Emits the tunnel parameters (link, local, remote, ttl, tos, proto and a
 * 0/1 PMTUDISC value derived from the IP_DF bit), then the encapsulation
 * settings, then the COLLECT_METADATA flag when the tunnel is in collect_md
 * mode. When the tunnel has fan maps, each one is copied into a struct
 * ifla_fan_map and emitted as IFLA_FAN_MAPPING inside an
 * IFLA_IPTUN_FAN_MAP nest. Any failed put jumps to nla_put_failure.
 * NOTE(review): the success return and the nla_put_failure label are not
 * visible in this listing.
 */
793 static int ipip_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
795 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
796 struct ip_tunnel_parm
*parm
= &tunnel
->parms
;
798 if (nla_put_u32(skb
, IFLA_IPTUN_LINK
, parm
->link
) ||
799 nla_put_in_addr(skb
, IFLA_IPTUN_LOCAL
, parm
->iph
.saddr
) ||
800 nla_put_in_addr(skb
, IFLA_IPTUN_REMOTE
, parm
->iph
.daddr
) ||
801 nla_put_u8(skb
, IFLA_IPTUN_TTL
, parm
->iph
.ttl
) ||
802 nla_put_u8(skb
, IFLA_IPTUN_TOS
, parm
->iph
.tos
) ||
803 nla_put_u8(skb
, IFLA_IPTUN_PROTO
, parm
->iph
.protocol
) ||
804 nla_put_u8(skb
, IFLA_IPTUN_PMTUDISC
,
805 !!(parm
->iph
.frag_off
& htons(IP_DF
))))
806 goto nla_put_failure
;
808 if (nla_put_u16(skb
, IFLA_IPTUN_ENCAP_TYPE
,
809 tunnel
->encap
.type
) ||
810 nla_put_be16(skb
, IFLA_IPTUN_ENCAP_SPORT
,
811 tunnel
->encap
.sport
) ||
812 nla_put_be16(skb
, IFLA_IPTUN_ENCAP_DPORT
,
813 tunnel
->encap
.dport
) ||
814 nla_put_u16(skb
, IFLA_IPTUN_ENCAP_FLAGS
,
815 tunnel
->encap
.flags
))
816 goto nla_put_failure
;
818 if (tunnel
->collect_md
)
819 if (nla_put_flag(skb
, IFLA_IPTUN_COLLECT_METADATA
))
820 goto nla_put_failure
;
821 if (fan_has_map(&tunnel
->fan
)) {
822 struct nlattr
*fan_nest
;
823 struct ip_fan_map
*fan_map
;
825 fan_nest
= nla_nest_start(skb
, IFLA_IPTUN_FAN_MAP
);
827 goto nla_put_failure
;
828 list_for_each_entry_rcu(fan_map
, &tunnel
->fan
.fan_maps
, list
) {
829 struct ifla_fan_map map
;
831 map
.underlay
= fan_map
->underlay
;
832 map
.underlay_prefix
= fan_map
->underlay_prefix
;
833 map
.overlay
= fan_map
->overlay
;
834 map
.overlay_prefix
= fan_map
->overlay_prefix
;
835 if (nla_put(skb
, IFLA_FAN_MAPPING
, sizeof(map
), &map
))
836 goto nla_put_failure
;
838 nla_nest_end(skb
, fan_nest
);
/*
 * Netlink attribute policy for IFLA_IPTUN_* attributes, referenced by
 * ipip_link_ops.policy. The range initialiser marks every attribute from
 * __IFLA_IPTUN_VENDOR_BREAK up to IFLA_IPTUN_MAX as NLA_BINARY; the entry
 * after it then gives IFLA_IPTUN_FAN_MAP its own type, NLA_NESTED.
 */
847 static const struct nla_policy ipip_policy
[IFLA_IPTUN_MAX
+ 1] = {
848 [IFLA_IPTUN_LINK
] = { .type
= NLA_U32
},
849 [IFLA_IPTUN_LOCAL
] = { .type
= NLA_U32
},
850 [IFLA_IPTUN_REMOTE
] = { .type
= NLA_U32
},
851 [IFLA_IPTUN_TTL
] = { .type
= NLA_U8
},
852 [IFLA_IPTUN_TOS
] = { .type
= NLA_U8
},
853 [IFLA_IPTUN_PROTO
] = { .type
= NLA_U8
},
854 [IFLA_IPTUN_PMTUDISC
] = { .type
= NLA_U8
},
855 [IFLA_IPTUN_ENCAP_TYPE
] = { .type
= NLA_U16
},
856 [IFLA_IPTUN_ENCAP_FLAGS
] = { .type
= NLA_U16
},
857 [IFLA_IPTUN_ENCAP_SPORT
] = { .type
= NLA_U16
},
858 [IFLA_IPTUN_ENCAP_DPORT
] = { .type
= NLA_U16
},
859 [IFLA_IPTUN_COLLECT_METADATA
] = { .type
= NLA_FLAG
},
861 [__IFLA_IPTUN_VENDOR_BREAK
... IFLA_IPTUN_MAX
] = { .type
= NLA_BINARY
},
862 [IFLA_IPTUN_FAN_MAP
] = { .type
= NLA_NESTED
},
/*
 * rtnetlink link operations for IPIP devices, wiring the setup, validate,
 * newlink, changelink, get_size and fill_info callbacks defined in this
 * file to the generic ip_tunnel helpers for dellink and get_link_net.
 * Registered by ipip_init() and unregistered by ipip_fini().
 */
865 static struct rtnl_link_ops ipip_link_ops __read_mostly
= {
867 .maxtype
= IFLA_IPTUN_MAX
,
868 .policy
= ipip_policy
,
869 .priv_size
= sizeof(struct ip_tunnel
),
870 .setup
= ipip_tunnel_setup
,
871 .validate
= ipip_tunnel_validate
,
872 .newlink
= ipip_newlink
,
873 .changelink
= ipip_changelink
,
874 .dellink
= ip_tunnel_dellink
,
875 .get_size
= ipip_get_size
,
876 .fill_info
= ipip_fill_info
,
877 .get_link_net
= ip_tunnel_get_link_net
,
/*
 * xfrm tunnel handler for IPv4-in-IPv4, registered with AF_INET by
 * ipip_init(). ICMP errors are routed to ipip_err().
 * NOTE(review): the other fields of this initialiser are not visible in
 * this listing.
 */
880 static struct xfrm_tunnel ipip_handler __read_mostly
= {
882 .err_handler
= ipip_err
,
/*
 * xfrm tunnel handler for MPLS-over-IPv4 (CONFIG_MPLS only), registered
 * with AF_MPLS by ipip_init(). Packets go to mplsip_rcv(), ICMP errors to
 * ipip_err().
 */
886 #if IS_ENABLED(CONFIG_MPLS)
887 static struct xfrm_tunnel mplsip_handler __read_mostly
= {
888 .handler
= mplsip_rcv
,
889 .err_handler
= ipip_err
,
/*
 * Per-namespace init: delegates to ip_tunnel_init_net() with this driver's
 * net id, its link ops and the device name "tunl0".
 */
894 static int __net_init
ipip_init_net(struct net
*net
)
896 return ip_tunnel_init_net(net
, ipip_net_id
, &ipip_link_ops
, "tunl0");
/*
 * Per-namespace teardown: fetches the namespace's struct ip_tunnel_net via
 * net_generic() and passes it to ip_tunnel_delete_net().
 */
899 static void __net_exit
ipip_exit_net(struct net
*net
)
901 struct ip_tunnel_net
*itn
= net_generic(net
, ipip_net_id
);
902 ip_tunnel_delete_net(itn
, &ipip_link_ops
);
/*
 * Per-network-namespace operations, registered with
 * register_pernet_device() in ipip_init(). .size requests a struct
 * ip_tunnel_net per namespace.
 * NOTE(review): the field between .exit and .size is not visible in this
 * listing.
 */
905 static struct pernet_operations ipip_net_ops
= {
906 .init
= ipip_init_net
,
907 .exit
= ipip_exit_net
,
909 .size
= sizeof(struct ip_tunnel_net
),
/*
 * Fan sysctl support. ipip_fan_header holds the handle returned by
 * register_net_sysctl() in ipip_init() and released in ipip_fini(). The
 * table exposes ipip_fan_version (initialised to 3) as "version" through
 * proc_dointvec.
 * NOTE(review): the opening CONFIG_SYSCTL guard and the entry's mode are
 * not visible in this listing.
 */
913 static struct ctl_table_header
*ipip_fan_header
;
914 static unsigned int ipip_fan_version
= 3;
916 static struct ctl_table ipip_fan_sysctls
[] = {
918 .procname
= "version",
919 .data
= &ipip_fan_version
,
920 .maxlen
= sizeof(ipip_fan_version
),
922 .proc_handler
= proc_dointvec
,
927 #endif /* CONFIG_SYSCTL */
929 static int __init
ipip_init(void)
933 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
935 err
= register_pernet_device(&ipip_net_ops
);
938 err
= xfrm4_tunnel_register(&ipip_handler
, AF_INET
);
940 pr_info("%s: can't register tunnel\n", __func__
);
941 goto xfrm_tunnel_ipip_failed
;
943 #if IS_ENABLED(CONFIG_MPLS)
944 err
= xfrm4_tunnel_register(&mplsip_handler
, AF_MPLS
);
946 pr_info("%s: can't register tunnel\n", __func__
);
947 goto xfrm_tunnel_mplsip_failed
;
950 err
= rtnl_link_register(&ipip_link_ops
);
952 goto rtnl_link_failed
;
955 ipip_fan_header
= register_net_sysctl(&init_net
, "net/fan",
957 if (!ipip_fan_header
) {
961 #endif /* CONFIG_SYSCTL */
968 rtnl_link_unregister(&ipip_link_ops
);
969 #endif /* CONFIG_SYSCTL */
971 #if IS_ENABLED(CONFIG_MPLS)
972 xfrm4_tunnel_deregister(&mplsip_handler
, AF_INET
);
973 xfrm_tunnel_mplsip_failed
:
976 xfrm4_tunnel_deregister(&ipip_handler
, AF_INET
);
977 xfrm_tunnel_ipip_failed
:
978 unregister_pernet_device(&ipip_net_ops
);
/*
 * ipip_fini - module exit.
 *
 * Undoes ipip_init(): drops the fan sysctl table, unregisters the link ops,
 * deregisters the IPIP handler (AF_INET) and, with CONFIG_MPLS, the MPLS
 * handler (AF_MPLS), logging when a deregistration fails, and finally
 * unregisters the per-namespace operations.
 */
982 static void __exit
ipip_fini(void)
985 unregister_net_sysctl_table(ipip_fan_header
);
986 #endif /* CONFIG_SYSCTL */
987 rtnl_link_unregister(&ipip_link_ops
);
988 if (xfrm4_tunnel_deregister(&ipip_handler
, AF_INET
))
989 pr_info("%s: can't deregister tunnel\n", __func__
);
990 #if IS_ENABLED(CONFIG_MPLS)
991 if (xfrm4_tunnel_deregister(&mplsip_handler
, AF_MPLS
))
992 pr_info("%s: can't deregister tunnel\n", __func__
);
994 unregister_pernet_device(&ipip_net_ops
);
/*
 * Module boilerplate: entry/exit hooks, GPL licence, and aliases for the
 * "ipip" rtnl link kind and the "tunl0" device name.
 */
997 module_init(ipip_init
);
998 module_exit(ipip_fini
);
999 MODULE_LICENSE("GPL");
1000 MODULE_ALIAS_RTNL_LINK("ipip");
1001 MODULE_ALIAS_NETDEV("tunl0");