2 * Linux NET3: IP/IP protocol decoder.
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
27 /* tunnel.c: an IP tunnel driver
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
56 /* Things I wish I had known when writing the tunnel driver:
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
76 find out how much more space you can allocate by calling
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
88 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
90 For comments look at net/ipv4/ip_gre.c --ANK
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109 #include <linux/inetdevice.h>
110 #include <linux/rculist.h>
112 #include <net/sock.h>
114 #include <net/icmp.h>
115 #include <net/ip_tunnels.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 #include <net/net_namespace.h>
119 #include <net/netns/generic.h>
120 #include <net/dst_metadata.h>
122 static bool log_ecn_error
= true;
123 module_param(log_ecn_error
, bool, 0644);
124 MODULE_PARM_DESC(log_ecn_error
, "Log packets received with corrupted ECN");
126 static unsigned int ipip_net_id __read_mostly
;
128 static int ipip_tunnel_init(struct net_device
*dev
);
129 static struct rtnl_link_ops ipip_link_ops __read_mostly
;
131 static int ipip_err(struct sk_buff
*skb
, u32 info
)
134 /* All the routers (except for Linux) return only
135 8 bytes of packet payload. It means, that precise relaying of
136 ICMP in the real Internet is absolutely infeasible.
138 struct net
*net
= dev_net(skb
->dev
);
139 struct ip_tunnel_net
*itn
= net_generic(net
, ipip_net_id
);
140 const struct iphdr
*iph
= (const struct iphdr
*)skb
->data
;
143 const int type
= icmp_hdr(skb
)->type
;
144 const int code
= icmp_hdr(skb
)->code
;
147 t
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
148 iph
->daddr
, iph
->saddr
, 0);
152 if (type
== ICMP_DEST_UNREACH
&& code
== ICMP_FRAG_NEEDED
) {
153 ipv4_update_pmtu(skb
, dev_net(skb
->dev
), info
,
154 t
->parms
.link
, 0, iph
->protocol
, 0);
159 if (type
== ICMP_REDIRECT
) {
160 ipv4_redirect(skb
, dev_net(skb
->dev
), t
->parms
.link
, 0,
166 if (t
->parms
.iph
.daddr
== 0)
170 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
173 if (time_before(jiffies
, t
->err_time
+ IPTUNNEL_ERR_TIMEO
))
177 t
->err_time
= jiffies
;
183 static const struct tnl_ptk_info ipip_tpi
= {
184 /* no tunnel info required for ipip. */
185 .proto
= htons(ETH_P_IP
),
188 #if IS_ENABLED(CONFIG_MPLS)
189 static const struct tnl_ptk_info mplsip_tpi
= {
190 /* no tunnel info required for mplsip. */
191 .proto
= htons(ETH_P_MPLS_UC
),
195 static int ipip_tunnel_rcv(struct sk_buff
*skb
, u8 ipproto
)
197 struct net
*net
= dev_net(skb
->dev
);
198 struct ip_tunnel_net
*itn
= net_generic(net
, ipip_net_id
);
199 struct metadata_dst
*tun_dst
= NULL
;
200 struct ip_tunnel
*tunnel
;
201 const struct iphdr
*iph
;
204 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
205 iph
->saddr
, iph
->daddr
, 0);
207 const struct tnl_ptk_info
*tpi
;
209 if (tunnel
->parms
.iph
.protocol
!= ipproto
&&
210 tunnel
->parms
.iph
.protocol
!= 0)
213 if (!xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
215 #if IS_ENABLED(CONFIG_MPLS)
216 if (ipproto
== IPPROTO_MPLS
)
221 if (iptunnel_pull_header(skb
, 0, tpi
->proto
, false))
223 if (tunnel
->collect_md
) {
224 tun_dst
= ip_tun_rx_dst(skb
, 0, 0, 0);
228 return ip_tunnel_rcv(tunnel
, skb
, tpi
, tun_dst
, log_ecn_error
);
238 static int ipip_rcv(struct sk_buff
*skb
)
240 return ipip_tunnel_rcv(skb
, IPPROTO_IPIP
);
243 #if IS_ENABLED(CONFIG_MPLS)
244 static int mplsip_rcv(struct sk_buff
*skb
)
246 return ipip_tunnel_rcv(skb
, IPPROTO_MPLS
);
250 static struct ip_fan_map
*ipip_fan_find_map(struct ip_tunnel
*t
, __be32 daddr
)
252 struct ip_fan_map
*fan_map
;
255 list_for_each_entry_rcu(fan_map
, &t
->fan
.fan_maps
, list
) {
256 if (fan_map
->overlay
==
257 (daddr
& inet_make_mask(fan_map
->overlay_prefix
))) {
267 /* Determine fan tunnel endpoint to send packet to, based on the inner IP
270 * Given a /8 overlay and /16 underlay, for an overlay (inner) address
271 * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
272 * two octets of the underlay network (the network portion of a /16), "A"
273 * and "B" are the low order two octets of the underlay network host (the
274 * host portion of a /16), and "Y" is a configured first octet of the
277 * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
278 * host overlay subnet 99.3.4.0/24. An overlay network datagram from
279 * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
280 * which hosts overlay network subnet 99.6.7.0/24. This transformation is
281 * described in detail further below.
283 * Using netmasks for the overlay and underlay other than /8 and /16, as
284 * shown above, can yield larger (or smaller) overlay subnets, with the
285 * trade-off of allowing fewer (or more) underlay hosts to participate.
287 * The size of each overlay network subnet is defined by the total of the
288 * network mask of the overlay plus the size of host portion of the
289 * underlay network. In the above example, /8 + /16 = /24.
291 * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
292 * this case, the network portion of the underlay is 10.99.224.0/20, and
293 * the host portion is 0.0.14.5 (12 bits). To determine the overlay
294 * network subnet, the 12 bits of host portion are left shifted 12 bits
295 * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
296 * overlay subnet of 99.224.80/20, composed of 8 bits overlay, followed by
297 * 12 bits underlay. This yields 12 bits in the overlay network portion,
298 * allowing for 4094 addresses in each overlay network subnet. The
299 * trade-off is that fewer hosts may participate in the underlay network,
300 * as its host address size has shrunk from 16 bits (65534 addresses) in
301 * the first example to 12 bits (4094 addresses) here.
303 * For fewer hosts per overlay subnet (permitting a larger number of
304 * underlay hosts to participate), the underlay netmask may be made
307 * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
308 * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
309 * the 20 bits of host by 4 (so that its highest order bit is adjacent to
310 * the lowest order bit of the /8 overlay). This yields an overlay subnet
311 * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
312 * the underlay). This provides more addresses for the underlay network
313 * (approximately 2^20), but each host's segment of the overlay provides
314 * only 4 bits of addresses (14 usable).
316 * It is also possible to adjust the overlay subnet.
318 * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
319 * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
320 * shifted 15 bits (/20 - /5), yielding an overlay network of
321 * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
322 * overlay network of 242.107.128.0/17.
324 * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
325 * underlay host 10.224.220.10, the underlay host portion (.10) is left
326 * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
327 * This would permit 254 addresses on the underlay, with each overlay
328 * segment providing approximately 2^14 - 2 addresses (16382).
330 * For packets being encapsulated, the overlay network destination IP
331 * address is deconstructed into its overlay and underlay-derived
332 * portions. The underlay portion (determined by the overlay mask and
333 * overlay subnet mask) is right shifted according to the size of the
334 * underlay network mask. This value is then ORed with the network
335 * portion of the underlay network to produce the underlay network
336 * destination for the encapsulated datagram.
338 * For example, using the initial example of underlay 10.88.3.4/16 and
339 * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
340 * subnet 99.3.4.0/24 with specific host 99.3.4.5. A datagram from
341 * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
342 * of the address extracted. This is a number of bits equal to underlay
343 * network host portion. In the destination address, the highest order of
344 * these bits is one bit lower than the lowest order bit from the overlay
347 * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
348 * underlay mask is /16 (leaving 16 bits for the host portion). The bits
349 * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
350 * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
351 * which is 1 bit lower than the lowest order overlay address bit).
353 * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
354 * This value is then ORed with the underlay network portion,
355 * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
356 * the encapsulated datagram.
358 * Another transform using the final example: overlay 100.64.0.0/10 and
359 * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
360 * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
361 * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
362 * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
363 * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
364 * network portion and the underlay host portion) bits, yielding 0.0.0.11.
365 * This is ORed with the underlay network portion, 10.224.220.0/24, giving
366 * the underlay destination of 10.224.220.11 for overlay destination
369 static int ipip_build_fan_iphdr(struct ip_tunnel
*tunnel
, struct sk_buff
*skb
, struct iphdr
*iph
)
371 struct ip_fan_map
*f_map
;
374 f_map
= ipip_fan_find_map(tunnel
, ip_hdr(skb
)->daddr
);
378 daddr
= ntohl(ip_hdr(skb
)->daddr
);
379 underlay
= ntohl(f_map
->underlay
);
383 *iph
= tunnel
->parms
.iph
;
384 iph
->daddr
= htonl(underlay
|
385 ((daddr
& ~f_map
->overlay_mask
) >>
386 (32 - f_map
->overlay_prefix
-
387 (32 - f_map
->underlay_prefix
))));
392 * This function assumes it is being called from dev_queue_xmit()
393 * and that skb is filled properly by that function.
395 static netdev_tx_t
ipip_tunnel_xmit(struct sk_buff
*skb
,
396 struct net_device
*dev
)
398 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
399 const struct iphdr
*tiph
= &tunnel
->parms
.iph
;
403 switch (skb
->protocol
) {
404 case htons(ETH_P_IP
):
405 ipproto
= IPPROTO_IPIP
;
407 #if IS_ENABLED(CONFIG_MPLS)
408 case htons(ETH_P_MPLS_UC
):
409 ipproto
= IPPROTO_MPLS
;
416 if (tiph
->protocol
!= ipproto
&& tiph
->protocol
!= 0)
419 if (iptunnel_handle_offloads(skb
, SKB_GSO_IPXIP4
))
422 if (fan_has_map(&tunnel
->fan
)) {
423 if (ipip_build_fan_iphdr(tunnel
, skb
, &fiph
))
427 tiph
= &tunnel
->parms
.iph
;
430 skb_set_inner_ipproto(skb
, ipproto
);
432 if (tunnel
->collect_md
)
433 ip_md_tunnel_xmit(skb
, dev
, ipproto
);
435 ip_tunnel_xmit(skb
, dev
, tiph
, ipproto
);
441 dev
->stats
.tx_errors
++;
445 static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto
)
450 #if IS_ENABLED(CONFIG_MPLS)
460 ipip_tunnel_ioctl(struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
463 struct ip_tunnel_parm p
;
465 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
468 if (cmd
== SIOCADDTUNNEL
|| cmd
== SIOCCHGTUNNEL
) {
469 if (p
.iph
.version
!= 4 ||
470 !ipip_tunnel_ioctl_verify_protocol(p
.iph
.protocol
) ||
471 p
.iph
.ihl
!= 5 || (p
.iph
.frag_off
&htons(~IP_DF
)))
475 p
.i_key
= p
.o_key
= 0;
476 p
.i_flags
= p
.o_flags
= 0;
477 err
= ip_tunnel_ioctl(dev
, &p
, cmd
);
481 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
487 static const struct net_device_ops ipip_netdev_ops
= {
488 .ndo_init
= ipip_tunnel_init
,
489 .ndo_uninit
= ip_tunnel_uninit
,
490 .ndo_start_xmit
= ipip_tunnel_xmit
,
491 .ndo_do_ioctl
= ipip_tunnel_ioctl
,
492 .ndo_change_mtu
= ip_tunnel_change_mtu
,
493 .ndo_get_stats64
= ip_tunnel_get_stats64
,
494 .ndo_get_iflink
= ip_tunnel_get_iflink
,
/* Feature bits ORed into both dev->features and dev->hw_features. */
#define IPIP_FEATURES (NETIF_F_SG |		\
		       NETIF_F_FRAGLIST |	\
		       NETIF_F_HIGHDMA |	\
		       NETIF_F_GSO_SOFTWARE |	\
		       NETIF_F_HW_CSUM)
503 static void ipip_tunnel_setup(struct net_device
*dev
)
505 struct ip_tunnel
*t
= netdev_priv(dev
);
507 dev
->netdev_ops
= &ipip_netdev_ops
;
509 dev
->type
= ARPHRD_TUNNEL
;
510 dev
->flags
= IFF_NOARP
;
512 dev
->features
|= NETIF_F_LLTX
;
515 dev
->features
|= IPIP_FEATURES
;
516 dev
->hw_features
|= IPIP_FEATURES
;
517 ip_tunnel_setup(dev
, ipip_net_id
);
518 INIT_LIST_HEAD(&t
->fan
.fan_maps
);
521 static int ipip_tunnel_init(struct net_device
*dev
)
523 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
525 memcpy(dev
->dev_addr
, &tunnel
->parms
.iph
.saddr
, 4);
526 memcpy(dev
->broadcast
, &tunnel
->parms
.iph
.daddr
, 4);
528 tunnel
->tun_hlen
= 0;
529 tunnel
->hlen
= tunnel
->tun_hlen
+ tunnel
->encap_hlen
;
530 return ip_tunnel_init(dev
);
533 static int ipip_tunnel_validate(struct nlattr
*tb
[], struct nlattr
*data
[],
534 struct netlink_ext_ack
*extack
)
538 if (!data
|| !data
[IFLA_IPTUN_PROTO
])
541 proto
= nla_get_u8(data
[IFLA_IPTUN_PROTO
]);
542 if (proto
!= IPPROTO_IPIP
&& proto
!= IPPROTO_MPLS
&& proto
!= 0)
548 static void ipip_netlink_parms(struct nlattr
*data
[],
549 struct ip_tunnel_parm
*parms
, bool *collect_md
,
552 memset(parms
, 0, sizeof(*parms
));
554 parms
->iph
.version
= 4;
555 parms
->iph
.protocol
= IPPROTO_IPIP
;
562 if (data
[IFLA_IPTUN_LINK
])
563 parms
->link
= nla_get_u32(data
[IFLA_IPTUN_LINK
]);
565 if (data
[IFLA_IPTUN_LOCAL
])
566 parms
->iph
.saddr
= nla_get_in_addr(data
[IFLA_IPTUN_LOCAL
]);
568 if (data
[IFLA_IPTUN_REMOTE
])
569 parms
->iph
.daddr
= nla_get_in_addr(data
[IFLA_IPTUN_REMOTE
]);
571 if (data
[IFLA_IPTUN_TTL
]) {
572 parms
->iph
.ttl
= nla_get_u8(data
[IFLA_IPTUN_TTL
]);
574 parms
->iph
.frag_off
= htons(IP_DF
);
577 if (data
[IFLA_IPTUN_TOS
])
578 parms
->iph
.tos
= nla_get_u8(data
[IFLA_IPTUN_TOS
]);
580 if (data
[IFLA_IPTUN_PROTO
])
581 parms
->iph
.protocol
= nla_get_u8(data
[IFLA_IPTUN_PROTO
]);
583 if (!data
[IFLA_IPTUN_PMTUDISC
] || nla_get_u8(data
[IFLA_IPTUN_PMTUDISC
]))
584 parms
->iph
.frag_off
= htons(IP_DF
);
586 if (data
[IFLA_IPTUN_COLLECT_METADATA
])
589 if (data
[IFLA_IPTUN_FWMARK
])
590 *fwmark
= nla_get_u32(data
[IFLA_IPTUN_FWMARK
]);
593 /* This function returns true when ENCAP attributes are present in the nl msg */
594 static bool ipip_netlink_encap_parms(struct nlattr
*data
[],
595 struct ip_tunnel_encap
*ipencap
)
599 memset(ipencap
, 0, sizeof(*ipencap
));
604 if (data
[IFLA_IPTUN_ENCAP_TYPE
]) {
606 ipencap
->type
= nla_get_u16(data
[IFLA_IPTUN_ENCAP_TYPE
]);
609 if (data
[IFLA_IPTUN_ENCAP_FLAGS
]) {
611 ipencap
->flags
= nla_get_u16(data
[IFLA_IPTUN_ENCAP_FLAGS
]);
614 if (data
[IFLA_IPTUN_ENCAP_SPORT
]) {
616 ipencap
->sport
= nla_get_be16(data
[IFLA_IPTUN_ENCAP_SPORT
]);
619 if (data
[IFLA_IPTUN_ENCAP_DPORT
]) {
621 ipencap
->dport
= nla_get_be16(data
[IFLA_IPTUN_ENCAP_DPORT
]);
627 static void ipip_fan_flush_map(struct ip_tunnel
*t
)
629 struct ip_fan_map
*fan_map
;
631 list_for_each_entry_rcu(fan_map
, &t
->fan
.fan_maps
, list
) {
632 list_del_rcu(&fan_map
->list
);
633 kfree_rcu(fan_map
, rcu
);
637 static int ipip_fan_del_map(struct ip_tunnel
*t
, __be32 overlay
)
639 struct ip_fan_map
*fan_map
;
641 fan_map
= ipip_fan_find_map(t
, overlay
);
645 list_del_rcu(&fan_map
->list
);
646 kfree_rcu(fan_map
, rcu
);
651 static int ipip_fan_add_map(struct ip_tunnel
*t
, struct ifla_fan_map
*map
)
653 __be32 overlay_mask
, underlay_mask
;
654 struct ip_fan_map
*fan_map
;
656 overlay_mask
= inet_make_mask(map
->overlay_prefix
);
657 underlay_mask
= inet_make_mask(map
->underlay_prefix
);
659 if ((map
->overlay
& ~overlay_mask
) || (map
->underlay
& ~underlay_mask
))
662 if (!(map
->overlay
& overlay_mask
) && (map
->underlay
& underlay_mask
))
665 /* Special case: overlay 0 and underlay 0: flush all mappings */
666 if (!map
->overlay
&& !map
->underlay
) {
667 ipip_fan_flush_map(t
);
671 /* Special case: overlay set and underlay 0: clear map for overlay */
673 return ipip_fan_del_map(t
, map
->overlay
);
675 if (ipip_fan_find_map(t
, map
->overlay
))
678 fan_map
= kmalloc(sizeof(*fan_map
), GFP_KERNEL
);
679 fan_map
->underlay
= map
->underlay
;
680 fan_map
->overlay
= map
->overlay
;
681 fan_map
->underlay_prefix
= map
->underlay_prefix
;
682 fan_map
->overlay_mask
= ntohl(overlay_mask
);
683 fan_map
->overlay_prefix
= map
->overlay_prefix
;
685 list_add_tail_rcu(&fan_map
->list
, &t
->fan
.fan_maps
);
691 static int ipip_netlink_fan(struct nlattr
*data
[], struct ip_tunnel
*t
,
692 struct ip_tunnel_parm
*parms
)
694 struct ifla_fan_map
*map
;
698 if (!data
[IFLA_IPTUN_FAN_MAP
])
701 if (parms
->iph
.daddr
)
704 nla_for_each_nested(attr
, data
[IFLA_IPTUN_FAN_MAP
], rem
) {
705 map
= nla_data(attr
);
706 rv
= ipip_fan_add_map(t
, map
);
714 static int ipip_newlink(struct net
*src_net
, struct net_device
*dev
,
715 struct nlattr
*tb
[], struct nlattr
*data
[],
716 struct netlink_ext_ack
*extack
)
718 struct ip_tunnel
*t
= netdev_priv(dev
);
719 struct ip_tunnel_parm p
;
720 struct ip_tunnel_encap ipencap
;
724 if (ipip_netlink_encap_parms(data
, &ipencap
)) {
725 err
= ip_tunnel_encap_setup(t
, &ipencap
);
731 ipip_netlink_parms(data
, &p
, &t
->collect_md
, &fwmark
);
732 err
= ipip_netlink_fan(data
, t
, &p
);
735 return ip_tunnel_newlink(dev
, tb
, &p
, fwmark
);
738 static int ipip_changelink(struct net_device
*dev
, struct nlattr
*tb
[],
739 struct nlattr
*data
[],
740 struct netlink_ext_ack
*extack
)
742 struct ip_tunnel
*t
= netdev_priv(dev
);
743 struct ip_tunnel_parm p
;
744 struct ip_tunnel_encap ipencap
;
746 __u32 fwmark
= t
->fwmark
;
749 if (ipip_netlink_encap_parms(data
, &ipencap
)) {
750 err
= ip_tunnel_encap_setup(t
, &ipencap
);
756 ipip_netlink_parms(data
, &p
, &collect_md
, &fwmark
);
759 err
= ipip_netlink_fan(data
, t
, &p
);
763 if (((dev
->flags
& IFF_POINTOPOINT
) && !p
.iph
.daddr
) ||
764 (!(dev
->flags
& IFF_POINTOPOINT
) && p
.iph
.daddr
))
767 return ip_tunnel_changelink(dev
, tb
, &p
, fwmark
);
770 static size_t ipip_get_size(const struct net_device
*dev
)
773 /* IFLA_IPTUN_LINK */
775 /* IFLA_IPTUN_LOCAL */
777 /* IFLA_IPTUN_REMOTE */
783 /* IFLA_IPTUN_PROTO */
785 /* IFLA_IPTUN_PMTUDISC */
787 /* IFLA_IPTUN_ENCAP_TYPE */
789 /* IFLA_IPTUN_ENCAP_FLAGS */
791 /* IFLA_IPTUN_ENCAP_SPORT */
793 /* IFLA_IPTUN_ENCAP_DPORT */
795 /* IFLA_IPTUN_COLLECT_METADATA */
797 /* IFLA_IPTUN_FWMARK */
799 /* IFLA_IPTUN_FAN_MAP */
800 nla_total_size(sizeof(struct ifla_fan_map
)) * 256 +
804 static int ipip_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
806 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
807 struct ip_tunnel_parm
*parm
= &tunnel
->parms
;
809 if (nla_put_u32(skb
, IFLA_IPTUN_LINK
, parm
->link
) ||
810 nla_put_in_addr(skb
, IFLA_IPTUN_LOCAL
, parm
->iph
.saddr
) ||
811 nla_put_in_addr(skb
, IFLA_IPTUN_REMOTE
, parm
->iph
.daddr
) ||
812 nla_put_u8(skb
, IFLA_IPTUN_TTL
, parm
->iph
.ttl
) ||
813 nla_put_u8(skb
, IFLA_IPTUN_TOS
, parm
->iph
.tos
) ||
814 nla_put_u8(skb
, IFLA_IPTUN_PROTO
, parm
->iph
.protocol
) ||
815 nla_put_u8(skb
, IFLA_IPTUN_PMTUDISC
,
816 !!(parm
->iph
.frag_off
& htons(IP_DF
))) ||
817 nla_put_u32(skb
, IFLA_IPTUN_FWMARK
, tunnel
->fwmark
))
818 goto nla_put_failure
;
820 if (nla_put_u16(skb
, IFLA_IPTUN_ENCAP_TYPE
,
821 tunnel
->encap
.type
) ||
822 nla_put_be16(skb
, IFLA_IPTUN_ENCAP_SPORT
,
823 tunnel
->encap
.sport
) ||
824 nla_put_be16(skb
, IFLA_IPTUN_ENCAP_DPORT
,
825 tunnel
->encap
.dport
) ||
826 nla_put_u16(skb
, IFLA_IPTUN_ENCAP_FLAGS
,
827 tunnel
->encap
.flags
))
828 goto nla_put_failure
;
830 if (tunnel
->collect_md
)
831 if (nla_put_flag(skb
, IFLA_IPTUN_COLLECT_METADATA
))
832 goto nla_put_failure
;
833 if (fan_has_map(&tunnel
->fan
)) {
834 struct nlattr
*fan_nest
;
835 struct ip_fan_map
*fan_map
;
837 fan_nest
= nla_nest_start(skb
, IFLA_IPTUN_FAN_MAP
);
839 goto nla_put_failure
;
840 list_for_each_entry_rcu(fan_map
, &tunnel
->fan
.fan_maps
, list
) {
841 struct ifla_fan_map map
;
843 map
.underlay
= fan_map
->underlay
;
844 map
.underlay_prefix
= fan_map
->underlay_prefix
;
845 map
.overlay
= fan_map
->overlay
;
846 map
.overlay_prefix
= fan_map
->overlay_prefix
;
847 if (nla_put(skb
, IFLA_FAN_MAPPING
, sizeof(map
), &map
))
848 goto nla_put_failure
;
850 nla_nest_end(skb
, fan_nest
);
859 static const struct nla_policy ipip_policy
[IFLA_IPTUN_MAX
+ 1] = {
860 [IFLA_IPTUN_LINK
] = { .type
= NLA_U32
},
861 [IFLA_IPTUN_LOCAL
] = { .type
= NLA_U32
},
862 [IFLA_IPTUN_REMOTE
] = { .type
= NLA_U32
},
863 [IFLA_IPTUN_TTL
] = { .type
= NLA_U8
},
864 [IFLA_IPTUN_TOS
] = { .type
= NLA_U8
},
865 [IFLA_IPTUN_PROTO
] = { .type
= NLA_U8
},
866 [IFLA_IPTUN_PMTUDISC
] = { .type
= NLA_U8
},
867 [IFLA_IPTUN_ENCAP_TYPE
] = { .type
= NLA_U16
},
868 [IFLA_IPTUN_ENCAP_FLAGS
] = { .type
= NLA_U16
},
869 [IFLA_IPTUN_ENCAP_SPORT
] = { .type
= NLA_U16
},
870 [IFLA_IPTUN_ENCAP_DPORT
] = { .type
= NLA_U16
},
871 [IFLA_IPTUN_COLLECT_METADATA
] = { .type
= NLA_FLAG
},
872 [IFLA_IPTUN_FWMARK
] = { .type
= NLA_U32
},
874 [__IFLA_IPTUN_VENDOR_BREAK
... IFLA_IPTUN_MAX
] = { .type
= NLA_BINARY
},
875 [IFLA_IPTUN_FAN_MAP
] = { .type
= NLA_NESTED
},
878 static struct rtnl_link_ops ipip_link_ops __read_mostly
= {
880 .maxtype
= IFLA_IPTUN_MAX
,
881 .policy
= ipip_policy
,
882 .priv_size
= sizeof(struct ip_tunnel
),
883 .setup
= ipip_tunnel_setup
,
884 .validate
= ipip_tunnel_validate
,
885 .newlink
= ipip_newlink
,
886 .changelink
= ipip_changelink
,
887 .dellink
= ip_tunnel_dellink
,
888 .get_size
= ipip_get_size
,
889 .fill_info
= ipip_fill_info
,
890 .get_link_net
= ip_tunnel_get_link_net
,
893 static struct xfrm_tunnel ipip_handler __read_mostly
= {
895 .err_handler
= ipip_err
,
899 #if IS_ENABLED(CONFIG_MPLS)
900 static struct xfrm_tunnel mplsip_handler __read_mostly
= {
901 .handler
= mplsip_rcv
,
902 .err_handler
= ipip_err
,
907 static int __net_init
ipip_init_net(struct net
*net
)
909 return ip_tunnel_init_net(net
, ipip_net_id
, &ipip_link_ops
, "tunl0");
912 static void __net_exit
ipip_exit_net(struct net
*net
)
914 struct ip_tunnel_net
*itn
= net_generic(net
, ipip_net_id
);
915 ip_tunnel_delete_net(itn
, &ipip_link_ops
);
918 static struct pernet_operations ipip_net_ops
= {
919 .init
= ipip_init_net
,
920 .exit
= ipip_exit_net
,
922 .size
= sizeof(struct ip_tunnel_net
),
#ifdef CONFIG_SYSCTL
/* Handle returned by register_net_sysctl(); needed to unregister. */
static struct ctl_table_header *ipip_fan_header;

/* Value exported through the "version" entry under net/fan. */
static unsigned int ipip_fan_version = 3;

static struct ctl_table ipip_fan_sysctls[] = {
	{
		.procname	= "version",
		.data		= &ipip_fan_version,
		.maxlen		= sizeof(ipip_fan_version),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{},
};

#endif /* CONFIG_SYSCTL */
942 static int __init
ipip_init(void)
946 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
948 err
= register_pernet_device(&ipip_net_ops
);
951 err
= xfrm4_tunnel_register(&ipip_handler
, AF_INET
);
953 pr_info("%s: can't register tunnel\n", __func__
);
954 goto xfrm_tunnel_ipip_failed
;
956 #if IS_ENABLED(CONFIG_MPLS)
957 err
= xfrm4_tunnel_register(&mplsip_handler
, AF_MPLS
);
959 pr_info("%s: can't register tunnel\n", __func__
);
960 goto xfrm_tunnel_mplsip_failed
;
963 err
= rtnl_link_register(&ipip_link_ops
);
965 goto rtnl_link_failed
;
968 ipip_fan_header
= register_net_sysctl(&init_net
, "net/fan",
970 if (!ipip_fan_header
) {
974 #endif /* CONFIG_SYSCTL */
981 rtnl_link_unregister(&ipip_link_ops
);
982 #endif /* CONFIG_SYSCTL */
984 #if IS_ENABLED(CONFIG_MPLS)
985 xfrm4_tunnel_deregister(&mplsip_handler
, AF_INET
);
986 xfrm_tunnel_mplsip_failed
:
989 xfrm4_tunnel_deregister(&ipip_handler
, AF_INET
);
990 xfrm_tunnel_ipip_failed
:
991 unregister_pernet_device(&ipip_net_ops
);
995 static void __exit
ipip_fini(void)
998 unregister_net_sysctl_table(ipip_fan_header
);
999 #endif /* CONFIG_SYSCTL */
1000 rtnl_link_unregister(&ipip_link_ops
);
1001 if (xfrm4_tunnel_deregister(&ipip_handler
, AF_INET
))
1002 pr_info("%s: can't deregister tunnel\n", __func__
);
1003 #if IS_ENABLED(CONFIG_MPLS)
1004 if (xfrm4_tunnel_deregister(&mplsip_handler
, AF_MPLS
))
1005 pr_info("%s: can't deregister tunnel\n", __func__
);
1007 unregister_pernet_device(&ipip_net_ops
);
1010 module_init(ipip_init
);
1011 module_exit(ipip_fini
);
1012 MODULE_LICENSE("GPL");
1013 MODULE_ALIAS_RTNL_LINK("ipip");
1014 MODULE_ALIAS_NETDEV("tunl0");