2 * Linux NET3: IP/IP protocol decoder.
5 * Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
8 * Alan Cox : Merged and made usable non modular (its so tiny its silly as
9 * a module taking up 2 pages).
10 * Alan Cox : Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11 * to keep ip_forward happy.
12 * Alan Cox : More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13 * Kai Schulte : Fixed #defines for IP_FIREWALL->FIREWALL
14 * David Woodhouse : Perform some basic ICMP handling.
15 * IPIP Routing without decapsulation.
16 * Carlos Picoto : GRE over IP support
17 * Alexey Kuznetsov: Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18 * I do not want to merge them together.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
27 /* tunnel.c: an IP tunnel driver
29 The purpose of this driver is to provide an IP tunnel through
30 which you can tunnel network traffic transparently across subnets.
32 This was written by looking at Nick Holloway's dummy driver
33 Thanks for the great code!
35 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/01/95
38 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39 dev->hard_header/hard_header_len changed to use no headers.
40 Comments/bracketing tweaked.
41 Made the tunnels use dev->name not tunnel: when error reporting.
44 -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95
47 Changed to tunnel to destination gateway in addition to the
48 tunnel's pointopoint address
49 Almost completely rewritten
50 Note: There is currently no firewall or ICMP handling done.
52 -Sam Lantinga (slouken@cs.ucdavis.edu) 02/13/96
56 /* Things I wish I had known when writing the tunnel driver:
58 When the tunnel_xmit() function is called, the skb contains the
59 packet to be sent (plus a great deal of extra info), and dev
60 contains the tunnel device that _we_ are.
62 When we are passed a packet, we are expected to fill in the
63 source address with our source IP address.
65 What is the proper way to allocate, copy and free a buffer?
66 After you allocate it, it is a "0 length" chunk of memory
67 starting at zero. If you want to add headers to the buffer
68 later, you'll have to call "skb_reserve(skb, amount)" with
69 the amount of memory you want reserved. Then, you call
70 "skb_put(skb, amount)" with the amount of space you want in
71 the buffer. skb_put() returns a pointer to the top (#0) of
72 that buffer. skb->len is set to the amount of space you have
73 "allocated" with skb_put(). You can then write up to skb->len
74 bytes to that buffer. If you need more, you can call skb_put()
75 again with the additional amount of space you need. You can
76 find out how much more space you can allocate by calling
"skb_tailroom(skb)".
78 Now, to add header space, call "skb_push(skb, header_len)".
79 This creates space at the beginning of the buffer and returns
80 a pointer to this new space. If later you need to strip a
81 header from a buffer, call "skb_pull(skb, header_len)".
82 skb_headroom() will return how much space is left at the top
83 of the buffer (before the main data). Remember, this headroom
84 space must be reserved before the skb_put() function is called.
88 This version of net/ipv4/ipip.c is cloned of net/ipv4/ip_gre.c
90 For comments look at net/ipv4/ip_gre.c --ANK
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <linux/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/init.h>
107 #include <linux/netfilter_ipv4.h>
108 #include <linux/if_ether.h>
109 #include <linux/inetdevice.h>
110 #include <linux/rculist.h>
112 #include <net/sock.h>
114 #include <net/icmp.h>
115 #include <net/ip_tunnels.h>
116 #include <net/inet_ecn.h>
117 #include <net/xfrm.h>
118 #include <net/net_namespace.h>
119 #include <net/netns/generic.h>
120 #include <net/dst_metadata.h>
122 static bool log_ecn_error
= true;
123 module_param(log_ecn_error
, bool, 0644);
124 MODULE_PARM_DESC(log_ecn_error
, "Log packets received with corrupted ECN");
126 static unsigned int ipip_net_id __read_mostly
;
128 static int ipip_tunnel_init(struct net_device
*dev
);
129 static struct rtnl_link_ops ipip_link_ops __read_mostly
;
131 static int ipip_err(struct sk_buff
*skb
, u32 info
)
133 /* All the routers (except for Linux) return only
134 * 8 bytes of packet payload. It means, that precise relaying of
135 * ICMP in the real Internet is absolutely infeasible.
137 struct net
*net
= dev_net(skb
->dev
);
138 struct ip_tunnel_net
*itn
= net_generic(net
, ipip_net_id
);
139 const struct iphdr
*iph
= (const struct iphdr
*)skb
->data
;
140 const int type
= icmp_hdr(skb
)->type
;
141 const int code
= icmp_hdr(skb
)->code
;
146 case ICMP_DEST_UNREACH
:
149 /* Impossible event. */
152 /* All others are translated to HOST_UNREACH.
153 * rfc2003 contains "deep thoughts" about NET_UNREACH,
154 * I believe they are just ether pollution. --ANK
160 case ICMP_TIME_EXCEEDED
:
161 if (code
!= ICMP_EXC_TTL
)
172 t
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
173 iph
->daddr
, iph
->saddr
, 0);
179 if (type
== ICMP_DEST_UNREACH
&& code
== ICMP_FRAG_NEEDED
) {
180 ipv4_update_pmtu(skb
, net
, info
, t
->parms
.link
, 0,
185 if (type
== ICMP_REDIRECT
) {
186 ipv4_redirect(skb
, net
, t
->parms
.link
, 0, iph
->protocol
, 0);
190 if (t
->parms
.iph
.daddr
== 0) {
195 if (t
->parms
.iph
.ttl
== 0 && type
== ICMP_TIME_EXCEEDED
)
198 if (time_before(jiffies
, t
->err_time
+ IPTUNNEL_ERR_TIMEO
))
202 t
->err_time
= jiffies
;
208 static const struct tnl_ptk_info ipip_tpi
= {
209 /* no tunnel info required for ipip. */
210 .proto
= htons(ETH_P_IP
),
213 #if IS_ENABLED(CONFIG_MPLS)
214 static const struct tnl_ptk_info mplsip_tpi
= {
215 /* no tunnel info required for mplsip. */
216 .proto
= htons(ETH_P_MPLS_UC
),
220 static int ipip_tunnel_rcv(struct sk_buff
*skb
, u8 ipproto
)
222 struct net
*net
= dev_net(skb
->dev
);
223 struct ip_tunnel_net
*itn
= net_generic(net
, ipip_net_id
);
224 struct metadata_dst
*tun_dst
= NULL
;
225 struct ip_tunnel
*tunnel
;
226 const struct iphdr
*iph
;
229 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, TUNNEL_NO_KEY
,
230 iph
->saddr
, iph
->daddr
, 0);
232 const struct tnl_ptk_info
*tpi
;
234 if (tunnel
->parms
.iph
.protocol
!= ipproto
&&
235 tunnel
->parms
.iph
.protocol
!= 0)
238 if (!xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
240 #if IS_ENABLED(CONFIG_MPLS)
241 if (ipproto
== IPPROTO_MPLS
)
246 if (iptunnel_pull_header(skb
, 0, tpi
->proto
, false))
248 if (tunnel
->collect_md
) {
249 tun_dst
= ip_tun_rx_dst(skb
, 0, 0, 0);
253 return ip_tunnel_rcv(tunnel
, skb
, tpi
, tun_dst
, log_ecn_error
);
263 static int ipip_rcv(struct sk_buff
*skb
)
265 return ipip_tunnel_rcv(skb
, IPPROTO_IPIP
);
268 #if IS_ENABLED(CONFIG_MPLS)
269 static int mplsip_rcv(struct sk_buff
*skb
)
271 return ipip_tunnel_rcv(skb
, IPPROTO_MPLS
);
275 static struct ip_fan_map
*ipip_fan_find_map(struct ip_tunnel
*t
, __be32 daddr
)
277 struct ip_fan_map
*fan_map
;
280 list_for_each_entry_rcu(fan_map
, &t
->fan
.fan_maps
, list
) {
281 if (fan_map
->overlay
==
282 (daddr
& inet_make_mask(fan_map
->overlay_prefix
))) {
292 /* Determine fan tunnel endpoint to send packet to, based on the inner IP
 * address.
295 * Given a /8 overlay and /16 underlay, for an overlay (inner) address
296 * Y.A.B.C, the transformation is F.G.A.B, where "F" and "G" are the first
297 * two octets of the underlay network (the network portion of a /16), "A"
298 * and "B" are the low order two octets of the underlay network host (the
299 * host portion of a /16), and "Y" is a configured first octet of the
 * overlay network.
302 * E.g., underlay host 10.88.3.4/16 with an overlay of 99.0.0.0/8 would
303 * host overlay subnet 99.3.4.0/24. An overlay network datagram from
304 * 99.3.4.5 to 99.6.7.8, would be directed to underlay host 10.88.6.7,
305 * which hosts overlay network subnet 99.6.7.0/24. This transformation is
306 * described in detail further below.
308 * Using netmasks for the overlay and underlay other than /8 and /16, as
309 * shown above, can yield larger (or smaller) overlay subnets, with the
310 * trade-off of allowing fewer (or more) underlay hosts to participate.
312 * The size of each overlay network subnet is defined by the total of the
313 * network mask of the overlay plus the size of host portion of the
314 * underlay network. In the above example, /8 + /16 = /24.
316 * E.g., consider underlay host 10.99.238.5/20 and overlay 99.0.0.0/8. In
317 * this case, the network portion of the underlay is 10.99.224.0/20, and
318 * the host portion is 0.0.14.5 (12 bits). To determine the overlay
319 * network subnet, the 12 bits of host portion are left shifted 12 bits
320 * (/20 - /8) and ORed with the overlay subnet prefix. This yields an
321 * overlay subnet of 99.224.80.0/20, composed of 8 bits overlay, followed by
322 * 12 bits underlay. This yields 12 bits in the overlay network portion,
323 * allowing for 4094 addresses in each overlay network subnet. The
324 * trade-off is that fewer hosts may participate in the underlay network,
325 * as its host address size has shrunk from 16 bits (65534 addresses) in
326 * the first example to 12 bits (4094 addresses) here.
328 * For fewer hosts per overlay subnet (permitting a larger number of
329 * underlay hosts to participate), the underlay netmask may be made
 * smaller.
332 * E.g., underlay host 10.111.1.2/12 (network 10.96.0.0/12, host portion
333 * is 0.15.1.2, 20 bits) with an overlay of 33.0.0.0/8 would left shift
334 * the 20 bits of host by 4 (so that its highest order bit is adjacent to
335 * the lowest order bit of the /8 overlay). This yields an overlay subnet
336 * of 33.240.16.32/28 (8 bits overlay, 20 bits from the host portion of
337 * the underlay). This provides more addresses for the underlay network
338 * (approximately 2^20), but each host's segment of the overlay provides
339 * only 4 bits of addresses (14 usable).
341 * It is also possible to adjust the overlay subnet.
343 * For an overlay of 240.0.0.0/5 and underlay of 10.88.0.0/20, consider
344 * underlay host 10.88.129.2; the 12 bits of host, 0.0.1.2, are left
345 * shifted 15 bits (/20 - /5), yielding an overlay network of
346 * 240.129.0.0/17. An underlay host of 10.88.244.215 would yield an
347 * overlay network of 242.107.128.0/17.
349 * For an overlay of 100.64.0.0/10 and underlay of 10.224.220.0/24, for
350 * underlay host 10.224.220.10, the underlay host portion (.10) is left
351 * shifted 14 bits, yielding an overlay network subnet of 100.66.128.0/18.
352 * This would permit 254 addresses on the underlay, with each overlay
353 * segment providing approximately 2^14 - 2 addresses (16382).
355 * For packets being encapsulated, the overlay network destination IP
356 * address is deconstructed into its overlay and underlay-derived
357 * portions. The underlay portion (determined by the overlay mask and
358 * overlay subnet mask) is right shifted according to the size of the
359 * underlay network mask. This value is then ORed with the network
360 * portion of the underlay network to produce the underlay network
361 * destination for the encapsulated datagram.
363 * For example, using the initial example of underlay 10.88.3.4/16 and
364 * overlay 99.0.0.0/8, with underlay host 10.88.3.4/16 providing overlay
365 * subnet 99.3.4.0/24 with specific host 99.3.4.5. A datagram from
366 * 99.3.4.5 to 99.6.7.8 would first have the underlay host derived portion
367 * of the address extracted. This is a number of bits equal to underlay
368 * network host portion. In the destination address, the highest order of
369 * these bits is one bit lower than the lowest order bit from the overlay
 * network mask.
372 * Using the sample value, 99.6.7.8, the overlay mask is /8, and the
373 * underlay mask is /16 (leaving 16 bits for the host portion). The bits
374 * to be shifted are the middle two octets, 0.6.7.0, as this is 99.6.7.8
375 * ANDed with the mask 0x00ffff00 (which is 16 bits, the highest order of
376 * which is 1 bit lower than the lowest order overlay address bit).
378 * These octets, 0.6.7.0, are then right shifted 8 bits, yielding 0.0.6.7.
379 * This value is then ORed with the underlay network portion,
380 * 10.88.0.0/16, providing 10.88.6.7 as the final underlay destination for
381 * the encapsulated datagram.
383 * Another transform using the final example: overlay 100.64.0.0/10 and
384 * underlay 10.224.220.0/24. Consider overlay address 100.66.128.1
385 * sending a datagram to 100.66.200.5. In this case, 8 bits (the host
386 * portion size of 10.224.220.0/24) beginning after the 100.64/10 overlay
387 * prefix are masked off, yielding 0.2.192.0. This is right shifted 14
388 * (32 - 10 - (32 - 24), i.e., the number of bits between the overlay
389 * network portion and the underlay host portion) bits, yielding 0.0.0.11.
390 * This is ORed with the underlay network portion, 10.224.220.0/24, giving
391 * the underlay destination of 10.224.220.11 for overlay destination
 * 100.66.200.5.
 */
394 static int ipip_build_fan_iphdr(struct ip_tunnel
*tunnel
, struct sk_buff
*skb
, struct iphdr
*iph
)
396 struct ip_fan_map
*f_map
;
399 f_map
= ipip_fan_find_map(tunnel
, ip_hdr(skb
)->daddr
);
403 daddr
= ntohl(ip_hdr(skb
)->daddr
);
404 underlay
= ntohl(f_map
->underlay
);
408 *iph
= tunnel
->parms
.iph
;
409 iph
->daddr
= htonl(underlay
|
410 ((daddr
& ~f_map
->overlay_mask
) >>
411 (32 - f_map
->overlay_prefix
-
412 (32 - f_map
->underlay_prefix
))));
417 * This function assumes it is being called from dev_queue_xmit()
418 * and that skb is filled properly by that function.
420 static netdev_tx_t
ipip_tunnel_xmit(struct sk_buff
*skb
,
421 struct net_device
*dev
)
423 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
424 const struct iphdr
*tiph
= &tunnel
->parms
.iph
;
428 if (!pskb_inet_may_pull(skb
))
431 switch (skb
->protocol
) {
432 case htons(ETH_P_IP
):
433 ipproto
= IPPROTO_IPIP
;
435 #if IS_ENABLED(CONFIG_MPLS)
436 case htons(ETH_P_MPLS_UC
):
437 ipproto
= IPPROTO_MPLS
;
444 if (tiph
->protocol
!= ipproto
&& tiph
->protocol
!= 0)
447 if (iptunnel_handle_offloads(skb
, SKB_GSO_IPXIP4
))
450 if (fan_has_map(&tunnel
->fan
)) {
451 if (ipip_build_fan_iphdr(tunnel
, skb
, &fiph
))
455 tiph
= &tunnel
->parms
.iph
;
458 skb_set_inner_ipproto(skb
, ipproto
);
460 if (tunnel
->collect_md
)
461 ip_md_tunnel_xmit(skb
, dev
, ipproto
);
463 ip_tunnel_xmit(skb
, dev
, tiph
, ipproto
);
469 dev
->stats
.tx_errors
++;
473 static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto
)
478 #if IS_ENABLED(CONFIG_MPLS)
488 ipip_tunnel_ioctl(struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
491 struct ip_tunnel_parm p
;
493 if (copy_from_user(&p
, ifr
->ifr_ifru
.ifru_data
, sizeof(p
)))
496 if (cmd
== SIOCADDTUNNEL
|| cmd
== SIOCCHGTUNNEL
) {
497 if (p
.iph
.version
!= 4 ||
498 !ipip_tunnel_ioctl_verify_protocol(p
.iph
.protocol
) ||
499 p
.iph
.ihl
!= 5 || (p
.iph
.frag_off
&htons(~IP_DF
)))
503 p
.i_key
= p
.o_key
= 0;
504 p
.i_flags
= p
.o_flags
= 0;
505 err
= ip_tunnel_ioctl(dev
, &p
, cmd
);
509 if (copy_to_user(ifr
->ifr_ifru
.ifru_data
, &p
, sizeof(p
)))
515 static const struct net_device_ops ipip_netdev_ops
= {
516 .ndo_init
= ipip_tunnel_init
,
517 .ndo_uninit
= ip_tunnel_uninit
,
518 .ndo_start_xmit
= ipip_tunnel_xmit
,
519 .ndo_do_ioctl
= ipip_tunnel_ioctl
,
520 .ndo_change_mtu
= ip_tunnel_change_mtu
,
521 .ndo_get_stats64
= ip_tunnel_get_stats64
,
522 .ndo_get_iflink
= ip_tunnel_get_iflink
,
/*
 * Feature flags set on every tunnel device by ipip_tunnel_setup().
 * NOTE(review): three of the five flags were missing from the truncated
 * listing and are restored here - confirm against the real file.
 */
#define IPIP_FEATURES (NETIF_F_SG |		\
		       NETIF_F_FRAGLIST |	\
		       NETIF_F_HIGHDMA |	\
		       NETIF_F_GSO_SOFTWARE |	\
		       NETIF_F_HW_CSUM)
531 static void ipip_tunnel_setup(struct net_device
*dev
)
533 struct ip_tunnel
*t
= netdev_priv(dev
);
535 dev
->netdev_ops
= &ipip_netdev_ops
;
537 dev
->type
= ARPHRD_TUNNEL
;
538 dev
->flags
= IFF_NOARP
;
540 dev
->features
|= NETIF_F_LLTX
;
543 dev
->features
|= IPIP_FEATURES
;
544 dev
->hw_features
|= IPIP_FEATURES
;
545 ip_tunnel_setup(dev
, ipip_net_id
);
546 INIT_LIST_HEAD(&t
->fan
.fan_maps
);
549 static int ipip_tunnel_init(struct net_device
*dev
)
551 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
553 memcpy(dev
->dev_addr
, &tunnel
->parms
.iph
.saddr
, 4);
554 memcpy(dev
->broadcast
, &tunnel
->parms
.iph
.daddr
, 4);
556 tunnel
->tun_hlen
= 0;
557 tunnel
->hlen
= tunnel
->tun_hlen
+ tunnel
->encap_hlen
;
558 return ip_tunnel_init(dev
);
561 static int ipip_tunnel_validate(struct nlattr
*tb
[], struct nlattr
*data
[],
562 struct netlink_ext_ack
*extack
)
566 if (!data
|| !data
[IFLA_IPTUN_PROTO
])
569 proto
= nla_get_u8(data
[IFLA_IPTUN_PROTO
]);
570 if (proto
!= IPPROTO_IPIP
&& proto
!= IPPROTO_MPLS
&& proto
!= 0)
576 static void ipip_netlink_parms(struct nlattr
*data
[],
577 struct ip_tunnel_parm
*parms
, bool *collect_md
,
580 memset(parms
, 0, sizeof(*parms
));
582 parms
->iph
.version
= 4;
583 parms
->iph
.protocol
= IPPROTO_IPIP
;
590 if (data
[IFLA_IPTUN_LINK
])
591 parms
->link
= nla_get_u32(data
[IFLA_IPTUN_LINK
]);
593 if (data
[IFLA_IPTUN_LOCAL
])
594 parms
->iph
.saddr
= nla_get_in_addr(data
[IFLA_IPTUN_LOCAL
]);
596 if (data
[IFLA_IPTUN_REMOTE
])
597 parms
->iph
.daddr
= nla_get_in_addr(data
[IFLA_IPTUN_REMOTE
]);
599 if (data
[IFLA_IPTUN_TTL
]) {
600 parms
->iph
.ttl
= nla_get_u8(data
[IFLA_IPTUN_TTL
]);
602 parms
->iph
.frag_off
= htons(IP_DF
);
605 if (data
[IFLA_IPTUN_TOS
])
606 parms
->iph
.tos
= nla_get_u8(data
[IFLA_IPTUN_TOS
]);
608 if (data
[IFLA_IPTUN_PROTO
])
609 parms
->iph
.protocol
= nla_get_u8(data
[IFLA_IPTUN_PROTO
]);
611 if (!data
[IFLA_IPTUN_PMTUDISC
] || nla_get_u8(data
[IFLA_IPTUN_PMTUDISC
]))
612 parms
->iph
.frag_off
= htons(IP_DF
);
614 if (data
[IFLA_IPTUN_COLLECT_METADATA
])
617 if (data
[IFLA_IPTUN_FWMARK
])
618 *fwmark
= nla_get_u32(data
[IFLA_IPTUN_FWMARK
]);
621 /* This function returns true when ENCAP attributes are present in the nl msg */
622 static bool ipip_netlink_encap_parms(struct nlattr
*data
[],
623 struct ip_tunnel_encap
*ipencap
)
627 memset(ipencap
, 0, sizeof(*ipencap
));
632 if (data
[IFLA_IPTUN_ENCAP_TYPE
]) {
634 ipencap
->type
= nla_get_u16(data
[IFLA_IPTUN_ENCAP_TYPE
]);
637 if (data
[IFLA_IPTUN_ENCAP_FLAGS
]) {
639 ipencap
->flags
= nla_get_u16(data
[IFLA_IPTUN_ENCAP_FLAGS
]);
642 if (data
[IFLA_IPTUN_ENCAP_SPORT
]) {
644 ipencap
->sport
= nla_get_be16(data
[IFLA_IPTUN_ENCAP_SPORT
]);
647 if (data
[IFLA_IPTUN_ENCAP_DPORT
]) {
649 ipencap
->dport
= nla_get_be16(data
[IFLA_IPTUN_ENCAP_DPORT
]);
655 static void ipip_fan_flush_map(struct ip_tunnel
*t
)
657 struct ip_fan_map
*fan_map
;
659 list_for_each_entry_rcu(fan_map
, &t
->fan
.fan_maps
, list
) {
660 list_del_rcu(&fan_map
->list
);
661 kfree_rcu(fan_map
, rcu
);
665 static int ipip_fan_del_map(struct ip_tunnel
*t
, __be32 overlay
)
667 struct ip_fan_map
*fan_map
;
669 fan_map
= ipip_fan_find_map(t
, overlay
);
673 list_del_rcu(&fan_map
->list
);
674 kfree_rcu(fan_map
, rcu
);
679 static int ipip_fan_add_map(struct ip_tunnel
*t
, struct ifla_fan_map
*map
)
681 __be32 overlay_mask
, underlay_mask
;
682 struct ip_fan_map
*fan_map
;
684 overlay_mask
= inet_make_mask(map
->overlay_prefix
);
685 underlay_mask
= inet_make_mask(map
->underlay_prefix
);
687 if ((map
->overlay
& ~overlay_mask
) || (map
->underlay
& ~underlay_mask
))
690 if (!(map
->overlay
& overlay_mask
) && (map
->underlay
& underlay_mask
))
693 /* Special case: overlay 0 and underlay 0: flush all mappings */
694 if (!map
->overlay
&& !map
->underlay
) {
695 ipip_fan_flush_map(t
);
699 /* Special case: overlay set and underlay 0: clear map for overlay */
701 return ipip_fan_del_map(t
, map
->overlay
);
703 if (ipip_fan_find_map(t
, map
->overlay
))
706 fan_map
= kmalloc(sizeof(*fan_map
), GFP_KERNEL
);
707 fan_map
->underlay
= map
->underlay
;
708 fan_map
->overlay
= map
->overlay
;
709 fan_map
->underlay_prefix
= map
->underlay_prefix
;
710 fan_map
->overlay_mask
= ntohl(overlay_mask
);
711 fan_map
->overlay_prefix
= map
->overlay_prefix
;
713 list_add_tail_rcu(&fan_map
->list
, &t
->fan
.fan_maps
);
719 static int ipip_netlink_fan(struct nlattr
*data
[], struct ip_tunnel
*t
,
720 struct ip_tunnel_parm
*parms
)
722 struct ifla_fan_map
*map
;
726 if (data
== NULL
|| !data
[IFLA_IPTUN_FAN_MAP
])
729 if (parms
->iph
.daddr
)
732 nla_for_each_nested(attr
, data
[IFLA_IPTUN_FAN_MAP
], rem
) {
733 map
= nla_data(attr
);
734 rv
= ipip_fan_add_map(t
, map
);
742 static int ipip_newlink(struct net
*src_net
, struct net_device
*dev
,
743 struct nlattr
*tb
[], struct nlattr
*data
[],
744 struct netlink_ext_ack
*extack
)
746 struct ip_tunnel
*t
= netdev_priv(dev
);
747 struct ip_tunnel_parm p
;
748 struct ip_tunnel_encap ipencap
;
752 if (ipip_netlink_encap_parms(data
, &ipencap
)) {
753 err
= ip_tunnel_encap_setup(t
, &ipencap
);
759 ipip_netlink_parms(data
, &p
, &t
->collect_md
, &fwmark
);
760 err
= ipip_netlink_fan(data
, t
, &p
);
763 return ip_tunnel_newlink(dev
, tb
, &p
, fwmark
);
766 static int ipip_changelink(struct net_device
*dev
, struct nlattr
*tb
[],
767 struct nlattr
*data
[],
768 struct netlink_ext_ack
*extack
)
770 struct ip_tunnel
*t
= netdev_priv(dev
);
771 struct ip_tunnel_parm p
;
772 struct ip_tunnel_encap ipencap
;
774 __u32 fwmark
= t
->fwmark
;
777 if (ipip_netlink_encap_parms(data
, &ipencap
)) {
778 err
= ip_tunnel_encap_setup(t
, &ipencap
);
784 ipip_netlink_parms(data
, &p
, &collect_md
, &fwmark
);
787 err
= ipip_netlink_fan(data
, t
, &p
);
791 if (((dev
->flags
& IFF_POINTOPOINT
) && !p
.iph
.daddr
) ||
792 (!(dev
->flags
& IFF_POINTOPOINT
) && p
.iph
.daddr
))
795 return ip_tunnel_changelink(dev
, tb
, &p
, fwmark
);
798 static size_t ipip_get_size(const struct net_device
*dev
)
801 /* IFLA_IPTUN_LINK */
803 /* IFLA_IPTUN_LOCAL */
805 /* IFLA_IPTUN_REMOTE */
811 /* IFLA_IPTUN_PROTO */
813 /* IFLA_IPTUN_PMTUDISC */
815 /* IFLA_IPTUN_ENCAP_TYPE */
817 /* IFLA_IPTUN_ENCAP_FLAGS */
819 /* IFLA_IPTUN_ENCAP_SPORT */
821 /* IFLA_IPTUN_ENCAP_DPORT */
823 /* IFLA_IPTUN_COLLECT_METADATA */
825 /* IFLA_IPTUN_FWMARK */
827 /* IFLA_IPTUN_FAN_MAP */
828 nla_total_size(sizeof(struct ifla_fan_map
)) * 256 +
832 static int ipip_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
834 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
835 struct ip_tunnel_parm
*parm
= &tunnel
->parms
;
837 if (nla_put_u32(skb
, IFLA_IPTUN_LINK
, parm
->link
) ||
838 nla_put_in_addr(skb
, IFLA_IPTUN_LOCAL
, parm
->iph
.saddr
) ||
839 nla_put_in_addr(skb
, IFLA_IPTUN_REMOTE
, parm
->iph
.daddr
) ||
840 nla_put_u8(skb
, IFLA_IPTUN_TTL
, parm
->iph
.ttl
) ||
841 nla_put_u8(skb
, IFLA_IPTUN_TOS
, parm
->iph
.tos
) ||
842 nla_put_u8(skb
, IFLA_IPTUN_PROTO
, parm
->iph
.protocol
) ||
843 nla_put_u8(skb
, IFLA_IPTUN_PMTUDISC
,
844 !!(parm
->iph
.frag_off
& htons(IP_DF
))) ||
845 nla_put_u32(skb
, IFLA_IPTUN_FWMARK
, tunnel
->fwmark
))
846 goto nla_put_failure
;
848 if (nla_put_u16(skb
, IFLA_IPTUN_ENCAP_TYPE
,
849 tunnel
->encap
.type
) ||
850 nla_put_be16(skb
, IFLA_IPTUN_ENCAP_SPORT
,
851 tunnel
->encap
.sport
) ||
852 nla_put_be16(skb
, IFLA_IPTUN_ENCAP_DPORT
,
853 tunnel
->encap
.dport
) ||
854 nla_put_u16(skb
, IFLA_IPTUN_ENCAP_FLAGS
,
855 tunnel
->encap
.flags
))
856 goto nla_put_failure
;
858 if (tunnel
->collect_md
)
859 if (nla_put_flag(skb
, IFLA_IPTUN_COLLECT_METADATA
))
860 goto nla_put_failure
;
861 if (fan_has_map(&tunnel
->fan
)) {
862 struct nlattr
*fan_nest
;
863 struct ip_fan_map
*fan_map
;
865 fan_nest
= nla_nest_start(skb
, IFLA_IPTUN_FAN_MAP
);
867 goto nla_put_failure
;
868 list_for_each_entry_rcu(fan_map
, &tunnel
->fan
.fan_maps
, list
) {
869 struct ifla_fan_map map
;
871 map
.underlay
= fan_map
->underlay
;
872 map
.underlay_prefix
= fan_map
->underlay_prefix
;
873 map
.overlay
= fan_map
->overlay
;
874 map
.overlay_prefix
= fan_map
->overlay_prefix
;
875 if (nla_put(skb
, IFLA_FAN_MAPPING
, sizeof(map
), &map
))
876 goto nla_put_failure
;
878 nla_nest_end(skb
, fan_nest
);
887 static const struct nla_policy ipip_policy
[IFLA_IPTUN_MAX
+ 1] = {
888 [IFLA_IPTUN_LINK
] = { .type
= NLA_U32
},
889 [IFLA_IPTUN_LOCAL
] = { .type
= NLA_U32
},
890 [IFLA_IPTUN_REMOTE
] = { .type
= NLA_U32
},
891 [IFLA_IPTUN_TTL
] = { .type
= NLA_U8
},
892 [IFLA_IPTUN_TOS
] = { .type
= NLA_U8
},
893 [IFLA_IPTUN_PROTO
] = { .type
= NLA_U8
},
894 [IFLA_IPTUN_PMTUDISC
] = { .type
= NLA_U8
},
895 [IFLA_IPTUN_ENCAP_TYPE
] = { .type
= NLA_U16
},
896 [IFLA_IPTUN_ENCAP_FLAGS
] = { .type
= NLA_U16
},
897 [IFLA_IPTUN_ENCAP_SPORT
] = { .type
= NLA_U16
},
898 [IFLA_IPTUN_ENCAP_DPORT
] = { .type
= NLA_U16
},
899 [IFLA_IPTUN_COLLECT_METADATA
] = { .type
= NLA_FLAG
},
900 [IFLA_IPTUN_FWMARK
] = { .type
= NLA_U32
},
902 [__IFLA_IPTUN_VENDOR_BREAK
... IFLA_IPTUN_MAX
] = { .type
= NLA_BINARY
},
903 [IFLA_IPTUN_FAN_MAP
] = { .type
= NLA_NESTED
},
906 static struct rtnl_link_ops ipip_link_ops __read_mostly
= {
908 .maxtype
= IFLA_IPTUN_MAX
,
909 .policy
= ipip_policy
,
910 .priv_size
= sizeof(struct ip_tunnel
),
911 .setup
= ipip_tunnel_setup
,
912 .validate
= ipip_tunnel_validate
,
913 .newlink
= ipip_newlink
,
914 .changelink
= ipip_changelink
,
915 .dellink
= ip_tunnel_dellink
,
916 .get_size
= ipip_get_size
,
917 .fill_info
= ipip_fill_info
,
918 .get_link_net
= ip_tunnel_get_link_net
,
921 static struct xfrm_tunnel ipip_handler __read_mostly
= {
923 .err_handler
= ipip_err
,
927 #if IS_ENABLED(CONFIG_MPLS)
928 static struct xfrm_tunnel mplsip_handler __read_mostly
= {
929 .handler
= mplsip_rcv
,
930 .err_handler
= ipip_err
,
935 static int __net_init
ipip_init_net(struct net
*net
)
937 return ip_tunnel_init_net(net
, ipip_net_id
, &ipip_link_ops
, "tunl0");
940 static void __net_exit
ipip_exit_batch_net(struct list_head
*list_net
)
942 ip_tunnel_delete_nets(list_net
, ipip_net_id
, &ipip_link_ops
);
945 static struct pernet_operations ipip_net_ops
= {
946 .init
= ipip_init_net
,
947 .exit_batch
= ipip_exit_batch_net
,
949 .size
= sizeof(struct ip_tunnel_net
),
#ifdef CONFIG_SYSCTL
/* Handle returned by register_net_sysctl(); released in ipip_fini(). */
static struct ctl_table_header *ipip_fan_header;

/* Value exported through the "version" sysctl entry. */
static unsigned int ipip_fan_version = 3;

/*
 * Table registered under "net/fan" by ipip_init().
 * NOTE(review): .mode and the terminating entry were missing from the
 * truncated listing; a read-only mode is presumed - confirm.
 */
static struct ctl_table ipip_fan_sysctls[] = {
	{
		.procname	= "version",
		.data		= &ipip_fan_version,
		.maxlen		= sizeof(ipip_fan_version),
		.mode		= 0444,
		.proc_handler	= proc_dointvec,
	},
	{},
};

#endif /* CONFIG_SYSCTL */
969 static int __init
ipip_init(void)
973 pr_info("ipip: IPv4 and MPLS over IPv4 tunneling driver\n");
975 err
= register_pernet_device(&ipip_net_ops
);
978 err
= xfrm4_tunnel_register(&ipip_handler
, AF_INET
);
980 pr_info("%s: can't register tunnel\n", __func__
);
981 goto xfrm_tunnel_ipip_failed
;
983 #if IS_ENABLED(CONFIG_MPLS)
984 err
= xfrm4_tunnel_register(&mplsip_handler
, AF_MPLS
);
986 pr_info("%s: can't register tunnel\n", __func__
);
987 goto xfrm_tunnel_mplsip_failed
;
990 err
= rtnl_link_register(&ipip_link_ops
);
992 goto rtnl_link_failed
;
995 ipip_fan_header
= register_net_sysctl(&init_net
, "net/fan",
997 if (!ipip_fan_header
) {
1001 #endif /* CONFIG_SYSCTL */
1006 #ifdef CONFIG_SYSCTL
1008 rtnl_link_unregister(&ipip_link_ops
);
1009 #endif /* CONFIG_SYSCTL */
1011 #if IS_ENABLED(CONFIG_MPLS)
1012 xfrm4_tunnel_deregister(&mplsip_handler
, AF_INET
);
1013 xfrm_tunnel_mplsip_failed
:
1016 xfrm4_tunnel_deregister(&ipip_handler
, AF_INET
);
1017 xfrm_tunnel_ipip_failed
:
1018 unregister_pernet_device(&ipip_net_ops
);
1022 static void __exit
ipip_fini(void)
1024 #ifdef CONFIG_SYSCTL
1025 unregister_net_sysctl_table(ipip_fan_header
);
1026 #endif /* CONFIG_SYSCTL */
1027 rtnl_link_unregister(&ipip_link_ops
);
1028 if (xfrm4_tunnel_deregister(&ipip_handler
, AF_INET
))
1029 pr_info("%s: can't deregister tunnel\n", __func__
);
1030 #if IS_ENABLED(CONFIG_MPLS)
1031 if (xfrm4_tunnel_deregister(&mplsip_handler
, AF_MPLS
))
1032 pr_info("%s: can't deregister tunnel\n", __func__
);
1034 unregister_pernet_device(&ipip_net_ops
);
1037 module_init(ipip_init
);
1038 module_exit(ipip_fini
);
1039 MODULE_LICENSE("GPL");
1040 MODULE_ALIAS_RTNL_LINK("ipip");
1041 MODULE_ALIAS_NETDEV("tunl0");