/*
 * Linux NET3: GRE over IP protocol decoder.
 *
 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/etherdevice.h>
#include <linux/if_ether.h>

#include <net/protocol.h>
#include <net/ip_tunnels.h>
#include <net/checksum.h>
#include <net/dsfield.h>
#include <net/inet_ecn.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/dst_metadata.h>
#include <net/erspan.h>
/*
   1. The most important issue is detecting local dead loops.
   They would cause a complete host lockup in transmit, which
   would be "resolved" by stack overflow or, if queueing is enabled,
   by infinite looping in net_bh.

   We cannot track such dead loops during route installation;
   it is an infeasible task. The most general solution would be
   to keep an skb->encapsulation counter (a sort of local ttl)
   and silently drop the packet when it expires. It is a good
   solution, but it requires maintaining a new variable in EVERY
   skb, even if no tunneling is used.

   Current solution: xmit_recursion breaks dead loops. This is a percpu
   counter, since when we enter the first ndo_xmit(), cpu migration is
   forbidden. We force an exit if this counter reaches RECURSION_LIMIT.

   2. Networking dead loops would not kill routers, but would really
   kill the network. The IP hop limit plays the role of "t->recursion" in
   this case, if we copy it from the packet being encapsulated to the
   upper header. It is a very good solution, but it introduces two problems:

   - Routing protocols that use packets with ttl=1 (OSPF, RIP2)
     do not work over tunnels.
   - traceroute does not work. I planned to relay ICMP from the tunnel,
     so that this problem would be solved and traceroute output would be
     even more informative. This idea turned out to be wrong: only Linux
     complies with rfc1812 now (yes, guys, Linux is the only true router
     now :-)); all other routers (at least in my neighbourhood) return
     only 8 bytes of payload. That is the end of it.

   Hence, if we want OSPF to work or traceroute to say something reasonable,
   we should search for another solution.

   One of them is to parse the packet, trying to detect an inner
   encapsulation made by our node. That is difficult or even impossible,
   especially taking fragmentation into account. To be short, ttl is not
   a solution at all.

   Current solution: The solution was UNEXPECTEDLY SIMPLE.
   We force the DF flag on tunnels with a preconfigured hop limit,
   that is ALL. :-) Well, it does not remove the problem completely,
   but exponential growth of network traffic is changed to linear
   (branches that exceed pmtu are pruned) and the tunnel mtu
   rapidly degrades to a value <68, where looping stops.
   Yes, it is not good if there exists a router in the loop
   which does not force DF, even when the encapsulated packets have DF set.
   But it is not our problem! Nobody could accuse us; we did
   all that we could. Even if it was your gated that injected the
   fatal route into the network, even if it was you who configured the
   fatal static route: you are innocent. :-)
 */
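/*
 * Illustrative sketch only (the real guard lives in the core networking
 * stack, not in this file, and the names below are simplified): the
 * xmit_recursion scheme described above amounts to a per-cpu depth counter
 * around the transmit path, roughly:
 *
 *	if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) {
 *		kfree_skb(skb);			// break the dead loop
 *		return NETDEV_TX_OK;
 *	}
 *	__this_cpu_inc(xmit_recursion);
 *	dev_queue_xmit(skb);
 *	__this_cpu_dec(xmit_recursion);
 */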
static bool log_ecn_error = true;
module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");

static struct rtnl_link_ops ipgre_link_ops __read_mostly;
static int ipgre_tunnel_init(struct net_device *dev);
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate);

static unsigned int ipgre_net_id __read_mostly;
static unsigned int gre_tap_net_id __read_mostly;
static unsigned int erspan_net_id __read_mostly;
static void ipgre_err(struct sk_buff *skb, u32 info,
		      const struct tnl_ptk_info *tpi)

	/* All the routers (except for Linux) return only
	   8 bytes of packet payload. It means that precise relaying of
	   ICMP in the real Internet is absolutely infeasible.

	   Moreover, Cisco "wise men" put the GRE key in the third word
	   of the GRE header. That makes it impossible to maintain even soft
	   state for keyed GRE tunnels with checksums enabled. Tell
	   them "thank you".

	   Well, I wonder: rfc1812 was written by a Cisco employee,
	   so why the hell do these idiots break standards established
	   by themselves???
	 */
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;
	const struct iphdr *iph;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	unsigned int data_len = 0;

	case ICMP_PARAMETERPROB:

	case ICMP_DEST_UNREACH:

		case ICMP_PORT_UNREACH:
			/* Impossible event. */

			/* All others are translated to HOST_UNREACH.
			   rfc2003 contains "deep thoughts" about NET_UNREACH,
			   I believe they are just ether pollution. --ANK
			 */

	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)

		data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
	t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
			     iph->daddr, iph->saddr, tpi->key);

#if IS_ENABLED(CONFIG_IPV6)
	if (tpi->proto == htons(ETH_P_IPV6) &&
	    !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,

	if (t->parms.iph.daddr == 0 ||
	    ipv4_is_multicast(t->parms.iph.daddr))

	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)

	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))

	t->err_time = jiffies;
static void gre_err(struct sk_buff *skb, u32 info)

	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 *
	 * Moreover, Cisco "wise men" put the GRE key in the third word
	 * of the GRE header. That makes it impossible to maintain even soft
	 * state for keyed GRE tunnels with checksums enabled. Tell them "thank you".
	 *
	 * Well, I wonder: rfc1812 was written by a Cisco employee,
	 * so why the hell do these idiots break standards established
	 * by themselves???
	 */
	const struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct tnl_ptk_info tpi;
	bool csum_err = false;

	if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP),

		if (!csum_err)		/* ignore csum errors. */

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 skb->dev->ifindex, 0, IPPROTO_GRE, 0);

	if (type == ICMP_REDIRECT) {
		ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,

	ipgre_err(skb, info, &tpi);
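/* Receive path for ERSPAN-encapsulated traffic: validate and strip the
 * ERSPAN header, use the 10-bit session ID as the tunnel key for the lookup,
 * optionally attach collect_md metadata, and hand the packet to
 * ip_tunnel_rcv().
 */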
static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,

	struct net *net = dev_net(skb->dev);
	struct metadata_dst *tun_dst = NULL;
	struct ip_tunnel_net *itn;
	struct ip_tunnel *tunnel;
	struct erspanhdr *ershdr;
	const struct iphdr *iph;

	itn = net_generic(net, erspan_net_id);
	len = gre_hdr_len + sizeof(*ershdr);

	if (unlikely(!pskb_may_pull(skb, len)))
		return PACKET_REJECT;

	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);

	/* The original GRE header does not have a key field;
	 * use the ERSPAN 10-bit session ID as the key.
	 */
	tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
	index = ershdr->md.index;
	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
				  tpi->flags | TUNNEL_KEY,
				  iph->saddr, iph->daddr, tpi->key);

		if (__iptunnel_pull_header(skb,
					   gre_hdr_len + sizeof(*ershdr),

		if (tunnel->collect_md) {
			struct ip_tunnel_info *info;
			struct erspan_metadata *md;

			tpi->flags |= TUNNEL_KEY;

			tun_id = key32_to_tunnel_id(tpi->key);

			tun_dst = ip_tun_rx_dst(skb, flags,
						tun_id, sizeof(*md));
				return PACKET_REJECT;

			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
				dst_release((struct dst_entry *)tun_dst);
				return PACKET_REJECT;

			info = &tun_dst->u.tun_info;
			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
			info->options_len = sizeof(*md);

			tunnel->index = ntohl(index);

		skb_reset_mac_header(skb);
		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
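/* Common GRE receive helper: look up the tunnel for this packet, strip the
 * outer headers, optionally build collect_md metadata and deliver the skb
 * via ip_tunnel_rcv(); the result tells the caller whether the packet was
 * consumed or should be offered to another tunnel table.
 */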
static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
		       struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)

	struct metadata_dst *tun_dst = NULL;
	const struct iphdr *iph;
	struct ip_tunnel *tunnel;

	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
				  iph->saddr, iph->daddr, tpi->key);

		if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
					   raw_proto, false) < 0)

		if (tunnel->dev->type != ARPHRD_NONE)
			skb_pop_mac_header(skb);
		else
			skb_reset_mac_header(skb);

		if (tunnel->collect_md) {

			flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
			tun_id = key32_to_tunnel_id(tpi->key);
			tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
				return PACKET_REJECT;

		ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
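/* Dispatch a parsed GRE packet: ETH_P_TEB frames are tried against the
 * gretap table first, everything else against the plain ipgre table; TEB
 * traffic falls back to the ipgre table so collect_md tunnels can claim it.
 */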
static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,

	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn;

	if (tpi->proto == htons(ETH_P_TEB))
		itn = net_generic(net, gre_tap_net_id);
	else
		itn = net_generic(net, ipgre_net_id);

	res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
	if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
		/* ipgre tunnels in collect metadata mode should also
		 * receive ETH_P_TEB traffic.
		 */
		itn = net_generic(net, ipgre_net_id);
		res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
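/* Entry point registered with the GRE demultiplexer: parse the GRE header,
 * route ERSPAN packets to erspan_rcv() and everything else to ipgre_rcv(),
 * and answer with ICMP port unreachable if nothing claims the packet.
 */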
static int gre_rcv(struct sk_buff *skb)

	struct tnl_ptk_info tpi;
	bool csum_err = false;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))

#endif

	hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);

	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)

	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
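/* Common transmit helper for configured (non collect_md) tunnels: bump the
 * output sequence number when TUNNEL_SEQ is set, push the GRE header and
 * let ip_tunnel_xmit() add the outer IP header.
 */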
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
		       const struct iphdr *tnl_params,

	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->parms.o_flags & TUNNEL_SEQ)

	/* Push GRE header. */
	gre_build_header(skb, tunnel->tun_hlen,
			 tunnel->parms.o_flags, proto, tunnel->parms.o_key,
			 htonl(tunnel->o_seqno));

	ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);

static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
	return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
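/* Build a flowi4 from the per-packet tunnel key and resolve the IPv4 route
 * used for flow-based (collect_md) transmission.
 */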
static struct rtable *gre_get_rt(struct sk_buff *skb,
				 struct net_device *dev,
				 const struct ip_tunnel_key *key)

	struct net *net = dev_net(dev);

	memset(fl, 0, sizeof(*fl));
	fl->daddr = key->u.ipv4.dst;
	fl->saddr = key->u.ipv4.src;
	fl->flowi4_tos = RT_TOS(key->tos);
	fl->flowi4_mark = skb->mark;
	fl->flowi4_proto = IPPROTO_GRE;

	return ip_route_output_key(net, fl);
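/* Shared setup for flow-based transmit: resolve (and optionally cache) the
 * route for the packet's tunnel metadata and make sure the skb has enough
 * headroom for the outer IP and GRE headers.
 */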
static struct rtable *prepare_fb_xmit(struct sk_buff *skb,
				      struct net_device *dev,

	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;

	tun_info = skb_tunnel_info(skb);
	key = &tun_info->key;
	use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);

		rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr);

		rt = gre_get_rt(skb, dev, fl, key);

			dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,

	min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
		       + tunnel_hlen + sizeof(struct iphdr);
	if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
		int head_delta = SKB_DATA_ALIGN(min_headroom -

		err = pskb_expand_head(skb, max_t(int, head_delta, 0),

	dev->stats.tx_dropped++;
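/* Flow-based (collect_md) GRE transmit: the outer addresses, key, TOS and
 * TTL all come from the per-packet tunnel metadata rather than from the
 * netdevice configuration.
 */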
static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,

	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct rtable *rt = NULL;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))

	key = &tun_info->key;
	tunnel_hlen = gre_calc_hlen(key->tun_flags);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);

	/* Push Tunnel header. */
	if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM)))

	flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
	gre_build_header(skb, tunnel_hlen, flags, proto,
			 tunnel_id_to_key32(tun_info->key.tun_id), 0);

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);

	dev->stats.tx_dropped++;
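/* Flow-based ERSPAN transmit: trim oversized mirrored frames, build the
 * ERSPAN header from the tunnel metadata and wrap it in a sequenced GRE
 * header before handing it to the IP tunnel output path.
 */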
static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,

	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_info *tun_info;
	const struct ip_tunnel_key *key;
	struct erspan_metadata *md;
	struct rtable *rt = NULL;
	bool truncate = false;

	tun_info = skb_tunnel_info(skb);
	if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
		     ip_tunnel_info_af(tun_info) != AF_INET))

	key = &tun_info->key;

	/* ERSPAN has fixed 8 byte GRE header */
	tunnel_hlen = 8 + sizeof(struct erspanhdr);

	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);

	if (gre_handle_offloads(skb, false))

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);

	md = ip_tunnel_info_opts(tun_info);

	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
			    ntohl(md->index), truncate);

	gre_build_header(skb, 8, TUNNEL_SEQ,
			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));

	df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

	iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
		      key->tos, key->ttl, df, false);

	dev->stats.tx_dropped++;

static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)

	struct ip_tunnel_info *info = skb_tunnel_info(skb);

	if (ip_tunnel_info_af(info) != AF_INET)

	rt = gre_get_rt(skb, dev, &fl4, &info->key);

	info->key.u.ipv4.src = fl4.saddr;

static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
			      struct net_device *dev)

	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *tnl_params;

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, skb->protocol);

	if (dev->header_ops) {
		/* Need space for new headers */
		if (skb_cow_head(skb, dev->needed_headroom -
				      (tunnel->hlen + sizeof(struct iphdr))))

		tnl_params = (const struct iphdr *)skb->data;

		/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
		 * to the GRE header.
		 */
		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
		skb_reset_mac_header(skb);

		if (skb_cow_head(skb, dev->needed_headroom))

		tnl_params = &tunnel->parms.iph;

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))

	__gre_xmit(skb, dev, tnl_params, skb->protocol);

	dev->stats.tx_dropped++;

static inline u8 tos_to_cos(u8 tos)
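/* Construct the ERSPAN header in front of the mirrored frame: version and
 * VLAN tci, a CoS value derived from the IP tos, the encapsulation type,
 * the truncation flag, the 10-bit session ID and the platform index.
 */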
static void erspan_build_header(struct sk_buff *skb,
				__be32 id, u32 index, bool truncate)

	struct iphdr *iphdr = ip_hdr(skb);
	struct ethhdr *eth = eth_hdr(skb);
	enum erspan_encap_type enc_type;
	struct erspanhdr *ershdr;

	enc_type = ERSPAN_ENCAP_NOVLAN;

	/* If the mirrored packet has a vlan tag, extract the tci and
	 * preserve the vlan header in the mirrored frame.
	 */
	if (eth->h_proto == htons(ETH_P_8021Q)) {
		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
		vlan_tci = ntohs(qp->tci);
		enc_type = ERSPAN_ENCAP_INFRAME;

	skb_push(skb, sizeof(*ershdr));
	ershdr = (struct erspanhdr *)skb->data;
	memset(ershdr, 0, sizeof(*ershdr));

	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
				 (ERSPAN_VERSION << VER_OFFSET));
	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
				   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
				   (enc_type << EN_OFFSET & EN_MASK) |
				   ((truncate << T_OFFSET) & T_MASK));
	ershdr->md.index = htonl(index & INDEX_MASK);
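/* Transmit path for configured ERSPAN tunnels: collect_md packets take
 * erspan_fb_xmit(); otherwise the frame is trimmed to the device MTU if
 * needed, the ERSPAN header is built from the tunnel's output key and
 * index, and the result is sent as sequenced GRE (the GRE key flag is
 * cleared because the session ID already lives in the ERSPAN header).
 */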
static netdev_tx_t erspan_xmit(struct sk_buff *skb,
			       struct net_device *dev)

	struct ip_tunnel *tunnel = netdev_priv(dev);
	bool truncate = false;

	if (tunnel->collect_md) {
		erspan_fb_xmit(skb, dev, skb->protocol);

	if (gre_handle_offloads(skb, false))

	if (skb_cow_head(skb, dev->needed_headroom))

	if (skb->len > dev->mtu + dev->hard_header_len) {
		pskb_trim(skb, dev->mtu + dev->hard_header_len);

	/* Push ERSPAN header */
	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
	tunnel->parms.o_flags &= ~TUNNEL_KEY;
	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));

	dev->stats.tx_dropped++;

static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
				struct net_device *dev)

	struct ip_tunnel *tunnel = netdev_priv(dev);

	if (tunnel->collect_md) {
		gre_fb_xmit(skb, dev, htons(ETH_P_TEB));

	if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))

	if (skb_cow_head(skb, dev->needed_headroom))

	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));

	dev->stats.tx_dropped++;
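/* Recompute header length, needed headroom, MTU and the GSO/LLTX feature
 * flags after the tunnel's output flags have been changed.
 */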
static void ipgre_link_update(struct net_device *dev, bool set_mtu)

	struct ip_tunnel *tunnel = netdev_priv(dev);

	len = tunnel->tun_hlen;
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	len = tunnel->tun_hlen - len;
	tunnel->hlen = tunnel->hlen + len;

	dev->needed_headroom = dev->needed_headroom + len;

		dev->mtu = max_t(int, dev->mtu - len, 68);

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    tunnel->encap.type == TUNNEL_ENCAP_NONE) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;

			dev->features &= ~NETIF_F_GSO_SOFTWARE;
			dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;

		dev->features |= NETIF_F_LLTX;

		dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
		dev->features &= ~(NETIF_F_LLTX | NETIF_F_GSO_SOFTWARE);
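/* Legacy SIOC{ADD,CHG,DEL,GET}TUNNEL ioctl interface: translate between the
 * on-wire GRE flag encoding and the internal tunnel flags around the generic
 * ip_tunnel_ioctl() handler.
 */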
static int ipgre_tunnel_ioctl(struct net_device *dev,
			      struct ifreq *ifr, int cmd)

	struct ip_tunnel_parm p;

	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))

	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))

	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);

	err = ip_tunnel_ioctl(dev, &p, cmd);

	if (cmd == SIOCCHGTUNNEL) {
		struct ip_tunnel *t = netdev_priv(dev);

		t->parms.i_flags = p.i_flags;
		t->parms.o_flags = p.o_flags;

		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
			ipgre_link_update(dev, true);

	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);

	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
/* Nice toy. Unfortunately, useless in real life :-)
   It allows constructing a virtual multiprotocol broadcast "LAN"
   over the Internet, provided multicast routing is tuned.

   I have no idea whether this bicycle was invented before me,
   so I had to set ARPHRD_IPGRE to a random value.
   I have an impression that Cisco could have made something similar,
   but this feature is apparently missing in IOS<=11.2(8).

   I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
   with broadcast 224.66.66.66. If you have access to the mbone, play with me :-)

   ping -t 255 224.66.66.66

   If nobody answers, the mbone does not work.

   ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
   ip addr add 10.66.66.<somewhat>/24 dev Universe
   ifconfig Universe add fe80::<Your_real_addr>/10
   ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96

   ftp fec0:6666:6666::193.233.7.65
 */
static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
			const void *daddr, const void *saddr, unsigned int len)

	struct ip_tunnel *t = netdev_priv(dev);
	struct gre_base_hdr *greh;

	iph = skb_push(skb, t->hlen + sizeof(*iph));
	greh = (struct gre_base_hdr *)(iph + 1);
	greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
	greh->protocol = htons(type);

	memcpy(iph, &t->parms.iph, sizeof(struct iphdr));

	/* Set the source hardware address. */
		memcpy(&iph->saddr, saddr, 4);

		memcpy(&iph->daddr, daddr, 4);

		return t->hlen + sizeof(*iph);

	return -(t->hlen + sizeof(*iph));

static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)

	const struct iphdr *iph = (const struct iphdr *)skb_mac_header(skb);

	memcpy(haddr, &iph->saddr, 4);

static const struct header_ops ipgre_header_ops = {
	.create	= ipgre_header,
	.parse	= ipgre_header_parse,
};
#ifdef CONFIG_NET_IPGRE_BROADCAST
static int ipgre_open(struct net_device *dev)

	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr)) {

		rt = ip_route_output_gre(t->net, &fl4,
					 RT_TOS(t->parms.iph.tos),

			return -EADDRNOTAVAIL;

		if (!__in_dev_get_rtnl(dev))
			return -EADDRNOTAVAIL;
		t->mlink = dev->ifindex;
		ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);

static int ipgre_close(struct net_device *dev)

	struct ip_tunnel *t = netdev_priv(dev);

	if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
		struct in_device *in_dev;

		in_dev = inetdev_by_index(t->net, t->mlink);

			ip_mc_dec_group(in_dev, t->parms.iph.daddr);

#endif
static const struct net_device_ops ipgre_netdev_ops = {
	.ndo_init		= ipgre_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
#ifdef CONFIG_NET_IPGRE_BROADCAST
	.ndo_open		= ipgre_open,
	.ndo_stop		= ipgre_close,
#endif
	.ndo_start_xmit		= ipgre_xmit,
	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
};

#define GRE_FEATURES (NETIF_F_SG | \

static void ipgre_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &ipgre_netdev_ops;
	dev->type		= ARPHRD_IPGRE;
	ip_tunnel_setup(dev, ipgre_net_id);
}
static void __gre_tunnel_init(struct net_device *dev)

	struct ip_tunnel *tunnel;

	tunnel = netdev_priv(dev);
	tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
	tunnel->parms.iph.protocol = IPPROTO_GRE;

	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;

	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;

	if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
		/* TCP offload with GRE SEQ is not supported, nor
		 * can we support 2 levels of outer headers requiring
		 * an update.
		 */
		if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
		    (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
			dev->features |= NETIF_F_GSO_SOFTWARE;
			dev->hw_features |= NETIF_F_GSO_SOFTWARE;

		/* Can use a lockless transmit, unless we generate
		 * output sequences
		 */
		dev->features |= NETIF_F_LLTX;
static int ipgre_tunnel_init(struct net_device *dev)

	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;

	__gre_tunnel_init(dev);

	memcpy(dev->dev_addr, &iph->saddr, 4);
	memcpy(dev->broadcast, &iph->daddr, 4);

	dev->flags		= IFF_NOARP;
	netif_keep_dst(dev);

	if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
		if (ipv4_is_multicast(iph->daddr)) {

			dev->flags = IFF_BROADCAST;
			dev->header_ops = &ipgre_header_ops;

#endif
	} else if (!tunnel->collect_md) {
		dev->header_ops = &ipgre_header_ops;

	return ip_tunnel_init(dev);
static const struct gre_protocol ipgre_protocol = {
	.err_handler = gre_err,
};

static int __net_init ipgre_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}

static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}

static struct pernet_operations ipgre_net_ops = {
	.init = ipgre_init_net,
	.exit_batch = ipgre_exit_batch_net,
	.id   = &ipgre_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
				 struct netlink_ext_ack *extack)

	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (flags & (GRE_VERSION | GRE_ROUTING))

	if (data[IFLA_GRE_COLLECT_METADATA] &&
	    data[IFLA_GRE_ENCAP_TYPE] &&
	    nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
			      struct netlink_ext_ack *extack)

	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)

		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;

	if (data[IFLA_GRE_REMOTE]) {
		memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);

	return ipgre_tunnel_validate(tb, data, extack);
static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
			   struct netlink_ext_ack *extack)

	ret = ipgre_tap_validate(tb, data, extack);

	/* ERSPAN should only have GRE sequence and key flag */
	if (data[IFLA_GRE_OFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
	if (data[IFLA_GRE_IFLAGS])
		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
	if (!data[IFLA_GRE_COLLECT_METADATA] &&
	    flags != (GRE_SEQ | GRE_KEY))

	/* The ERSPAN session ID is only 10 bits wide. Since we reuse the
	 * 32-bit key field as the ID, check its range.
	 */
	if (data[IFLA_GRE_IKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))

	if (data[IFLA_GRE_OKEY] &&
	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
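/* Translate IFLA_GRE_* netlink attributes into an ip_tunnel_parm plus the
 * per-tunnel collect_md, ignore_df, fwmark and ERSPAN index settings.
 */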
static int ipgre_netlink_parms(struct net_device *dev,
			       struct nlattr *data[],
			       struct nlattr *tb[],
			       struct ip_tunnel_parm *parms,

	struct ip_tunnel *t = netdev_priv(dev);

	memset(parms, 0, sizeof(*parms));

	parms->iph.protocol = IPPROTO_GRE;

	if (data[IFLA_GRE_LINK])
		parms->link = nla_get_u32(data[IFLA_GRE_LINK]);

	if (data[IFLA_GRE_IFLAGS])
		parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));

	if (data[IFLA_GRE_OFLAGS])
		parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));

	if (data[IFLA_GRE_IKEY])
		parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);

	if (data[IFLA_GRE_OKEY])
		parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);

	if (data[IFLA_GRE_LOCAL])
		parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);

	if (data[IFLA_GRE_REMOTE])
		parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);

	if (data[IFLA_GRE_TTL])
		parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);

	if (data[IFLA_GRE_TOS])
		parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);

	if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {

		parms->iph.frag_off = htons(IP_DF);

	if (data[IFLA_GRE_COLLECT_METADATA]) {
		t->collect_md = true;
		if (dev->type == ARPHRD_IPGRE)
			dev->type = ARPHRD_NONE;

	if (data[IFLA_GRE_IGNORE_DF]) {
		if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
		    && (parms->iph.frag_off & htons(IP_DF)))

		t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);

	if (data[IFLA_GRE_FWMARK])
		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);

	if (data[IFLA_GRE_ERSPAN_INDEX]) {
		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);

		if (t->index & ~INDEX_MASK)
/* This function returns true when ENCAP attributes are present in the nl msg */
static bool ipgre_netlink_encap_parms(struct nlattr *data[],
				      struct ip_tunnel_encap *ipencap)

	memset(ipencap, 0, sizeof(*ipencap));

	if (data[IFLA_GRE_ENCAP_TYPE]) {

		ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);

	if (data[IFLA_GRE_ENCAP_FLAGS]) {

		ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);

	if (data[IFLA_GRE_ENCAP_SPORT]) {

		ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);

	if (data[IFLA_GRE_ENCAP_DPORT]) {

		ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);

static int gre_tap_init(struct net_device *dev)
{
	__gre_tunnel_init(dev);
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);
}

static const struct net_device_ops gre_tap_netdev_ops = {
	.ndo_init		= gre_tap_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= gre_tap_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};
static int erspan_tunnel_init(struct net_device *dev)

	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->tun_hlen = 8;
	tunnel->parms.iph.protocol = IPPROTO_GRE;
	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
		       sizeof(struct erspanhdr);
	t_hlen = tunnel->hlen + sizeof(struct iphdr);

	dev->features		|= GRE_FEATURES;
	dev->hw_features	|= GRE_FEATURES;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	netif_keep_dst(dev);

	return ip_tunnel_init(dev);

static const struct net_device_ops erspan_netdev_ops = {
	.ndo_init		= erspan_tunnel_init,
	.ndo_uninit		= ip_tunnel_uninit,
	.ndo_start_xmit		= erspan_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_change_mtu		= ip_tunnel_change_mtu,
	.ndo_get_stats64	= ip_tunnel_get_stats64,
	.ndo_get_iflink		= ip_tunnel_get_iflink,
	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
};

static void ipgre_tap_setup(struct net_device *dev)

	dev->netdev_ops		= &gre_tap_netdev_ops;
	dev->priv_flags		&= ~IFF_TX_SKB_SHARING;
	dev->priv_flags		|= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, gre_tap_net_id);
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
			 struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)

	struct ip_tunnel_parm p;
	struct ip_tunnel_encap ipencap;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		struct ip_tunnel *t = netdev_priv(dev);

		err = ip_tunnel_encap_setup(t, &ipencap);

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);

	return ip_tunnel_newlink(dev, tb, &p, fwmark);

static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
			    struct nlattr *data[],
			    struct netlink_ext_ack *extack)

	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_encap ipencap;
	__u32 fwmark = t->fwmark;
	struct ip_tunnel_parm p;

	if (ipgre_netlink_encap_parms(data, &ipencap)) {
		err = ip_tunnel_encap_setup(t, &ipencap);

	err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);

	err = ip_tunnel_changelink(dev, tb, &p, fwmark);

	t->parms.i_flags = p.i_flags;
	t->parms.o_flags = p.o_flags;

	if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
		ipgre_link_update(dev, !tb[IFLA_MTU]);

static size_t ipgre_get_size(const struct net_device *dev)

		/* IFLA_GRE_IFLAGS */
		/* IFLA_GRE_OFLAGS */
		/* IFLA_GRE_LOCAL */
		/* IFLA_GRE_REMOTE */
		/* IFLA_GRE_PMTUDISC */
		/* IFLA_GRE_ENCAP_TYPE */
		/* IFLA_GRE_ENCAP_FLAGS */
		/* IFLA_GRE_ENCAP_SPORT */
		/* IFLA_GRE_ENCAP_DPORT */
		/* IFLA_GRE_COLLECT_METADATA */
		/* IFLA_GRE_IGNORE_DF */
		/* IFLA_GRE_FWMARK */
		/* IFLA_GRE_ERSPAN_INDEX */
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)

	struct ip_tunnel *t = netdev_priv(dev);
	struct ip_tunnel_parm *p = &t->parms;

	if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
	    nla_put_be16(skb, IFLA_GRE_IFLAGS,
			 gre_tnl_flags_to_gre_flags(p->i_flags)) ||
	    nla_put_be16(skb, IFLA_GRE_OFLAGS,
			 gre_tnl_flags_to_gre_flags(p->o_flags)) ||
	    nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
	    nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
	    nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
	    nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
	    nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
	    nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
	    nla_put_u8(skb, IFLA_GRE_PMTUDISC,
		       !!(p->iph.frag_off & htons(IP_DF))) ||
	    nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
		goto nla_put_failure;

	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,

	    nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,

	    nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,

	    nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,

		goto nla_put_failure;

	if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
		goto nla_put_failure;

	if (t->collect_md) {
		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
			goto nla_put_failure;

	if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
		goto nla_put_failure;

static void erspan_setup(struct net_device *dev)

	dev->netdev_ops = &erspan_netdev_ops;
	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	ip_tunnel_setup(dev, erspan_net_id);
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
	[IFLA_GRE_LINK]		= { .type = NLA_U32 },
	[IFLA_GRE_IFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_OFLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_IKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_OKEY]		= { .type = NLA_U32 },
	[IFLA_GRE_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_GRE_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
	[IFLA_GRE_TTL]		= { .type = NLA_U8 },
	[IFLA_GRE_TOS]		= { .type = NLA_U8 },
	[IFLA_GRE_PMTUDISC]	= { .type = NLA_U8 },
	[IFLA_GRE_ENCAP_TYPE]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_FLAGS]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_SPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_ENCAP_DPORT]	= { .type = NLA_U16 },
	[IFLA_GRE_COLLECT_METADATA]	= { .type = NLA_FLAG },
	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tunnel_setup,
	.validate	= ipgre_tunnel_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= ipgre_tap_setup,
	.validate	= ipgre_tap_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};

static struct rtnl_link_ops erspan_link_ops __read_mostly = {
	.maxtype	= IFLA_GRE_MAX,
	.policy		= ipgre_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= erspan_setup,
	.validate	= erspan_validate,
	.newlink	= ipgre_newlink,
	.changelink	= ipgre_changelink,
	.dellink	= ip_tunnel_dellink,
	.get_size	= ipgre_get_size,
	.fill_info	= ipgre_fill_info,
	.get_link_net	= ip_tunnel_get_link_net,
};
struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
					u8 name_assign_type)

	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	LIST_HEAD(list_kill);
	struct ip_tunnel *t;

	memset(&tb, 0, sizeof(tb));

	dev = rtnl_create_link(net, name, name_assign_type,
			       &ipgre_tap_ops, tb);

	/* Configure flow based GRE device. */
	t = netdev_priv(dev);
	t->collect_md = true;

	err = ipgre_newlink(net, dev, tb, NULL, NULL);

		return ERR_PTR(err);

	/* openvswitch users expect packet sizes to be unrestricted,
	 * so set the largest MTU we can.
	 */
	err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);

	err = rtnl_configure_link(dev, NULL);

	ip_tunnel_dellink(dev, &list_kill);
	unregister_netdevice_many(&list_kill);
	return ERR_PTR(err);
EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
static int __net_init ipgre_tap_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}

static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
	ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}

static struct pernet_operations ipgre_tap_net_ops = {
	.init = ipgre_tap_init_net,
	.exit_batch = ipgre_tap_exit_batch_net,
	.id   = &gre_tap_net_id,
	.size = sizeof(struct ip_tunnel_net),
};

static int __net_init erspan_init_net(struct net *net)
{
	return ip_tunnel_init_net(net, erspan_net_id,
				  &erspan_link_ops, "erspan0");
}

static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
	ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}

static struct pernet_operations erspan_net_ops = {
	.init = erspan_init_net,
	.exit_batch = erspan_exit_batch_net,
	.id   = &erspan_net_id,
	.size = sizeof(struct ip_tunnel_net),
};
static int __init ipgre_init(void)

	pr_info("GRE over IPv4 tunneling driver\n");

	err = register_pernet_device(&ipgre_net_ops);

	err = register_pernet_device(&ipgre_tap_net_ops);
		goto pnet_tap_failed;

	err = register_pernet_device(&erspan_net_ops);
		goto pnet_erspan_failed;

	err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
		pr_info("%s: can't add protocol\n", __func__);
		goto add_proto_failed;

	err = rtnl_link_register(&ipgre_link_ops);
		goto rtnl_link_failed;

	err = rtnl_link_register(&ipgre_tap_ops);
		goto tap_ops_failed;

	err = rtnl_link_register(&erspan_link_ops);
		goto erspan_link_failed;

	rtnl_link_unregister(&ipgre_tap_ops);

	rtnl_link_unregister(&ipgre_link_ops);

	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);

	unregister_pernet_device(&erspan_net_ops);

	unregister_pernet_device(&ipgre_tap_net_ops);

	unregister_pernet_device(&ipgre_net_ops);

static void __exit ipgre_fini(void)
{
	rtnl_link_unregister(&ipgre_tap_ops);
	rtnl_link_unregister(&ipgre_link_ops);
	rtnl_link_unregister(&erspan_link_ops);
	gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
	unregister_pernet_device(&ipgre_tap_net_ops);
	unregister_pernet_device(&ipgre_net_ops);
	unregister_pernet_device(&erspan_net_ops);
}

module_init(ipgre_init);
module_exit(ipgre_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
MODULE_ALIAS_NETDEV("erspan0");