/*
 * Linux NET3: GRE over IP protocol decoder.
 *
 * Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15 #ifndef USE_UPSTREAM_TUNNEL
16 #include <linux/capability.h>
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/kconfig.h>
21 #include <linux/slab.h>
22 #include <linux/uaccess.h>
23 #include <linux/skbuff.h>
24 #include <linux/netdevice.h>
25 #include <linux/netdev_features.h>
27 #include <linux/tcp.h>
28 #include <linux/udp.h>
29 #include <linux/if_arp.h>
30 #include <linux/mroute.h>
31 #include <linux/if_vlan.h>
32 #include <linux/init.h>
33 #include <linux/in6.h>
34 #include <linux/inetdevice.h>
35 #include <linux/igmp.h>
36 #include <linux/netfilter_ipv4.h>
37 #include <linux/etherdevice.h>
38 #include <linux/if_ether.h>
43 #include <net/protocol.h>
44 #include <net/ip_tunnels.h>
46 #include <net/checksum.h>
47 #include <net/dsfield.h>
48 #include <net/inet_ecn.h>
50 #include <net/net_namespace.h>
51 #include <net/netns/generic.h>
52 #include <net/rtnetlink.h>
54 #include <net/dst_metadata.h>
55 #include <net/erspan.h>
57 #if IS_ENABLED(CONFIG_IPV6)
59 #include <net/ip6_fib.h>
60 #include <net/ip6_route.h>
64 #include "vport-netdev.h"
66 static int gre_tap_net_id __read_mostly
;
67 static unsigned int erspan_net_id __read_mostly
;
68 static void erspan_build_header(struct sk_buff
*skb
,
70 bool truncate
, bool is_ipv4
);
72 static bool ip_gre_loaded
= false;
74 /* Normally in net/core/dst.c but move it here */
75 struct dst_ops md_dst_ops
= {
79 #ifndef ip_gre_calc_hlen
80 #define ip_gre_calc_hlen gre_calc_hlen
83 static int erspan_rcv(struct sk_buff
*skb
, struct tnl_ptk_info
*tpi
,
86 struct net
*net
= dev_net(skb
->dev
);
87 struct metadata_dst
*tun_dst
= NULL
;
88 struct erspan_base_hdr
*ershdr
;
89 struct erspan_metadata
*pkt_md
;
90 struct ip_tunnel_net
*itn
;
91 struct ip_tunnel
*tunnel
;
92 const struct iphdr
*iph
;
93 struct erspan_md2
*md2
;
97 itn
= net_generic(net
, erspan_net_id
);
98 len
= gre_hdr_len
+ sizeof(*ershdr
);
100 /* Check based hdr len */
101 if (unlikely(!pskb_may_pull(skb
, len
)))
102 return PACKET_REJECT
;
105 ershdr
= (struct erspan_base_hdr
*)(skb
->data
+ gre_hdr_len
);
108 /* The original GRE header does not have key field,
109 * Use ERSPAN 10-bit session ID as key.
111 tpi
->key
= cpu_to_be32(get_session_id(ershdr
));
112 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
,
114 iph
->saddr
, iph
->daddr
, tpi
->key
);
117 len
= gre_hdr_len
+ erspan_hdr_len(ver
);
118 if (unlikely(!pskb_may_pull(skb
, len
)))
119 return PACKET_REJECT
;
121 ershdr
= (struct erspan_base_hdr
*)skb
->data
;
122 pkt_md
= (struct erspan_metadata
*)(ershdr
+ 1);
124 if (__iptunnel_pull_header(skb
,
130 if (tunnel
->collect_md
) {
131 struct ip_tunnel_info
*info
;
132 struct erspan_metadata
*md
;
136 tpi
->flags
|= TUNNEL_KEY
;
138 tun_id
= key32_to_tunnel_id(tpi
->key
);
140 tun_dst
= rpl_ip_tun_rx_dst(skb
, flags
, tun_id
, sizeof(*md
));
142 return PACKET_REJECT
;
144 md
= ip_tunnel_info_opts(&tun_dst
->u
.tun_info
);
147 memcpy(md2
, pkt_md
, ver
== 1 ? ERSPAN_V1_MDSIZE
:
150 info
= &tun_dst
->u
.tun_info
;
151 info
->key
.tun_flags
|= TUNNEL_ERSPAN_OPT
;
152 info
->options_len
= sizeof(*md
);
155 skb_reset_mac_header(skb
);
156 ovs_ip_tunnel_rcv(tunnel
->dev
, skb
, tun_dst
);
166 static int __ipgre_rcv(struct sk_buff
*skb
, const struct tnl_ptk_info
*tpi
,
167 struct ip_tunnel_net
*itn
, int hdr_len
, bool raw_proto
)
169 struct metadata_dst tun_dst
;
170 const struct iphdr
*iph
;
171 struct ip_tunnel
*tunnel
;
174 tunnel
= ip_tunnel_lookup(itn
, skb
->dev
->ifindex
, tpi
->flags
,
175 iph
->saddr
, iph
->daddr
, tpi
->key
);
178 if (__iptunnel_pull_header(skb
, hdr_len
, tpi
->proto
,
179 raw_proto
, false) < 0)
182 if (tunnel
->dev
->type
!= ARPHRD_NONE
)
183 skb_pop_mac_header(skb
);
185 skb_reset_mac_header(skb
);
186 if (tunnel
->collect_md
) {
190 flags
= tpi
->flags
& (TUNNEL_CSUM
| TUNNEL_KEY
);
191 tun_id
= key32_to_tunnel_id(tpi
->key
);
192 ovs_ip_tun_rx_dst(&tun_dst
, skb
, flags
, tun_id
, 0);
195 ovs_ip_tunnel_rcv(tunnel
->dev
, skb
, &tun_dst
);
206 static int ipgre_rcv(struct sk_buff
*skb
, const struct tnl_ptk_info
*tpi
,
209 struct net
*net
= dev_net(skb
->dev
);
210 struct ip_tunnel_net
*itn
;
213 if (tpi
->proto
== htons(ETH_P_TEB
))
214 itn
= net_generic(net
, gre_tap_net_id
);
215 else if (tpi
->proto
== htons(ETH_P_ERSPAN
) ||
216 tpi
->proto
== htons(ETH_P_ERSPAN2
))
217 itn
= net_generic(net
, erspan_net_id
);
221 res
= __ipgre_rcv(skb
, tpi
, itn
, hdr_len
, false);
226 static void __gre_xmit(struct sk_buff
*skb
, struct net_device
*dev
,
227 const struct iphdr
*tnl_params
,
230 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
231 struct tnl_ptk_info tpi
;
233 tpi
.flags
= tunnel
->parms
.o_flags
;
235 tpi
.key
= tunnel
->parms
.o_key
;
236 if (tunnel
->parms
.o_flags
& TUNNEL_SEQ
)
238 tpi
.seq
= htonl(tunnel
->o_seqno
);
240 /* Push GRE header. */
241 gre_build_header(skb
, &tpi
, tunnel
->hlen
);
243 ip_tunnel_xmit(skb
, dev
, tnl_params
, tnl_params
->protocol
);
246 static int gre_rcv(struct sk_buff
*skb
, const struct tnl_ptk_info
*unused_tpi
)
248 struct tnl_ptk_info tpi
;
249 bool csum_err
= false;
252 hdr_len
= gre_parse_header(skb
, &tpi
, &csum_err
, htons(ETH_P_IP
), 0);
256 if (unlikely(tpi
.proto
== htons(ETH_P_ERSPAN
) ||
257 tpi
.proto
== htons(ETH_P_ERSPAN2
))) {
258 if (erspan_rcv(skb
, &tpi
, hdr_len
) == PACKET_RCVD
)
263 if (ipgre_rcv(skb
, &tpi
, hdr_len
) == PACKET_RCVD
)
271 #if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
/* gre_handle_offloads() has a different return type on older kernels. */

/* No-op GSO segment fixup used when the tunnel does not request a GRE
 * checksum: nothing in the inner headers needs patching per segment.
 */
static void gre_nop_fix(struct sk_buff *skb) { }
276 static void gre_csum_fix(struct sk_buff
*skb
)
278 struct gre_base_hdr
*greh
;
280 int gre_offset
= skb_transport_offset(skb
);
282 greh
= (struct gre_base_hdr
*)skb_transport_header(skb
);
283 options
= ((__be32
*)greh
+ 1);
286 *(__sum16
*)options
= csum_fold(skb_checksum(skb
, gre_offset
,
287 skb
->len
- gre_offset
, 0));
290 #define gre_handle_offloads rpl_gre_handle_offloads
291 static int rpl_gre_handle_offloads(struct sk_buff
*skb
, bool gre_csum
)
293 int type
= gre_csum
? SKB_GSO_GRE_CSUM
: SKB_GSO_GRE
;
294 gso_fix_segment_t fix_segment
;
297 fix_segment
= gre_csum_fix
;
299 fix_segment
= gre_nop_fix
;
301 return ovs_iptunnel_handle_offloads(skb
, type
, fix_segment
);
304 static int gre_handle_offloads(struct sk_buff
*skb
, bool csum
)
306 return iptunnel_handle_offloads(skb
, csum
,
307 csum
? SKB_GSO_GRE_CSUM
: SKB_GSO_GRE
);
311 static bool is_gre_gso(struct sk_buff
*skb
)
313 return skb_shinfo(skb
)->gso_type
&
314 (SKB_GSO_GRE
| SKB_GSO_GRE_CSUM
);
317 static void build_header(struct sk_buff
*skb
, int hdr_len
, __be16 flags
,
318 __be16 proto
, __be32 key
, __be32 seq
)
320 struct gre_base_hdr
*greh
;
322 skb_push(skb
, hdr_len
);
324 skb_reset_transport_header(skb
);
325 greh
= (struct gre_base_hdr
*)skb
->data
;
326 greh
->flags
= tnl_flags_to_gre_flags(flags
);
327 greh
->protocol
= proto
;
329 if (flags
& (TUNNEL_KEY
| TUNNEL_CSUM
| TUNNEL_SEQ
)) {
330 __be32
*ptr
= (__be32
*)(((u8
*)greh
) + hdr_len
- 4);
332 if (flags
& TUNNEL_SEQ
) {
336 if (flags
& TUNNEL_KEY
) {
340 if (flags
& TUNNEL_CSUM
&& !is_gre_gso(skb
)) {
342 *(__sum16
*)ptr
= csum_fold(skb_checksum(skb
, 0,
346 ovs_skb_set_inner_protocol(skb
, proto
);
349 static struct rtable
*gre_get_rt(struct sk_buff
*skb
,
350 struct net_device
*dev
,
352 const struct ip_tunnel_key
*key
)
354 struct net
*net
= dev_net(dev
);
356 memset(fl
, 0, sizeof(*fl
));
357 fl
->daddr
= key
->u
.ipv4
.dst
;
358 fl
->saddr
= key
->u
.ipv4
.src
;
359 fl
->flowi4_tos
= RT_TOS(key
->tos
);
360 fl
->flowi4_mark
= skb
->mark
;
361 fl
->flowi4_proto
= IPPROTO_GRE
;
363 return ip_route_output_key(net
, fl
);
366 static struct rtable
*prepare_fb_xmit(struct sk_buff
*skb
,
367 struct net_device
*dev
,
371 struct ip_tunnel_info
*tun_info
;
372 const struct ip_tunnel_key
*key
;
373 struct rtable
*rt
= NULL
;
378 tun_info
= skb_tunnel_info(skb
);
379 key
= &tun_info
->key
;
380 use_cache
= ip_tunnel_dst_cache_usable(skb
, tun_info
);
383 rt
= dst_cache_get_ip4(&tun_info
->dst_cache
, &fl
->saddr
);
385 rt
= gre_get_rt(skb
, dev
, fl
, key
);
389 dst_cache_set_ip4(&tun_info
->dst_cache
, &rt
->dst
,
393 min_headroom
= LL_RESERVED_SPACE(rt
->dst
.dev
) + rt
->dst
.header_len
394 + tunnel_hlen
+ sizeof(struct iphdr
);
395 if (skb_headroom(skb
) < min_headroom
|| skb_header_cloned(skb
)) {
396 int head_delta
= SKB_DATA_ALIGN(min_headroom
-
399 err
= pskb_expand_head(skb
, max_t(int, head_delta
, 0),
410 dev
->stats
.tx_dropped
++;
414 netdev_tx_t
rpl_gre_fb_xmit(struct sk_buff
*skb
)
416 struct net_device
*dev
= skb
->dev
;
417 struct ip_tunnel_info
*tun_info
;
418 const struct ip_tunnel_key
*key
;
426 tun_info
= skb_tunnel_info(skb
);
427 if (unlikely(!tun_info
|| !(tun_info
->mode
& IP_TUNNEL_INFO_TX
) ||
428 ip_tunnel_info_af(tun_info
) != AF_INET
))
431 key
= &tun_info
->key
;
433 rt
= gre_get_rt(skb
, dev
, &fl
, key
);
437 tunnel_hlen
= ip_gre_calc_hlen(key
->tun_flags
);
439 min_headroom
= LL_RESERVED_SPACE(rt
->dst
.dev
) + rt
->dst
.header_len
440 + tunnel_hlen
+ sizeof(struct iphdr
)
441 + (skb_vlan_tag_present(skb
) ? VLAN_HLEN
: 0);
442 if (skb_headroom(skb
) < min_headroom
|| skb_header_cloned(skb
)) {
443 int head_delta
= SKB_DATA_ALIGN(min_headroom
-
446 err
= pskb_expand_head(skb
, max_t(int, head_delta
, 0),
452 if (skb_vlan_tag_present(skb
)) {
453 skb
= __vlan_hwaccel_push_inside(skb
);
454 if (unlikely(!skb
)) {
460 /* Push Tunnel header. */
461 err
= gre_handle_offloads(skb
, !!(tun_info
->key
.tun_flags
& TUNNEL_CSUM
));
465 flags
= tun_info
->key
.tun_flags
& (TUNNEL_CSUM
| TUNNEL_KEY
);
466 build_header(skb
, tunnel_hlen
, flags
, htons(ETH_P_TEB
),
467 tunnel_id_to_key32(tun_info
->key
.tun_id
), 0);
469 df
= key
->tun_flags
& TUNNEL_DONT_FRAGMENT
? htons(IP_DF
) : 0;
470 iptunnel_xmit(skb
->sk
, rt
, skb
, fl
.saddr
, key
->u
.ipv4
.dst
, IPPROTO_GRE
,
471 key
->tos
, key
->ttl
, df
, false);
478 dev
->stats
.tx_dropped
++;
481 EXPORT_SYMBOL(rpl_gre_fb_xmit
);
483 static void erspan_fb_xmit(struct sk_buff
*skb
, struct net_device
*dev
,
486 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
487 struct ip_tunnel_info
*tun_info
;
488 const struct ip_tunnel_key
*key
;
489 struct erspan_metadata
*md
;
490 struct rtable
*rt
= NULL
;
491 struct tnl_ptk_info tpi
;
492 bool truncate
= false;
500 tun_info
= skb_tunnel_info(skb
);
501 if (unlikely(!tun_info
|| !(tun_info
->mode
& IP_TUNNEL_INFO_TX
) ||
502 ip_tunnel_info_af(tun_info
) != AF_INET
))
505 key
= &tun_info
->key
;
506 if (!(tun_info
->key
.tun_flags
& TUNNEL_ERSPAN_OPT
))
508 md
= ip_tunnel_info_opts(tun_info
);
512 /* ERSPAN has fixed 8 byte GRE header */
513 version
= md
->version
;
514 tunnel_hlen
= 8 + erspan_hdr_len(version
);
516 rt
= prepare_fb_xmit(skb
, dev
, &fl
, tunnel_hlen
);
520 if (gre_handle_offloads(skb
, false))
523 if (skb
->len
> dev
->mtu
+ dev
->hard_header_len
) {
524 pskb_trim(skb
, dev
->mtu
+ dev
->hard_header_len
);
528 nhoff
= skb_network_header(skb
) - skb_mac_header(skb
);
529 if (skb
->protocol
== htons(ETH_P_IP
) &&
530 (ntohs(ip_hdr(skb
)->tot_len
) > skb
->len
- nhoff
))
533 thoff
= skb_transport_header(skb
) - skb_mac_header(skb
);
534 if (skb
->protocol
== htons(ETH_P_IPV6
) &&
535 (ntohs(ipv6_hdr(skb
)->payload_len
) > skb
->len
- thoff
))
539 erspan_build_header(skb
, ntohl(tunnel_id_to_key32(key
->tun_id
)),
540 ntohl(md
->u
.index
), truncate
, true);
541 tpi
.hdr_len
= ERSPAN_V1_MDSIZE
;
542 tpi
.proto
= htons(ETH_P_ERSPAN
);
543 } else if (version
== 2) {
544 erspan_build_header_v2(skb
,
545 ntohl(tunnel_id_to_key32(key
->tun_id
)),
547 get_hwid(&md
->u
.md2
),
549 tpi
.hdr_len
= ERSPAN_V2_MDSIZE
;
550 tpi
.proto
= htons(ETH_P_ERSPAN2
);
555 tpi
.flags
= TUNNEL_SEQ
;
556 tpi
.key
= tunnel_id_to_key32(key
->tun_id
);
557 tpi
.seq
= htonl(tunnel
->o_seqno
++);
559 gre_build_header(skb
, &tpi
, 8);
561 df
= key
->tun_flags
& TUNNEL_DONT_FRAGMENT
? htons(IP_DF
) : 0;
563 iptunnel_xmit(skb
->sk
, rt
, skb
, fl
.saddr
, key
->u
.ipv4
.dst
, IPPROTO_GRE
,
564 key
->tos
, key
->ttl
, df
, false);
571 dev
->stats
.tx_dropped
++;
574 #define GRE_FEATURES (NETIF_F_SG | \
580 static void __gre_tunnel_init(struct net_device
*dev
)
582 struct ip_tunnel
*tunnel
;
584 tunnel
= netdev_priv(dev
);
585 tunnel
->tun_hlen
= ip_gre_calc_hlen(tunnel
->parms
.o_flags
);
586 tunnel
->parms
.iph
.protocol
= IPPROTO_GRE
;
588 tunnel
->hlen
= tunnel
->tun_hlen
+ tunnel
->encap_hlen
;
590 dev
->features
|= GRE_FEATURES
;
591 dev
->hw_features
|= GRE_FEATURES
;
593 if (!(tunnel
->parms
.o_flags
& TUNNEL_SEQ
)) {
594 /* TCP offload with GRE SEQ is not supported, nor
595 * can we support 2 levels of outer headers requiring
598 if (!(tunnel
->parms
.o_flags
& TUNNEL_CSUM
) ||
599 (tunnel
->encap
.type
== TUNNEL_ENCAP_NONE
)) {
600 dev
->features
|= NETIF_F_GSO_SOFTWARE
;
601 dev
->hw_features
|= NETIF_F_GSO_SOFTWARE
;
604 /* Can use a lockless transmit, unless we generate
607 dev
->features
|= NETIF_F_LLTX
;
611 static int __gre_rcv(struct sk_buff
*skb
)
613 return gre_rcv(skb
, NULL
);
616 void __gre_err(struct sk_buff
*skb
, u32 info
)
618 pr_warn("%s: GRE receive error\n", __func__
);
621 static const struct gre_protocol ipgre_protocol
= {
622 .handler
= __gre_rcv
,
623 .err_handler
= __gre_err
,
626 #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
627 static int ipgre_tunnel_validate(struct nlattr
*tb
[], struct nlattr
*data
[],
628 struct netlink_ext_ack
*extack
)
630 static int ipgre_tunnel_validate(struct nlattr
*tb
[], struct nlattr
*data
[])
639 if (data
[IFLA_GRE_IFLAGS
])
640 flags
|= nla_get_be16(data
[IFLA_GRE_IFLAGS
]);
641 if (data
[IFLA_GRE_OFLAGS
])
642 flags
|= nla_get_be16(data
[IFLA_GRE_OFLAGS
]);
643 if (flags
& (GRE_VERSION
|GRE_ROUTING
))
649 #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
650 static int ipgre_tap_validate(struct nlattr
*tb
[], struct nlattr
*data
[],
651 struct netlink_ext_ack
*extack
)
653 static int ipgre_tap_validate(struct nlattr
*tb
[], struct nlattr
*data
[])
658 if (tb
[IFLA_ADDRESS
]) {
659 if (nla_len(tb
[IFLA_ADDRESS
]) != ETH_ALEN
)
661 if (!is_valid_ether_addr(nla_data(tb
[IFLA_ADDRESS
])))
662 return -EADDRNOTAVAIL
;
668 if (data
[IFLA_GRE_REMOTE
]) {
669 memcpy(&daddr
, nla_data(data
[IFLA_GRE_REMOTE
]), 4);
675 #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
676 return ipgre_tunnel_validate(tb
, data
, NULL
);
678 return ipgre_tunnel_validate(tb
, data
);
683 #ifndef HAVE_IFLA_GRE_ENCAP_DPORT
684 IFLA_GRE_ENCAP_TYPE
= IFLA_GRE_FLAGS
+ 1,
685 IFLA_GRE_ENCAP_FLAGS
,
686 IFLA_GRE_ENCAP_SPORT
,
687 IFLA_GRE_ENCAP_DPORT
,
689 #ifndef HAVE_IFLA_GRE_COLLECT_METADATA
690 IFLA_GRE_COLLECT_METADATA
= IFLA_GRE_ENCAP_DPORT
+ 1,
692 #ifndef HAVE_IFLA_GRE_IGNORE_DF
693 IFLA_GRE_IGNORE_DF
= IFLA_GRE_COLLECT_METADATA
+ 1,
695 #ifndef HAVE_IFLA_GRE_FWMARK
696 IFLA_GRE_FWMARK
= IFLA_GRE_IGNORE_DF
+ 1,
698 #ifndef HAVE_IFLA_GRE_ERSPAN_INDEX
699 IFLA_GRE_ERSPAN_INDEX
= IFLA_GRE_FWMARK
+ 1,
701 #ifndef HAVE_IFLA_GRE_ERSPAN_HWID
702 IFLA_GRE_ERSPAN_VER
= IFLA_GRE_ERSPAN_INDEX
+ 1,
704 IFLA_GRE_ERSPAN_HWID
,
708 #define RPL_IFLA_GRE_MAX (IFLA_GRE_ERSPAN_HWID + 1)
710 #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
711 static int erspan_validate(struct nlattr
*tb
[], struct nlattr
*data
[],
712 struct netlink_ext_ack
*extack
)
714 static int erspan_validate(struct nlattr
*tb
[], struct nlattr
*data
[])
723 #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
724 ret
= ipgre_tap_validate(tb
, data
, NULL
);
726 ret
= ipgre_tap_validate(tb
, data
);
731 /* ERSPAN should only have GRE sequence and key flag */
732 if (data
[IFLA_GRE_OFLAGS
])
733 flags
|= nla_get_be16(data
[IFLA_GRE_OFLAGS
]);
734 if (data
[IFLA_GRE_IFLAGS
])
735 flags
|= nla_get_be16(data
[IFLA_GRE_IFLAGS
]);
736 if (!data
[IFLA_GRE_COLLECT_METADATA
] &&
737 flags
!= (GRE_SEQ
| GRE_KEY
))
740 /* ERSPAN Session ID only has 10-bit. Since we reuse
741 * 32-bit key field as ID, check it's range.
743 if (data
[IFLA_GRE_OKEY
] &&
744 (ntohl(nla_get_be32(data
[IFLA_GRE_OKEY
])) & ~ID_MASK
))
750 static int ipgre_netlink_parms(struct net_device
*dev
,
751 struct nlattr
*data
[],
753 struct ip_tunnel_parm
*parms
)
755 struct ip_tunnel
*t
= netdev_priv(dev
);
757 memset(parms
, 0, sizeof(*parms
));
759 parms
->iph
.protocol
= IPPROTO_GRE
;
764 if (data
[IFLA_GRE_LINK
])
765 parms
->link
= nla_get_u32(data
[IFLA_GRE_LINK
]);
767 if (data
[IFLA_GRE_IFLAGS
])
768 parms
->i_flags
= gre_flags_to_tnl_flags(nla_get_be16(data
[IFLA_GRE_IFLAGS
]));
770 if (data
[IFLA_GRE_OFLAGS
])
771 parms
->o_flags
= gre_flags_to_tnl_flags(nla_get_be16(data
[IFLA_GRE_OFLAGS
]));
773 if (data
[IFLA_GRE_IKEY
])
774 parms
->i_key
= nla_get_be32(data
[IFLA_GRE_IKEY
]);
776 if (data
[IFLA_GRE_OKEY
])
777 parms
->o_key
= nla_get_be32(data
[IFLA_GRE_OKEY
]);
779 if (data
[IFLA_GRE_LOCAL
])
780 parms
->iph
.saddr
= nla_get_in_addr(data
[IFLA_GRE_LOCAL
]);
782 if (data
[IFLA_GRE_REMOTE
])
783 parms
->iph
.daddr
= nla_get_in_addr(data
[IFLA_GRE_REMOTE
]);
785 if (data
[IFLA_GRE_TTL
])
786 parms
->iph
.ttl
= nla_get_u8(data
[IFLA_GRE_TTL
]);
788 if (data
[IFLA_GRE_TOS
])
789 parms
->iph
.tos
= nla_get_u8(data
[IFLA_GRE_TOS
]);
791 if (!data
[IFLA_GRE_PMTUDISC
] || nla_get_u8(data
[IFLA_GRE_PMTUDISC
])) {
794 parms
->iph
.frag_off
= htons(IP_DF
);
797 if (data
[IFLA_GRE_COLLECT_METADATA
]) {
798 t
->collect_md
= true;
799 if (dev
->type
== ARPHRD_IPGRE
)
800 dev
->type
= ARPHRD_NONE
;
803 if (data
[IFLA_GRE_IGNORE_DF
]) {
804 if (nla_get_u8(data
[IFLA_GRE_IGNORE_DF
])
805 && (parms
->iph
.frag_off
& htons(IP_DF
)))
807 t
->ignore_df
= !!nla_get_u8(data
[IFLA_GRE_IGNORE_DF
]);
810 if (data
[IFLA_GRE_ERSPAN_INDEX
]) {
811 t
->index
= nla_get_u32(data
[IFLA_GRE_ERSPAN_INDEX
]);
813 if (t
->index
& ~INDEX_MASK
)
820 static int gre_tap_init(struct net_device
*dev
)
822 __gre_tunnel_init(dev
);
823 dev
->priv_flags
|= IFF_LIVE_ADDR_CHANGE
;
825 return ip_tunnel_init(dev
);
828 static netdev_tx_t
gre_dev_xmit(struct sk_buff
*skb
, struct net_device
*dev
)
830 /* Drop All packets coming from networking stack. OVS-CB is
831 * not initialized for these packets.
835 dev
->stats
.tx_dropped
++;
839 static netdev_tx_t
erspan_xmit(struct sk_buff
*skb
,
840 struct net_device
*dev
)
842 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
843 bool truncate
= false;
845 if (tunnel
->collect_md
) {
846 erspan_fb_xmit(skb
, dev
, skb
->protocol
);
850 if (gre_handle_offloads(skb
, false))
853 if (skb_cow_head(skb
, dev
->needed_headroom
))
856 if (skb
->len
> dev
->mtu
+ dev
->hard_header_len
) {
857 pskb_trim(skb
, dev
->mtu
+ dev
->hard_header_len
);
861 /* Push ERSPAN header */
862 if (tunnel
->erspan_ver
== 1)
863 erspan_build_header(skb
, ntohl(tunnel
->parms
.o_key
),
866 else if (tunnel
->erspan_ver
== 2)
867 erspan_build_header_v2(skb
, ntohl(tunnel
->parms
.o_key
),
868 tunnel
->dir
, tunnel
->hwid
,
873 tunnel
->parms
.o_flags
&= ~TUNNEL_KEY
;
874 __gre_xmit(skb
, dev
, &tunnel
->parms
.iph
, htons(ETH_P_ERSPAN
));
879 dev
->stats
.tx_dropped
++;
883 static netdev_tx_t
__erspan_fb_xmit(struct sk_buff
*skb
)
885 erspan_fb_xmit(skb
, skb
->dev
, skb
->protocol
);
889 int ovs_gre_fill_metadata_dst(struct net_device
*dev
, struct sk_buff
*skb
)
891 struct ip_tunnel_info
*info
= skb_tunnel_info(skb
);
895 if (ip_tunnel_info_af(info
) != AF_INET
)
898 rt
= gre_get_rt(skb
, dev
, &fl4
, &info
->key
);
903 info
->key
.u
.ipv4
.src
= fl4
.saddr
;
906 EXPORT_SYMBOL_GPL(ovs_gre_fill_metadata_dst
);
908 static int erspan_tunnel_init(struct net_device
*dev
)
910 struct ip_tunnel
*tunnel
= netdev_priv(dev
);
912 tunnel
->tun_hlen
= 8;
913 tunnel
->parms
.iph
.protocol
= IPPROTO_GRE
;
914 tunnel
->hlen
= tunnel
->tun_hlen
+ tunnel
->encap_hlen
+
915 erspan_hdr_len(tunnel
->erspan_ver
);
917 dev
->features
|= GRE_FEATURES
;
918 dev
->hw_features
|= GRE_FEATURES
;
919 dev
->priv_flags
|= IFF_LIVE_ADDR_CHANGE
;
922 return ip_tunnel_init(dev
);
925 static const struct net_device_ops gre_tap_netdev_ops
= {
926 .ndo_init
= gre_tap_init
,
927 .ndo_uninit
= rpl_ip_tunnel_uninit
,
928 .ndo_start_xmit
= gre_dev_xmit
,
929 .ndo_set_mac_address
= eth_mac_addr
,
930 .ndo_validate_addr
= eth_validate_addr
,
931 #ifdef HAVE_RHEL7_MAX_MTU
932 .ndo_size
= sizeof(struct net_device_ops
),
933 .extended
.ndo_change_mtu
= ip_tunnel_change_mtu
,
935 .ndo_change_mtu
= ip_tunnel_change_mtu
,
937 .ndo_get_stats64
= ip_tunnel_get_stats64
,
938 #ifdef HAVE_NDO_GET_IFLINK
939 .ndo_get_iflink
= rpl_ip_tunnel_get_iflink
,
941 #ifdef HAVE_NDO_FILL_METADATA_DST
942 .ndo_fill_metadata_dst
= gre_fill_metadata_dst
,
946 static const struct net_device_ops erspan_netdev_ops
= {
947 .ndo_init
= erspan_tunnel_init
,
948 .ndo_uninit
= rpl_ip_tunnel_uninit
,
949 .ndo_start_xmit
= erspan_xmit
,
950 .ndo_set_mac_address
= eth_mac_addr
,
951 .ndo_validate_addr
= eth_validate_addr
,
952 #ifdef HAVE_RHEL7_MAX_MTU
953 .ndo_size
= sizeof(struct net_device_ops
),
954 .extended
.ndo_change_mtu
= ip_tunnel_change_mtu
,
956 .ndo_change_mtu
= ip_tunnel_change_mtu
,
958 .ndo_get_stats64
= ip_tunnel_get_stats64
,
959 #ifdef HAVE_NDO_GET_IFLINK
960 .ndo_get_iflink
= rpl_ip_tunnel_get_iflink
,
962 #ifdef HAVE_NDO_FILL_METADATA_DST
963 .ndo_fill_metadata_dst
= gre_fill_metadata_dst
,
967 static void ipgre_tap_setup(struct net_device
*dev
)
970 #ifdef HAVE_NET_DEVICE_MAX_MTU
973 dev
->netdev_ops
= &gre_tap_netdev_ops
;
974 dev
->priv_flags
|= IFF_LIVE_ADDR_CHANGE
;
975 ip_tunnel_setup(dev
, gre_tap_net_id
);
978 static void erspan_setup(struct net_device
*dev
)
980 struct ip_tunnel
*t
= netdev_priv(dev
);
982 eth_hw_addr_random(dev
);
984 #ifdef HAVE_NET_DEVICE_MAX_MTU
987 dev
->netdev_ops
= &erspan_netdev_ops
;
988 dev
->priv_flags
&= ~IFF_TX_SKB_SHARING
;
989 dev
->priv_flags
|= IFF_LIVE_ADDR_CHANGE
;
990 ip_tunnel_setup(dev
, erspan_net_id
);
994 #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
995 static int ipgre_newlink(struct net
*src_net
, struct net_device
*dev
,
996 struct nlattr
*tb
[], struct nlattr
*data
[],
997 struct netlink_ext_ack
*extack
)
999 static int ipgre_newlink(struct net
*src_net
, struct net_device
*dev
,
1000 struct nlattr
*tb
[], struct nlattr
*data
[])
1003 struct ip_tunnel_parm p
;
1006 ipgre_netlink_parms(dev
, data
, tb
, &p
);
1007 err
= ip_tunnel_newlink(dev
, tb
, &p
);
1012 static size_t ipgre_get_size(const struct net_device
*dev
)
1017 /* IFLA_GRE_IFLAGS */
1019 /* IFLA_GRE_OFLAGS */
1025 /* IFLA_GRE_LOCAL */
1027 /* IFLA_GRE_REMOTE */
1033 /* IFLA_GRE_PMTUDISC */
1035 /* IFLA_GRE_ENCAP_TYPE */
1037 /* IFLA_GRE_ENCAP_FLAGS */
1039 /* IFLA_GRE_ENCAP_SPORT */
1041 /* IFLA_GRE_ENCAP_DPORT */
1043 /* IFLA_GRE_COLLECT_METADATA */
1045 /* IFLA_GRE_ERSPAN_INDEX */
1047 /* IFLA_GRE_ERSPAN_VER */
1049 /* IFLA_GRE_ERSPAN_DIR */
1051 /* IFLA_GRE_ERSPAN_HWID */
1056 static int ipgre_fill_info(struct sk_buff
*skb
, const struct net_device
*dev
)
1058 struct ip_tunnel
*t
= netdev_priv(dev
);
1059 struct ip_tunnel_parm
*p
= &t
->parms
;
1061 if (nla_put_u32(skb
, IFLA_GRE_LINK
, p
->link
) ||
1062 nla_put_be16(skb
, IFLA_GRE_IFLAGS
, tnl_flags_to_gre_flags(p
->i_flags
)) ||
1063 nla_put_be16(skb
, IFLA_GRE_OFLAGS
, tnl_flags_to_gre_flags(p
->o_flags
)) ||
1064 nla_put_be32(skb
, IFLA_GRE_IKEY
, p
->i_key
) ||
1065 nla_put_be32(skb
, IFLA_GRE_OKEY
, p
->o_key
) ||
1066 nla_put_in_addr(skb
, IFLA_GRE_LOCAL
, p
->iph
.saddr
) ||
1067 nla_put_in_addr(skb
, IFLA_GRE_REMOTE
, p
->iph
.daddr
) ||
1068 nla_put_u8(skb
, IFLA_GRE_TTL
, p
->iph
.ttl
) ||
1069 nla_put_u8(skb
, IFLA_GRE_TOS
, p
->iph
.tos
) ||
1070 nla_put_u8(skb
, IFLA_GRE_PMTUDISC
,
1071 !!(p
->iph
.frag_off
& htons(IP_DF
))))
1072 goto nla_put_failure
;
1074 if (nla_put_u8(skb
, IFLA_GRE_ERSPAN_VER
, t
->erspan_ver
))
1075 goto nla_put_failure
;
1077 if (t
->erspan_ver
== 1) {
1078 if (nla_put_u32(skb
, IFLA_GRE_ERSPAN_INDEX
, t
->index
))
1079 goto nla_put_failure
;
1080 } else if (t
->erspan_ver
== 2) {
1081 if (nla_put_u8(skb
, IFLA_GRE_ERSPAN_DIR
, t
->dir
))
1082 goto nla_put_failure
;
1083 if (nla_put_u16(skb
, IFLA_GRE_ERSPAN_HWID
, t
->hwid
))
1084 goto nla_put_failure
;
1093 static const struct nla_policy ipgre_policy
[RPL_IFLA_GRE_MAX
+ 1] = {
1094 [IFLA_GRE_LINK
] = { .type
= NLA_U32
},
1095 [IFLA_GRE_IFLAGS
] = { .type
= NLA_U16
},
1096 [IFLA_GRE_OFLAGS
] = { .type
= NLA_U16
},
1097 [IFLA_GRE_IKEY
] = { .type
= NLA_U32
},
1098 [IFLA_GRE_OKEY
] = { .type
= NLA_U32
},
1099 [IFLA_GRE_LOCAL
] = { .len
= FIELD_SIZEOF(struct iphdr
, saddr
) },
1100 [IFLA_GRE_REMOTE
] = { .len
= FIELD_SIZEOF(struct iphdr
, daddr
) },
1101 [IFLA_GRE_TTL
] = { .type
= NLA_U8
},
1102 [IFLA_GRE_TOS
] = { .type
= NLA_U8
},
1103 [IFLA_GRE_PMTUDISC
] = { .type
= NLA_U8
},
1104 [IFLA_GRE_ERSPAN_INDEX
] = { .type
= NLA_U32
},
1105 [IFLA_GRE_ERSPAN_VER
] = { .type
= NLA_U8
},
1106 [IFLA_GRE_ERSPAN_DIR
] = { .type
= NLA_U8
},
1107 [IFLA_GRE_ERSPAN_HWID
] = { .type
= NLA_U16
},
1110 static struct rtnl_link_ops ipgre_tap_ops __read_mostly
= {
1111 .kind
= "ovs_gretap",
1112 .maxtype
= RPL_IFLA_GRE_MAX
,
1113 .policy
= ipgre_policy
,
1114 .priv_size
= sizeof(struct ip_tunnel
),
1115 .setup
= ipgre_tap_setup
,
1116 .validate
= ipgre_tap_validate
,
1117 .newlink
= ipgre_newlink
,
1118 .dellink
= ip_tunnel_dellink
,
1119 .get_size
= ipgre_get_size
,
1120 .fill_info
= ipgre_fill_info
,
1121 #ifdef HAVE_GET_LINK_NET
1122 .get_link_net
= ip_tunnel_get_link_net
,
1126 static struct rtnl_link_ops erspan_link_ops __read_mostly
= {
1128 .maxtype
= RPL_IFLA_GRE_MAX
,
1129 .policy
= ipgre_policy
,
1130 .priv_size
= sizeof(struct ip_tunnel
),
1131 .setup
= erspan_setup
,
1132 .validate
= erspan_validate
,
1133 .newlink
= ipgre_newlink
,
1134 .dellink
= ip_tunnel_dellink
,
1135 .get_size
= ipgre_get_size
,
1136 .fill_info
= ipgre_fill_info
,
1137 #ifdef HAVE_GET_LINK_NET
1138 .get_link_net
= ip_tunnel_get_link_net
,
1142 struct net_device
*rpl_gretap_fb_dev_create(struct net
*net
, const char *name
,
1143 u8 name_assign_type
)
1145 struct nlattr
*tb
[IFLA_MAX
+ 1];
1146 struct net_device
*dev
;
1147 LIST_HEAD(list_kill
);
1148 struct ip_tunnel
*t
;
1151 memset(&tb
, 0, sizeof(tb
));
1153 dev
= rtnl_create_link(net
, (char *)name
, name_assign_type
,
1154 &ipgre_tap_ops
, tb
);
1158 t
= netdev_priv(dev
);
1159 t
->collect_md
= true;
1160 /* Configure flow based GRE device. */
1161 #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
1162 err
= ipgre_newlink(net
, dev
, tb
, NULL
, NULL
);
1164 err
= ipgre_newlink(net
, dev
, tb
, NULL
);
1168 return ERR_PTR(err
);
1171 /* openvswitch users expect packet sizes to be unrestricted,
1172 * so set the largest MTU we can.
1174 err
= __ip_tunnel_change_mtu(dev
, IP_MAX_MTU
, false);
1180 ip_tunnel_dellink(dev
, &list_kill
);
1181 unregister_netdevice_many(&list_kill
);
1182 return ERR_PTR(err
);
1184 EXPORT_SYMBOL_GPL(rpl_gretap_fb_dev_create
);
1186 static int __net_init
erspan_init_net(struct net
*net
)
1188 return ip_tunnel_init_net(net
, erspan_net_id
,
1189 &erspan_link_ops
, NULL
);
1192 static void __net_exit
erspan_exit_net(struct net
*net
)
1194 struct ip_tunnel_net
*itn
= net_generic(net
, erspan_net_id
);
1196 ip_tunnel_delete_net(itn
, &erspan_link_ops
);
1199 static struct pernet_operations erspan_net_ops
= {
1200 .init
= erspan_init_net
,
1201 .exit
= erspan_exit_net
,
1202 .id
= &erspan_net_id
,
1203 .size
= sizeof(struct ip_tunnel_net
),
1206 static int __net_init
ipgre_tap_init_net(struct net
*net
)
1208 return ip_tunnel_init_net(net
, gre_tap_net_id
, &ipgre_tap_ops
, "ovs-gretap0");
1211 static void __net_exit
ipgre_tap_exit_net(struct net
*net
)
1213 struct ip_tunnel_net
*itn
= net_generic(net
, gre_tap_net_id
);
1215 ip_tunnel_delete_net(itn
, &ipgre_tap_ops
);
1218 static struct pernet_operations ipgre_tap_net_ops
= {
1219 .init
= ipgre_tap_init_net
,
1220 .exit
= ipgre_tap_exit_net
,
1221 .id
= &gre_tap_net_id
,
1222 .size
= sizeof(struct ip_tunnel_net
),
1225 static struct net_device
*erspan_fb_dev_create(struct net
*net
,
1227 u8 name_assign_type
)
1229 struct nlattr
*tb
[IFLA_MAX
+ 1];
1230 struct net_device
*dev
;
1231 LIST_HEAD(list_kill
);
1232 struct ip_tunnel
*t
;
1235 memset(&tb
, 0, sizeof(tb
));
1237 dev
= rtnl_create_link(net
, (char *)name
, name_assign_type
,
1238 &erspan_link_ops
, tb
);
1242 t
= netdev_priv(dev
);
1243 t
->collect_md
= true;
1244 /* Configure flow based GRE device. */
1245 #ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
1246 err
= ipgre_newlink(net
, dev
, tb
, NULL
, NULL
);
1248 err
= ipgre_newlink(net
, dev
, tb
, NULL
);
1252 return ERR_PTR(err
);
1255 /* openvswitch users expect packet sizes to be unrestricted,
1256 * so set the largest MTU we can.
1258 err
= __ip_tunnel_change_mtu(dev
, IP_MAX_MTU
, false);
1264 ip_tunnel_dellink(dev
, &list_kill
);
1265 unregister_netdevice_many(&list_kill
);
1266 return ERR_PTR(err
);
1269 static struct vport_ops ovs_erspan_vport_ops
;
1271 static struct vport
*erspan_tnl_create(const struct vport_parms
*parms
)
1273 struct net
*net
= ovs_dp_get_net(parms
->dp
);
1274 struct net_device
*dev
;
1275 struct vport
*vport
;
1278 vport
= ovs_vport_alloc(0, &ovs_erspan_vport_ops
, parms
);
1283 dev
= erspan_fb_dev_create(net
, parms
->name
, NET_NAME_USER
);
1286 ovs_vport_free(vport
);
1287 return ERR_CAST(dev
);
1290 err
= dev_change_flags(dev
, dev
->flags
| IFF_UP
, NULL
);
1292 rtnl_delete_link(dev
);
1294 ovs_vport_free(vport
);
1295 return ERR_PTR(err
);
1302 static struct vport
*erspan_create(const struct vport_parms
*parms
)
1304 struct vport
*vport
;
1306 vport
= erspan_tnl_create(parms
);
1310 return ovs_netdev_link(vport
, parms
->name
);
1313 static struct vport_ops ovs_erspan_vport_ops
= {
1314 .type
= OVS_VPORT_TYPE_ERSPAN
,
1315 .create
= erspan_create
,
1316 .send
= __erspan_fb_xmit
,
1317 #ifndef USE_UPSTREAM_TUNNEL
1318 .fill_metadata_dst
= gre_fill_metadata_dst
,
1320 .destroy
= ovs_netdev_tunnel_destroy
,
1323 static struct vport_ops ovs_ipgre_vport_ops
;
1325 static struct vport
*ipgre_tnl_create(const struct vport_parms
*parms
)
1327 struct net
*net
= ovs_dp_get_net(parms
->dp
);
1328 struct net_device
*dev
;
1329 struct vport
*vport
;
1332 vport
= ovs_vport_alloc(0, &ovs_ipgre_vport_ops
, parms
);
1337 dev
= gretap_fb_dev_create(net
, parms
->name
, NET_NAME_USER
);
1340 ovs_vport_free(vport
);
1341 return ERR_CAST(dev
);
1344 err
= dev_change_flags(dev
, dev
->flags
| IFF_UP
, NULL
);
1346 rtnl_delete_link(dev
);
1348 ovs_vport_free(vport
);
1349 return ERR_PTR(err
);
1356 static struct vport
*ipgre_create(const struct vport_parms
*parms
)
1358 struct vport
*vport
;
1360 vport
= ipgre_tnl_create(parms
);
1364 return ovs_netdev_link(vport
, parms
->name
);
1367 static struct vport_ops ovs_ipgre_vport_ops
= {
1368 .type
= OVS_VPORT_TYPE_GRE
,
1369 .create
= ipgre_create
,
1370 .send
= gre_fb_xmit
,
1371 #ifndef USE_UPSTREAM_TUNNEL
1372 .fill_metadata_dst
= gre_fill_metadata_dst
,
1374 .destroy
= ovs_netdev_tunnel_destroy
,
1377 int rpl_ipgre_init(void)
1381 err
= register_pernet_device(&ipgre_tap_net_ops
);
1386 goto pnet_tap_failed
;
1389 err
= register_pernet_device(&erspan_net_ops
);
1394 goto pnet_erspan_failed
;
1397 err
= gre_add_protocol(&ipgre_protocol
, GREPROTO_CISCO
);
1399 pr_info("%s: can't add protocol\n", __func__
);
1400 if (err
== -EBUSY
) {
1403 goto add_proto_failed
;
1407 pr_info("GRE over IPv4 tunneling driver\n");
1408 ovs_vport_ops_register(&ovs_ipgre_vport_ops
);
1409 ovs_vport_ops_register(&ovs_erspan_vport_ops
);
1413 /* Since GRE only allows single receiver to be registerd,
1414 * we skip here so only gre transmit works, see:
1416 * commit 9f57c67c379d88a10e8ad676426fee5ae7341b14
1417 * Author: Pravin B Shelar <pshelar@nicira.com>
1418 * Date: Fri Aug 7 23:51:52 2015 -0700
1419 * gre: Remove support for sharing GRE protocol hook
1421 * OVS GRE receive part is disabled.
1423 pr_info("GRE TX only over IPv4 tunneling driver\n");
1424 ip_gre_loaded
= true;
1425 ovs_vport_ops_register(&ovs_ipgre_vport_ops
);
1426 ovs_vport_ops_register(&ovs_erspan_vport_ops
);
1430 unregister_pernet_device(&erspan_net_ops
);
1432 unregister_pernet_device(&ipgre_tap_net_ops
);
1434 pr_err("Error while initializing GRE %d\n", err
);
1438 void rpl_ipgre_fini(void)
1440 ovs_vport_ops_unregister(&ovs_erspan_vport_ops
);
1441 ovs_vport_ops_unregister(&ovs_ipgre_vport_ops
);
1443 if (!ip_gre_loaded
) {
1444 gre_del_protocol(&ipgre_protocol
, GREPROTO_CISCO
);
1445 unregister_pernet_device(&erspan_net_ops
);
1446 unregister_pernet_device(&ipgre_tap_net_ops
);