2 * Copyright (c) 2007-2011 Nicira Networks.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22 #include <linux/skbuff.h>
24 #include <linux/if_tunnel.h>
25 #include <linux/if_vlan.h>
30 #include <net/protocol.h>
34 #include "vport-generic.h"
37 * The GRE header is composed of a series of sections: a base and then a variable
40 #define GRE_HEADER_SECTION 4
/*
 * gre_hdr_len - size in bytes of the GRE header for a tunnel configuration.
 *
 * The length starts at one GRE_HEADER_SECTION (the base header) and grows by
 * one further section when TNL_F_CSUM is set, and by another when an output
 * key is in use (non-zero out_key or TNL_F_OUT_KEY_ACTION set).
 *
 * NOTE(review): the declaration of len and the return statement are not
 * visible in this excerpt; presumably len is what is returned -- confirm.
 */
47 static int gre_hdr_len(const struct tnl_mutable_config
*mutable)
51 len
= GRE_HEADER_SECTION
;
53 if (mutable->flags
& TNL_F_CSUM
)
54 len
+= GRE_HEADER_SECTION
;
56 if (mutable->out_key
|| mutable->flags
& TNL_F_OUT_KEY_ACTION
)
57 len
+= GRE_HEADER_SECTION
;
62 /* Returns the least-significant 32 bits of a __be64. */
/*
 * NOTE(review): two alternative return statements are visible below. The
 * first converts x directly; the second shifts the raw 64-bit value right by
 * 32 bits before converting. They are presumably selected by a byte-order
 * preprocessor conditional that is not visible in this excerpt -- confirm.
 */
63 static __be32
be64_get_low32(__be64 x
)
66 return (__force __be32
)x
;
68 return (__force __be32
)((__force u64
)x
>> 32);
/*
 * gre_build_header - fill in the parts of the GRE header that depend only on
 * the tunnel configuration.
 *
 * The base header is taken from "header" and the options area begins
 * immediately after it.  The protocol is set to ETH_P_TEB, GRE_CSUM is
 * flagged when TNL_F_CSUM is configured, GRE_KEY is flagged when an output
 * key is in use (non-zero out_key or TNL_F_OUT_KEY_ACTION), and the low 32
 * bits of mutable->out_key are stored through the options pointer.
 *
 * NOTE(review): the remaining parameters (including "header"), the braces
 * and any statements that initialise greh->flags or advance the options
 * pointer are not visible in this excerpt.
 */
72 static void gre_build_header(const struct vport
*vport
,
73 const struct tnl_mutable_config
*mutable,
76 struct gre_base_hdr
*greh
= header
;
77 __be32
*options
= (__be32
*)(greh
+ 1);
79 greh
->protocol
= htons(ETH_P_TEB
);
82 if (mutable->flags
& TNL_F_CSUM
) {
83 greh
->flags
|= GRE_CSUM
;
88 if (mutable->out_key
|| mutable->flags
& TNL_F_OUT_KEY_ACTION
)
89 greh
->flags
|= GRE_KEY
;
92 *options
= be64_get_low32(mutable->out_key
);
/*
 * gre_update_header - fill in the per-packet parts of the GRE header.
 *
 * options starts at the last GRE_HEADER_SECTION-sized word of the tunnel
 * header (network header + tunnel_hlen - GRE_HEADER_SECTION).  When
 * TNL_F_OUT_KEY_ACTION is set, the key word is written from the low 32 bits
 * of the packet's OVS_CB(skb)->tun_id.  When TNL_F_CSUM is set, a folded
 * checksum computed by skb_checksum() from the transport header offset to
 * the end of the packet is stored.  Finally __ip_select_ident() is called on
 * the outer IP header so that an IP ID is chosen.
 *
 * NOTE(review): the skb parameter, the braces, any movement of the options
 * pointer between fields and the return statement are not visible in this
 * excerpt.
 */
95 static struct sk_buff
*gre_update_header(const struct vport
*vport
,
96 const struct tnl_mutable_config
*mutable,
97 struct dst_entry
*dst
,
100 __be32
*options
= (__be32
*)(skb_network_header(skb
) + mutable->tunnel_hlen
101 - GRE_HEADER_SECTION
);
103 /* Work backwards over the options so the checksum is last. */
104 if (mutable->flags
& TNL_F_OUT_KEY_ACTION
) {
105 *options
= be64_get_low32(OVS_CB(skb
)->tun_id
);
109 if (mutable->flags
& TNL_F_CSUM
)
110 *(__sum16
*)options
= csum_fold(skb_checksum(skb
,
111 skb_transport_offset(skb
),
112 skb
->len
- skb_transport_offset(skb
),
115 * Allow our local IP stack to fragment the outer packet even if the
116 * DF bit is set as a last resort. We also need to force selection of
117 * an IP ID here because Linux will otherwise leave it at 0 if the
118 * packet originally had DF set.
121 __ip_select_ident(ip_hdr(skb
), dst
, 0);
126 /* Zero-extends a __be32 into the least-significant 32 bits of a __be64. */
/*
 * NOTE(review): two alternative return statements are visible below. The
 * first converts x directly; the second widens it to u64 and shifts it left
 * by 32 bits.  They are presumably selected by a byte-order preprocessor
 * conditional that is not visible in this excerpt -- confirm.
 */
127 static __be64
be32_extend_to_be64(__be32 x
)
130 return (__force __be64
)x
;
132 return (__force __be64
)((__force u64
)x
<< 32);
/*
 * parse_header - parse the GRE header that follows the IP header at iph.
 *
 * The GRE base header is located iph->ihl * 4 bytes after iph and the options
 * area begins right after the base header.  The GRE flags are copied to
 * *flags.  Headers carrying GRE_VERSION or GRE_ROUTING bits, or a protocol
 * other than ETH_P_TEB, are singled out (the action taken is not visible
 * here; both callers test the result for < 0).  hdr_len starts at one
 * GRE_HEADER_SECTION and grows by one section for each of GRE_CSUM, GRE_KEY
 * and GRE_SEQ that is present.  When GRE_KEY is present, *key receives the
 * 32-bit word read through options, widened by be32_extend_to_be64().
 *
 * NOTE(review): the declaration of hdr_len, the statements that advance the
 * options pointer, the handling of *key when no key is present and the
 * return statements are not visible in this excerpt; presumably hdr_len is
 * returned on success -- confirm.
 */
136 static int parse_header(struct iphdr
*iph
, __be16
*flags
, __be64
*key
)
138 /* IP and ICMP protocol handlers check that the IHL is valid. */
139 struct gre_base_hdr
*greh
= (struct gre_base_hdr
*)((u8
*)iph
+ (iph
->ihl
<< 2));
140 __be32
*options
= (__be32
*)(greh
+ 1);
143 *flags
= greh
->flags
;
145 if (unlikely(greh
->flags
& (GRE_VERSION
| GRE_ROUTING
)))
148 if (unlikely(greh
->protocol
!= htons(ETH_P_TEB
)))
151 hdr_len
= GRE_HEADER_SECTION
;
153 if (greh
->flags
& GRE_CSUM
) {
154 hdr_len
+= GRE_HEADER_SECTION
;
158 if (greh
->flags
& GRE_KEY
) {
159 hdr_len
+= GRE_HEADER_SECTION
;
161 *key
= be32_extend_to_be64(*options
);
166 if (unlikely(greh
->flags
& GRE_SEQ
))
167 hdr_len
+= GRE_HEADER_SECTION
;
/*
 * gre_err - ICMP error handler for GRE; it is installed as .err_handler in
 * gre_protocol_handlers.
 *
 * Only ICMP_DEST_UNREACH / ICMP_FRAG_NEEDED is examined further (what happens
 * for other type/code values is not visible here).  The function:
 *   - makes sure the outer IP header, a minimal GRE header, an Ethernet
 *     header and an inner IP header can be pulled;
 *   - parses the GRE header with parse_header() and looks up the tunnel port
 *     with tnl_find_port(), passing saddr before daddr;
 *   - compares the port's input/output key settings and checksum setting
 *     against the packet (see the in-line explanation below);
 *   - points the MAC and network header offsets at the encapsulated frame,
 *     accounting for an optional 802.1Q tag, and derives skb->protocol from
 *     the inner Ethernet/VLAN header;
 *   - compares the reported mtu against IP_MIN_MTU or IPV6_MIN_MTU depending
 *     on the inner protocol;
 *   - calls tnl_frag_needed() with the tunnel headers pulled, then restores
 *     the data pointer, header offsets and skb->protocol.
 *
 * NOTE(review): the declarations of iph, vport, flags and key, every early
 * exit, most braces and several whole statements are not visible in this
 * excerpt, so the exact failure paths cannot be described from here.
 */
172 /* Called with rcu_read_lock and BH disabled. */
173 static void gre_err(struct sk_buff
*skb
, u32 info
)
176 const struct tnl_mutable_config
*mutable;
177 const int type
= icmp_hdr(skb
)->type
;
178 const int code
= icmp_hdr(skb
)->code
;
179 int mtu
= ntohs(icmp_hdr(skb
)->un
.frag
.mtu
);
184 int tunnel_hdr_len
, tot_hdr_len
;
185 unsigned int orig_mac_header
;
186 unsigned int orig_nw_header
;
188 if (type
!= ICMP_DEST_UNREACH
|| code
!= ICMP_FRAG_NEEDED
)
192 * The mimimum size packet that we would actually be able to process:
193 * encapsulating IP header, minimum GRE header, Ethernet header,
196 if (!pskb_may_pull(skb
, sizeof(struct iphdr
) + GRE_HEADER_SECTION
+
197 ETH_HLEN
+ sizeof(struct iphdr
)))
200 iph
= (struct iphdr
*)skb
->data
;
201 if (ipv4_is_multicast(iph
->daddr
))
204 tunnel_hdr_len
= parse_header(iph
, &flags
, &key
);
205 if (tunnel_hdr_len
< 0)
208 vport
= tnl_find_port(iph
->saddr
, iph
->daddr
, key
, TNL_T_PROTO_GRE
,
214 * Packets received by this function were previously sent by us, so
215 * any comparisons should be to the output values, not the input.
216 * However, it's not really worth it to have a hash table based on
217 * output keys (especially since ICMP error handling of tunneled packets
218 * isn't that reliable anyways). Therefore, we do a lookup based on the
219 * out key as if it were the in key and then check to see if the input
220 * and output keys are the same.
222 if (mutable->key
.in_key
!= mutable->out_key
)
225 if (!!(mutable->flags
& TNL_F_IN_KEY_MATCH
) !=
226 !!(mutable->flags
& TNL_F_OUT_KEY_ACTION
))
229 if ((mutable->flags
& TNL_F_CSUM
) && !(flags
& GRE_CSUM
))
232 tunnel_hdr_len
+= iph
->ihl
<< 2;
/*
 * Remember the current header offsets (relative to skb->data) so that they
 * can be put back once tnl_frag_needed() has run.
 */
234 orig_mac_header
= skb_mac_header(skb
) - skb
->data
;
235 orig_nw_header
= skb_network_header(skb
) - skb
->data
;
236 skb_set_mac_header(skb
, tunnel_hdr_len
);
238 tot_hdr_len
= tunnel_hdr_len
+ ETH_HLEN
;
240 skb
->protocol
= eth_hdr(skb
)->h_proto
;
241 if (skb
->protocol
== htons(ETH_P_8021Q
)) {
242 tot_hdr_len
+= VLAN_HLEN
;
243 skb
->protocol
= vlan_eth_hdr(skb
)->h_vlan_encapsulated_proto
;
246 skb_set_network_header(skb
, tot_hdr_len
);
249 if (skb
->protocol
== htons(ETH_P_IP
))
250 tot_hdr_len
+= sizeof(struct iphdr
);
251 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
252 else if (skb
->protocol
== htons(ETH_P_IPV6
))
253 tot_hdr_len
+= sizeof(struct ipv6hdr
);
258 if (!pskb_may_pull(skb
, tot_hdr_len
))
261 if (skb
->protocol
== htons(ETH_P_IP
)) {
262 if (mtu
< IP_MIN_MTU
) {
263 if (ntohs(ip_hdr(skb
)->tot_len
) >= IP_MIN_MTU
)
270 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
271 else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
272 if (mtu
< IPV6_MIN_MTU
) {
273 unsigned int packet_length
= sizeof(struct ipv6hdr
) +
274 ntohs(ipv6_hdr(skb
)->payload_len
);
276 if (packet_length
>= IPV6_MIN_MTU
277 || ntohs(ipv6_hdr(skb
)->payload_len
) == 0)
/*
 * Hide the tunnel headers while tnl_frag_needed() runs, then push them back
 * so the skb data pointer is where it was on entry.
 */
285 __skb_pull(skb
, tunnel_hdr_len
);
286 tnl_frag_needed(vport
, mutable, skb
, mtu
, key
);
287 __skb_push(skb
, tunnel_hdr_len
);
290 skb_set_mac_header(skb
, orig_mac_header
);
291 skb_set_network_header(skb
, orig_nw_header
);
292 skb
->protocol
= htons(ETH_P_IP
);
/*
 * check_checksum - validate the GRE checksum of a received packet, if one is
 * present.
 *
 * When the GRE_CSUM flag is set, the checksum value is obtained according to
 * skb->ip_summed: for CHECKSUM_COMPLETE the already-accumulated skb->csum is
 * folded; on another path (its case label is not visible here)
 * __skb_checksum_complete() is called and the skb is then marked
 * CHECKSUM_COMPLETE.
 *
 * NOTE(review): the declaration of csum and the return statement are not
 * visible in this excerpt; gre_rcv() treats a false result as failure, so
 * presumably the function returns whether the computed value is zero --
 * confirm.
 * NOTE(review): greh is located at (iph + 1), i.e. directly after a
 * fixed-size struct iphdr, whereas parse_header() uses iph->ihl to find the
 * GRE header.  These differ when the outer IP header carries options --
 * confirm which is intended.
 */
295 static bool check_checksum(struct sk_buff
*skb
)
297 struct iphdr
*iph
= ip_hdr(skb
);
298 struct gre_base_hdr
*greh
= (struct gre_base_hdr
*)(iph
+ 1);
301 if (greh
->flags
& GRE_CSUM
) {
302 switch (skb
->ip_summed
) {
303 case CHECKSUM_COMPLETE
:
304 csum
= csum_fold(skb
->csum
);
312 csum
= __skb_checksum_complete(skb
);
313 skb
->ip_summed
= CHECKSUM_COMPLETE
;
/*
 * gre_rcv - process a received GRE packet.
 *
 * Steps visible in this excerpt:
 *   - ensure the GRE base header plus an Ethernet header can be pulled;
 *   - verify the GRE checksum with check_checksum();
 *   - parse the GRE header with parse_header() and ensure the full header
 *     plus an Ethernet header can be pulled;
 *   - look up the tunnel port with tnl_find_port(), passing daddr before
 *     saddr; when no port is found an ICMP port-unreachable is sent;
 *   - store the received key in OVS_CB(skb)->tun_id when TNL_F_IN_KEY_MATCH
 *     is set; the assignment of 0 below is presumably the else branch (the
 *     "else" line is not visible) -- confirm;
 *   - strip hdr_len bytes of GRE header, adjust the receive checksum with
 *     skb_postpull_rcsum(), and hand the packet to tnl_rcv() together with
 *     the outer IP TOS.
 *
 * NOTE(review): the declarations of vport, iph, flags, key and hdr_len, the
 * error paths and the return statements are not visible in this excerpt.
 */
321 /* Called with rcu_read_lock and BH disabled. */
322 static int gre_rcv(struct sk_buff
*skb
)
325 const struct tnl_mutable_config
*mutable;
331 if (unlikely(!pskb_may_pull(skb
, sizeof(struct gre_base_hdr
) + ETH_HLEN
)))
334 if (unlikely(!check_checksum(skb
)))
337 hdr_len
= parse_header(ip_hdr(skb
), &flags
, &key
);
338 if (unlikely(hdr_len
< 0))
341 if (unlikely(!pskb_may_pull(skb
, hdr_len
+ ETH_HLEN
)))
345 vport
= tnl_find_port(iph
->daddr
, iph
->saddr
, key
, TNL_T_PROTO_GRE
,
347 if (unlikely(!vport
)) {
348 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_PORT_UNREACH
, 0);
352 if (mutable->flags
& TNL_F_IN_KEY_MATCH
)
353 OVS_CB(skb
)->tun_id
= key
;
355 OVS_CB(skb
)->tun_id
= 0;
357 __skb_pull(skb
, hdr_len
);
358 skb_postpull_rcsum(skb
, skb_transport_header(skb
), hdr_len
+ ETH_HLEN
);
360 tnl_rcv(vport
, skb
, iph
->tos
);
/*
 * Tunnel operations table for GRE.  It names the tunnel type and IP protocol
 * and points at the header helpers defined in this file (gre_hdr_len,
 * gre_build_header, gre_update_header).  gre_create() passes it to
 * tnl_create().
 */
368 static const struct tnl_ops gre_tnl_ops
= {
369 .tunnel_type
= TNL_T_PROTO_GRE
,
370 .ipproto
= IPPROTO_GRE
,
371 .hdr_len
= gre_hdr_len
,
372 .build_header
= gre_build_header
,
373 .update_header
= gre_update_header
,
/*
 * gre_create - create a GRE vport.
 *
 * Delegates to tnl_create(), supplying the caller's parms together with this
 * file's gre_vport_ops and gre_tnl_ops tables, and returns its result.
 */
376 static struct vport
*gre_create(const struct vport_parms
*parms
)
378 return tnl_create(parms
, &gre_vport_ops
, &gre_tnl_ops
);
/*
 * Protocol handler table that gre_init() registers for IPPROTO_GRE and
 * gre_exit() unregisters.  ICMP errors are routed to gre_err().
 * NOTE(review): only .err_handler is visible in this excerpt; other members
 * (presumably a receive handler) are not shown -- confirm.
 */
381 static const struct net_protocol gre_protocol_handlers
= {
383 .err_handler
= gre_err
,
/*
 * gre_init - register gre_protocol_handlers for IPPROTO_GRE with
 * inet_add_protocol() and log a warning when that does not succeed.
 *
 * NOTE(review): the declaration of err, the condition guarding the warning
 * and the return statement are not visible in this excerpt; presumably err
 * is returned -- confirm.
 */
386 static int gre_init(void)
390 err
= inet_add_protocol(&gre_protocol_handlers
, IPPROTO_GRE
);
392 pr_warn("cannot register gre protocol handler\n");
/*
 * gre_exit - undo gre_init() by removing gre_protocol_handlers for
 * IPPROTO_GRE with inet_del_protocol().
 */
397 static void gre_exit(void)
399 inet_del_protocol(&gre_protocol_handlers
, IPPROTO_GRE
);
/*
 * vport operations table for the GRE vport type (OVS_VPORT_TYPE_GRE, with
 * VPORT_F_TUN_ID set).  Creation goes through gre_create(); the remaining
 * visible callbacks are the generic tunnel (tnl_*) and generic vport
 * (vport_gen_*) implementations.
 * NOTE(review): some members of this initializer, and its closing brace, are
 * not visible in this excerpt.
 */
402 const struct vport_ops gre_vport_ops
= {
403 .type
= OVS_VPORT_TYPE_GRE
,
404 .flags
= VPORT_F_TUN_ID
,
407 .create
= gre_create
,
408 .destroy
= tnl_destroy
,
409 .set_addr
= tnl_set_addr
,
410 .get_name
= tnl_get_name
,
411 .get_addr
= tnl_get_addr
,
412 .get_options
= tnl_get_options
,
413 .set_options
= tnl_set_options
,
414 .get_dev_flags
= vport_gen_get_dev_flags
,
415 .is_running
= vport_gen_is_running
,
416 .get_operstate
= vport_gen_get_operstate
,