2 * Copyright (c) 2010 Nicira Networks.
3 * Distributed under the terms of the GNU GPL version 2.
5 * Significant portions of this file may be copied from parts of the Linux
6 * kernel, by Linus Torvalds and others.
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12 #include <linux/skbuff.h>
14 #include <linux/if_tunnel.h>
15 #include <linux/if_vlan.h>
20 #include <net/protocol.h>
24 #include "vport-generic.h"
27 * The GRE header is composed of a series of sections: a base and then a variable
30 #define GRE_HEADER_SECTION 4
37 static int gre_hdr_len(const struct tnl_port_config
*port_config
)
41 len
= GRE_HEADER_SECTION
;
43 if (port_config
->flags
& TNL_F_CSUM
)
44 len
+= GRE_HEADER_SECTION
;
46 if (port_config
->out_key
||
47 port_config
->flags
& TNL_F_OUT_KEY_ACTION
)
48 len
+= GRE_HEADER_SECTION
;
/*
 * gre_build_header - fill in the configuration-dependent part of a GRE header.
 *
 * Treats `header` as a struct gre_base_hdr followed by 32-bit option words.
 * The protocol field is set to ETH_P_TEB.  GRE_CSUM is ORed into the flags
 * when the port has TNL_F_CSUM; GRE_KEY is ORed in when an output key is
 * configured or TNL_F_OUT_KEY_ACTION is set.  A non-zero configured out_key
 * is written through `options`.
 *
 * NOTE(review): this listing omits the remaining parameter(s), the opening
 * of the body and the statements inside the TNL_F_CSUM branch after the flag
 * update (including whatever advances `options`) -- confirm against the
 * complete file before relying on this description.
 */
53 static void gre_build_header(const struct vport
*vport
,
54 const struct tnl_mutable_config
*mutable,
57 struct gre_base_hdr
*greh
= header
;
/* Option words start directly after the base header. */
58 __be32
*options
= (__be32
*)(greh
+ 1);
60 greh
->protocol
= htons(ETH_P_TEB
);
63 if (mutable->port_config
.flags
& TNL_F_CSUM
) {
64 greh
->flags
|= GRE_CSUM
;
/* Same condition gre_hdr_len() uses to reserve room for a key section. */
69 if (mutable->port_config
.out_key
||
70 mutable->port_config
.flags
& TNL_F_OUT_KEY_ACTION
)
71 greh
->flags
|= GRE_KEY
;
/* Only a fixed (non-zero) key is stored here. */
73 if (mutable->port_config
.out_key
)
74 *options
= mutable->port_config
.out_key
;
/*
 * gre_update_header - write the per-packet GRE option words into an skb.
 *
 * `options` initially points at the last GRE_HEADER_SECTION-sized word of the
 * tunnel header (network header + mutable->tunnel_hlen - GRE_HEADER_SECTION).
 * With TNL_F_OUT_KEY_ACTION that word receives OVS_CB(skb)->tun_id.  With
 * TNL_F_CSUM a folded checksum, computed from the transport offset to the end
 * of the packet, is stored through `options` as a __sum16.
 *
 * NOTE(review): the final parameter(s), the statements that step `options`
 * backwards, the tail of the skb_checksum() call and the return statement are
 * not visible in this listing -- confirm against the complete file.
 */
77 static struct sk_buff
*gre_update_header(const struct vport
*vport
,
78 const struct tnl_mutable_config
*mutable,
79 struct dst_entry
*dst
,
82 __be32
*options
= (__be32
*)(skb_network_header(skb
) + mutable->tunnel_hlen
83 - GRE_HEADER_SECTION
);
85 /* Work backwards over the options so the checksum is last. */
86 if (mutable->port_config
.flags
& TNL_F_OUT_KEY_ACTION
) {
87 *options
= OVS_CB(skb
)->tun_id
;
/* The checksum covers everything from the transport header onwards. */
91 if (mutable->port_config
.flags
& TNL_F_CSUM
)
92 *(__sum16
*)options
= csum_fold(skb_checksum(skb
,
93 skb_transport_offset(skb
),
94 skb
->len
- skb_transport_offset(skb
),
97 * Allow our local IP stack to fragment the outer packet even if the
98 * DF bit is set as a last resort.
/*
 * parse_header - inspect a GRE header that follows an IP header.
 *
 * The GRE base header is located at iph + (iph->ihl << 2) and its option
 * words directly after it.  *flags receives the raw GRE flags.  Packets with
 * GRE_VERSION or GRE_ROUTING bits set, or whose protocol is not ETH_P_TEB,
 * are singled out as unlikely cases.  hdr_len starts at one
 * GRE_HEADER_SECTION and grows by one section for each of GRE_CSUM, GRE_KEY
 * and GRE_SEQ that is present.
 *
 * Callers in this file treat a negative return value as failure.
 *
 * NOTE(review): the statements guarded by the two unlikely() checks, the
 * handling of `options`/`*key` inside the CSUM and KEY branches and the
 * return statement are not visible in this listing -- confirm against the
 * complete file.
 */
105 static int parse_header(struct iphdr
*iph
, __be16
*flags
, __be32
*key
)
107 /* IP and ICMP protocol handlers check that the IHL is valid. */
108 struct gre_base_hdr
*greh
= (struct gre_base_hdr
*)((u8
*)iph
+ (iph
->ihl
<< 2));
109 __be32
*options
= (__be32
*)(greh
+ 1);
112 *flags
= greh
->flags
;
114 if (unlikely(greh
->flags
& (GRE_VERSION
| GRE_ROUTING
)))
117 if (unlikely(greh
->protocol
!= htons(ETH_P_TEB
)))
/* Base section is always counted; each optional field adds one more. */
120 hdr_len
= GRE_HEADER_SECTION
;
122 if (greh
->flags
& GRE_CSUM
) {
123 hdr_len
+= GRE_HEADER_SECTION
;
127 if (greh
->flags
& GRE_KEY
) {
128 hdr_len
+= GRE_HEADER_SECTION
;
135 if (unlikely(greh
->flags
& GRE_SEQ
))
136 hdr_len
+= GRE_HEADER_SECTION
;
/*
 * gre_err - ICMP error handler for GRE packets sent by this module.
 *
 * Visible steps, in order:
 *  - read the ICMP type, code and advertised MTU from the ICMP header;
 *  - test for ICMP_DEST_UNREACH / ICMP_FRAG_NEEDED;
 *  - make sure the outer IP header, a minimal GRE header, an Ethernet header
 *    and an inner IP header can be pulled;
 *  - parse the GRE header at skb->data and look up the tunnel port with
 *    (saddr, daddr, key) -- the reverse of the address order used by
 *    gre_rcv(), because the quoted packet is one we transmitted;
 *  - compare the port's in/out keys, its IN_KEY_MATCH/OUT_KEY_ACTION flags
 *    and its checksum setting against the quoted header;
 *  - point the MAC and network headers at the encapsulated frame, deriving
 *    skb->protocol from the inner Ethernet (and optional 802.1Q) header;
 *  - compare `mtu` with IP_MIN_MTU / IPV6_MIN_MTU for IPv4 / IPv6 payloads;
 *  - pull the tunnel header, call tnl_frag_needed(), push it back;
 *  - restore the saved header offsets and set skb->protocol to ETH_P_IP.
 *
 * NOTE(review): the statements guarded by most of the checks below (early
 * exits, MTU adjustments) and several declarations are not visible in this
 * listing -- confirm against the complete file.
 */
141 /* Called with rcu_read_lock and BH disabled. */
142 static void gre_err(struct sk_buff
*skb
, u32 info
)
145 const struct tnl_mutable_config
*mutable;
146 const int type
= icmp_hdr(skb
)->type
;
147 const int code
= icmp_hdr(skb
)->code
;
148 int mtu
= ntohs(icmp_hdr(skb
)->un
.frag
.mtu
);
153 int tunnel_hdr_len
, tot_hdr_len
;
154 unsigned int orig_mac_header
;
155 unsigned int orig_nw_header
;
157 if (type
!= ICMP_DEST_UNREACH
|| code
!= ICMP_FRAG_NEEDED
)
161 * The mimimum size packet that we would actually be able to process:
162 * encapsulating IP header, minimum GRE header, Ethernet header,
165 if (!pskb_may_pull(skb
, sizeof(struct iphdr
) + GRE_HEADER_SECTION
+
166 ETH_HLEN
+ sizeof(struct iphdr
)))
/* skb->data is interpreted as the outer IP header of the quoted packet. */
169 iph
= (struct iphdr
*)skb
->data
;
171 tunnel_hdr_len
= parse_header(iph
, &flags
, &key
);
172 if (tunnel_hdr_len
< 0)
175 vport
= tnl_find_port(iph
->saddr
, iph
->daddr
, key
,
176 TNL_T_PROTO_GRE
| TNL_T_KEY_EITHER
, &mutable);
181 * Packets received by this function were previously sent by us, so
182 * any comparisons should be to the output values, not the input.
183 * However, it's not really worth it to have a hash table based on
184 * output keys (especially since ICMP error handling of tunneled packets
185 * isn't that reliable anyways). Therefore, we do a lookup based on the
186 * out key as if it were the in key and then check to see if the input
187 * and output keys are the same.
189 if (mutable->port_config
.in_key
!= mutable->port_config
.out_key
)
192 if (!!(mutable->port_config
.flags
& TNL_F_IN_KEY_MATCH
) !=
193 !!(mutable->port_config
.flags
& TNL_F_OUT_KEY_ACTION
))
196 if ((mutable->port_config
.flags
& TNL_F_CSUM
) && !(flags
& GRE_CSUM
))
/* From here on tunnel_hdr_len also includes the outer IP header. */
199 tunnel_hdr_len
+= iph
->ihl
<< 2;
/* Save the current header offsets; they are restored at the end. */
201 orig_mac_header
= skb_mac_header(skb
) - skb
->data
;
202 orig_nw_header
= skb_network_header(skb
) - skb
->data
;
203 skb_set_mac_header(skb
, tunnel_hdr_len
);
205 tot_hdr_len
= tunnel_hdr_len
+ ETH_HLEN
;
207 skb
->protocol
= eth_hdr(skb
)->h_proto
;
208 if (skb
->protocol
== htons(ETH_P_8021Q
)) {
209 tot_hdr_len
+= VLAN_HLEN
;
210 skb
->protocol
= vlan_eth_hdr(skb
)->h_vlan_encapsulated_proto
;
213 skb_set_network_header(skb
, tot_hdr_len
);
216 if (skb
->protocol
== htons(ETH_P_IP
))
217 tot_hdr_len
+= sizeof(struct iphdr
);
218 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
219 else if (skb
->protocol
== htons(ETH_P_IPV6
))
220 tot_hdr_len
+= sizeof(struct ipv6hdr
);
225 if (!pskb_may_pull(skb
, tot_hdr_len
))
228 if (skb
->protocol
== htons(ETH_P_IP
)) {
229 if (mtu
< IP_MIN_MTU
) {
230 if (ntohs(ip_hdr(skb
)->tot_len
) >= IP_MIN_MTU
)
237 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
238 else if (skb
->protocol
== htons(ETH_P_IPV6
)) {
239 if (mtu
< IPV6_MIN_MTU
) {
240 unsigned int packet_length
= sizeof(struct ipv6hdr
) +
241 ntohs(ipv6_hdr(skb
)->payload_len
);
243 if (packet_length
>= IPV6_MIN_MTU
244 || ntohs(ipv6_hdr(skb
)->payload_len
) == 0)
/* Hide the tunnel header for the duration of the call, then put it back. */
252 __skb_pull(skb
, tunnel_hdr_len
);
253 tnl_frag_needed(vport
, mutable, skb
, mtu
, key
);
254 __skb_push(skb
, tunnel_hdr_len
);
257 skb_set_mac_header(skb
, orig_mac_header
);
258 skb_set_network_header(skb
, orig_nw_header
);
259 skb
->protocol
= htons(ETH_P_IP
);
/*
 * check_checksum - verify the GRE checksum of a received packet, if present.
 *
 * Nothing is verified unless the GRE header carries GRE_CSUM.  When it does,
 * the code switches on skb->ip_summed: for CHECKSUM_COMPLETE the existing
 * skb->csum is folded; on the other visible path the checksum is computed
 * with __skb_checksum_complete() and the skb is marked CHECKSUM_COMPLETE.
 * gre_rcv() treats a false return as a failure.
 *
 * NOTE(review): the GRE header is located with `iph + 1`, i.e. assuming a
 * 20-byte IP header, whereas parse_header() uses `iph->ihl << 2`.  If the
 * outer header can carry IP options the wrong bytes are tested for GRE_CSUM
 * -- confirm and consider using the IHL here as well.
 *
 * NOTE(review): the declaration of `csum`, the remaining case labels and the
 * return statement are not visible in this listing.
 */
262 static bool check_checksum(struct sk_buff
*skb
)
264 struct iphdr
*iph
= ip_hdr(skb
);
265 struct gre_base_hdr
*greh
= (struct gre_base_hdr
*)(iph
+ 1);
268 if (greh
->flags
& GRE_CSUM
) {
269 switch (skb
->ip_summed
) {
270 case CHECKSUM_COMPLETE
:
271 csum
= csum_fold(skb
->csum
);
279 csum
= __skb_checksum_complete(skb
);
280 skb
->ip_summed
= CHECKSUM_COMPLETE
;
/*
 * gre_rcv - receive handler for incoming GRE packets.
 *
 * Visible steps, in order:
 *  - require a GRE base header plus an Ethernet header to be pullable;
 *  - verify the GRE checksum via check_checksum();
 *  - parse the GRE header (flags, key, header length) and require the full
 *    header plus an Ethernet header to be pullable;
 *  - look up the tunnel port by (daddr, saddr, key); if none matches, an
 *    ICMP port-unreachable error is sent;
 *  - record the key in OVS_CB(skb)->tun_id when the port has
 *    TNL_F_IN_KEY_MATCH (a second visible assignment stores 0, presumably
 *    the else branch);
 *  - pull the GRE header and adjust the receive checksum.
 *
 * NOTE(review): local declarations, the assignment of `iph`, every error
 * exit and everything after skb_postpull_rcsum() are not visible in this
 * listing -- confirm against the complete file.
 */
288 /* Called with rcu_read_lock and BH disabled. */
289 static int gre_rcv(struct sk_buff
*skb
)
292 const struct tnl_mutable_config
*mutable;
298 if (unlikely(!pskb_may_pull(skb
, sizeof(struct gre_base_hdr
) + ETH_HLEN
)))
301 if (unlikely(!check_checksum(skb
)))
304 hdr_len
= parse_header(ip_hdr(skb
), &flags
, &key
);
305 if (unlikely(hdr_len
< 0))
/* Re-check with the real header length now that the options are known. */
308 if (unlikely(!pskb_may_pull(skb
, hdr_len
+ ETH_HLEN
)))
312 vport
= tnl_find_port(iph
->daddr
, iph
->saddr
, key
,
313 TNL_T_PROTO_GRE
| TNL_T_KEY_EITHER
, &mutable);
314 if (unlikely(!vport
)) {
315 icmp_send(skb
, ICMP_DEST_UNREACH
, ICMP_PORT_UNREACH
, 0);
319 if (mutable->port_config
.flags
& TNL_F_IN_KEY_MATCH
)
320 OVS_CB(skb
)->tun_id
= key
;
322 OVS_CB(skb
)->tun_id
= 0;
324 __skb_pull(skb
, hdr_len
);
325 skb_postpull_rcsum(skb
, skb_transport_header(skb
), hdr_len
+ ETH_HLEN
);
/*
 * GRE-specific tunnel operations passed to tnl_create() by gre_create():
 * the tunnel type and IP protocol number, plus the header-length,
 * header-build and header-update callbacks defined in this file.
 */
335 struct tnl_ops gre_tnl_ops
= {
336 .tunnel_type
= TNL_T_PROTO_GRE
,
337 .ipproto
= IPPROTO_GRE
,
338 .hdr_len
= gre_hdr_len
,
339 .build_header
= gre_build_header
,
340 .update_header
= gre_update_header
,
343 static struct vport
*gre_create(const char *name
, const void __user
*config
)
345 return tnl_create(name
, config
, &gre_vport_ops
, &gre_tnl_ops
);
/*
 * Protocol hooks that gre_init() registers for IPPROTO_GRE and gre_exit()
 * removes again.  ICMP errors are routed to gre_err().
 *
 * NOTE(review): other members of this initializer (e.g. the receive handler)
 * are not visible in this listing -- confirm against the complete file.
 */
348 static struct net_protocol gre_protocol_handlers
= {
350 .err_handler
= gre_err
,
/*
 * gre_init - register the GRE protocol handlers for IPPROTO_GRE.
 *
 * Stores the result of inet_add_protocol() in `err`; a warning is logged,
 * presumably only when registration fails.
 *
 * NOTE(review): the declaration of `err`, the condition guarding the warning
 * and the return statement are not visible in this listing -- confirm
 * against the complete file.
 */
353 static int gre_init(void)
357 err
= inet_add_protocol(&gre_protocol_handlers
, IPPROTO_GRE
);
359 pr_warn("cannot register gre protocol handler\n");
364 static void gre_exit(void)
366 inet_del_protocol(&gre_protocol_handlers
, IPPROTO_GRE
);
369 struct vport_ops gre_vport_ops
= {
371 .flags
= VPORT_F_GEN_STATS
| VPORT_F_TUN_ID
,
374 .create
= gre_create
,
375 .modify
= tnl_modify
,
376 .destroy
= tnl_destroy
,
377 .set_mtu
= tnl_set_mtu
,
378 .set_addr
= tnl_set_addr
,
379 .get_name
= tnl_get_name
,
380 .get_addr
= tnl_get_addr
,
381 .get_dev_flags
= vport_gen_get_dev_flags
,
382 .is_running
= vport_gen_is_running
,
383 .get_operstate
= vport_gen_get_operstate
,
384 .get_mtu
= tnl_get_mtu
,