]>
Commit | Line | Data |
---|---|---|
2736b84e JG |
1 | /* |
2 | * Copyright (c) 2010 Nicira Networks. | |
3 | * Distributed under the terms of the GNU GPL version 2. | |
4 | * | |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
7 | */ | |
8 | ||
dfffaef1 JP |
9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
10 | ||
d1eb60cc JG |
11 | #include <linux/if.h> |
12 | #include <linux/skbuff.h> | |
2736b84e JG |
13 | #include <linux/ip.h> |
14 | #include <linux/if_tunnel.h> | |
15 | #include <linux/if_vlan.h> | |
16 | #include <linux/in.h> | |
2736b84e | 17 | |
2736b84e | 18 | #include <net/icmp.h> |
2736b84e | 19 | #include <net/ip.h> |
2736b84e | 20 | #include <net/protocol.h> |
2736b84e | 21 | |
d1eb60cc | 22 | #include "tunnel.h" |
2736b84e | 23 | #include "vport.h" |
b19e8815 | 24 | #include "vport-generic.h" |
2736b84e | 25 | |
d1eb60cc JG |
26 | /* |
27 | * The GRE header is composed of a series of sections: a base and then a variable | |
28 | * number of options. | |
29 | */ | |
2736b84e JG |
30 | #define GRE_HEADER_SECTION 4 |
31 | ||
27b6cec0 JG |
32 | struct gre_base_hdr { |
33 | __be16 flags; | |
34 | __be16 protocol; | |
35 | }; | |
36 | ||
d1eb60cc | 37 | static int gre_hdr_len(const struct tnl_port_config *port_config) |
2736b84e | 38 | { |
d1eb60cc | 39 | int len; |
2736b84e | 40 | |
d1eb60cc | 41 | len = GRE_HEADER_SECTION; |
2736b84e | 42 | |
d1eb60cc JG |
43 | if (port_config->flags & TNL_F_CSUM) |
44 | len += GRE_HEADER_SECTION; | |
2736b84e | 45 | |
d1eb60cc JG |
46 | if (port_config->out_key || |
47 | port_config->flags & TNL_F_OUT_KEY_ACTION) | |
48 | len += GRE_HEADER_SECTION; | |
2736b84e | 49 | |
d1eb60cc | 50 | return len; |
2736b84e JG |
51 | } |
52 | ||
842cf6f4 JG |
53 | static void gre_build_header(const struct vport *vport, |
54 | const struct tnl_mutable_config *mutable, | |
55 | void *header) | |
2736b84e | 56 | { |
842cf6f4 JG |
57 | struct gre_base_hdr *greh = header; |
58 | __be32 *options = (__be32 *)(greh + 1); | |
2736b84e | 59 | |
27b6cec0 JG |
60 | greh->protocol = htons(ETH_P_TEB); |
61 | greh->flags = 0; | |
2736b84e | 62 | |
842cf6f4 JG |
63 | if (mutable->port_config.flags & TNL_F_CSUM) { |
64 | greh->flags |= GRE_CSUM; | |
65 | *options = 0; | |
66 | options++; | |
67 | } | |
68 | ||
2736b84e | 69 | if (mutable->port_config.out_key || |
842cf6f4 | 70 | mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) |
27b6cec0 | 71 | greh->flags |= GRE_KEY; |
2736b84e | 72 | |
842cf6f4 JG |
73 | if (mutable->port_config.out_key) |
74 | *options = mutable->port_config.out_key; | |
75 | } | |
76 | ||
77 | static struct sk_buff *gre_update_header(const struct vport *vport, | |
78 | const struct tnl_mutable_config *mutable, | |
79 | struct dst_entry *dst, | |
80 | struct sk_buff *skb) | |
81 | { | |
82 | __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen | |
83 | - GRE_HEADER_SECTION); | |
2736b84e | 84 | |
842cf6f4 JG |
85 | /* Work backwards over the options so the checksum is last. */ |
86 | if (mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) { | |
87 | *options = OVS_CB(skb)->tun_id; | |
2736b84e JG |
88 | options--; |
89 | } | |
90 | ||
842cf6f4 | 91 | if (mutable->port_config.flags & TNL_F_CSUM) |
2736b84e | 92 | *(__sum16 *)options = csum_fold(skb_checksum(skb, |
842cf6f4 JG |
93 | skb_transport_offset(skb), |
94 | skb->len - skb_transport_offset(skb), | |
2736b84e | 95 | 0)); |
5214f5c4 JG |
96 | /* |
97 | * Allow our local IP stack to fragment the outer packet even if the | |
98 | * DF bit is set as a last resort. | |
99 | */ | |
100 | skb->local_df = 1; | |
101 | ||
102 | return skb; | |
2736b84e JG |
103 | } |
104 | ||
d1eb60cc | 105 | static int parse_header(struct iphdr *iph, __be16 *flags, __be32 *key) |
2736b84e | 106 | { |
eea2aafb | 107 | /* IP and ICMP protocol handlers check that the IHL is valid. */ |
27b6cec0 JG |
108 | struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2)); |
109 | __be32 *options = (__be32 *)(greh + 1); | |
2736b84e JG |
110 | int hdr_len; |
111 | ||
27b6cec0 | 112 | *flags = greh->flags; |
2736b84e | 113 | |
d1eb60cc | 114 | if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) |
2736b84e JG |
115 | return -EINVAL; |
116 | ||
d1eb60cc | 117 | if (unlikely(greh->protocol != htons(ETH_P_TEB))) |
2736b84e JG |
118 | return -EINVAL; |
119 | ||
120 | hdr_len = GRE_HEADER_SECTION; | |
121 | ||
27b6cec0 | 122 | if (greh->flags & GRE_CSUM) { |
2736b84e JG |
123 | hdr_len += GRE_HEADER_SECTION; |
124 | options++; | |
125 | } | |
126 | ||
27b6cec0 | 127 | if (greh->flags & GRE_KEY) { |
2736b84e JG |
128 | hdr_len += GRE_HEADER_SECTION; |
129 | ||
130 | *key = *options; | |
131 | options++; | |
132 | } else | |
133 | *key = 0; | |
134 | ||
d1eb60cc | 135 | if (unlikely(greh->flags & GRE_SEQ)) |
2736b84e JG |
136 | hdr_len += GRE_HEADER_SECTION; |
137 | ||
138 | return hdr_len; | |
139 | } | |
140 | ||
d1eb60cc | 141 | /* Called with rcu_read_lock and BH disabled. */ |
fceb2a5b | 142 | static void gre_err(struct sk_buff *skb, u32 info) |
2736b84e JG |
143 | { |
144 | struct vport *vport; | |
d1eb60cc | 145 | const struct tnl_mutable_config *mutable; |
2736b84e JG |
146 | const int type = icmp_hdr(skb)->type; |
147 | const int code = icmp_hdr(skb)->code; | |
148 | int mtu = ntohs(icmp_hdr(skb)->un.frag.mtu); | |
149 | ||
150 | struct iphdr *iph; | |
151 | __be16 flags; | |
152 | __be32 key; | |
153 | int tunnel_hdr_len, tot_hdr_len; | |
154 | unsigned int orig_mac_header; | |
155 | unsigned int orig_nw_header; | |
156 | ||
157 | if (type != ICMP_DEST_UNREACH || code != ICMP_FRAG_NEEDED) | |
158 | return; | |
159 | ||
d1eb60cc JG |
160 | /* |
161 | * The mimimum size packet that we would actually be able to process: | |
2736b84e | 162 | * encapsulating IP header, minimum GRE header, Ethernet header, |
d1eb60cc JG |
163 | * inner IPv4 header. |
164 | */ | |
2736b84e JG |
165 | if (!pskb_may_pull(skb, sizeof(struct iphdr) + GRE_HEADER_SECTION + |
166 | ETH_HLEN + sizeof(struct iphdr))) | |
167 | return; | |
168 | ||
169 | iph = (struct iphdr *)skb->data; | |
170 | ||
d1eb60cc | 171 | tunnel_hdr_len = parse_header(iph, &flags, &key); |
2736b84e JG |
172 | if (tunnel_hdr_len < 0) |
173 | return; | |
174 | ||
d1eb60cc JG |
175 | vport = tnl_find_port(iph->saddr, iph->daddr, key, |
176 | TNL_T_PROTO_GRE | TNL_T_KEY_EITHER, &mutable); | |
2736b84e JG |
177 | if (!vport) |
178 | return; | |
179 | ||
d1eb60cc JG |
180 | /* |
181 | * Packets received by this function were previously sent by us, so | |
eea2aafb JG |
182 | * any comparisons should be to the output values, not the input. |
183 | * However, it's not really worth it to have a hash table based on | |
184 | * output keys (especially since ICMP error handling of tunneled packets | |
185 | * isn't that reliable anyways). Therefore, we do a lookup based on the | |
186 | * out key as if it were the in key and then check to see if the input | |
d1eb60cc JG |
187 | * and output keys are the same. |
188 | */ | |
eea2aafb JG |
189 | if (mutable->port_config.in_key != mutable->port_config.out_key) |
190 | return; | |
191 | ||
d1eb60cc JG |
192 | if (!!(mutable->port_config.flags & TNL_F_IN_KEY_MATCH) != |
193 | !!(mutable->port_config.flags & TNL_F_OUT_KEY_ACTION)) | |
eea2aafb JG |
194 | return; |
195 | ||
d1eb60cc | 196 | if ((mutable->port_config.flags & TNL_F_CSUM) && !(flags & GRE_CSUM)) |
2736b84e JG |
197 | return; |
198 | ||
eea2aafb | 199 | tunnel_hdr_len += iph->ihl << 2; |
2736b84e JG |
200 | |
201 | orig_mac_header = skb_mac_header(skb) - skb->data; | |
202 | orig_nw_header = skb_network_header(skb) - skb->data; | |
eea2aafb | 203 | skb_set_mac_header(skb, tunnel_hdr_len); |
2736b84e | 204 | |
eea2aafb | 205 | tot_hdr_len = tunnel_hdr_len + ETH_HLEN; |
2736b84e JG |
206 | |
207 | skb->protocol = eth_hdr(skb)->h_proto; | |
208 | if (skb->protocol == htons(ETH_P_8021Q)) { | |
209 | tot_hdr_len += VLAN_HLEN; | |
210 | skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; | |
211 | } | |
212 | ||
eea2aafb JG |
213 | skb_set_network_header(skb, tot_hdr_len); |
214 | mtu -= tot_hdr_len; | |
215 | ||
2736b84e JG |
216 | if (skb->protocol == htons(ETH_P_IP)) |
217 | tot_hdr_len += sizeof(struct iphdr); | |
6f470982 | 218 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
eea2aafb | 219 | else if (skb->protocol == htons(ETH_P_IPV6)) |
2736b84e | 220 | tot_hdr_len += sizeof(struct ipv6hdr); |
6f470982 | 221 | #endif |
2736b84e JG |
222 | else |
223 | goto out; | |
224 | ||
225 | if (!pskb_may_pull(skb, tot_hdr_len)) | |
226 | goto out; | |
227 | ||
2736b84e JG |
228 | if (skb->protocol == htons(ETH_P_IP)) { |
229 | if (mtu < IP_MIN_MTU) { | |
230 | if (ntohs(ip_hdr(skb)->tot_len) >= IP_MIN_MTU) | |
231 | mtu = IP_MIN_MTU; | |
232 | else | |
233 | goto out; | |
234 | } | |
235 | ||
6f470982 JG |
236 | } |
237 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | |
238 | else if (skb->protocol == htons(ETH_P_IPV6)) { | |
2736b84e JG |
239 | if (mtu < IPV6_MIN_MTU) { |
240 | unsigned int packet_length = sizeof(struct ipv6hdr) + | |
241 | ntohs(ipv6_hdr(skb)->payload_len); | |
242 | ||
243 | if (packet_length >= IPV6_MIN_MTU | |
244 | || ntohs(ipv6_hdr(skb)->payload_len) == 0) | |
245 | mtu = IPV6_MIN_MTU; | |
246 | else | |
247 | goto out; | |
248 | } | |
249 | } | |
6f470982 | 250 | #endif |
2736b84e | 251 | |
d1eb60cc JG |
252 | __skb_pull(skb, tunnel_hdr_len); |
253 | tnl_frag_needed(vport, mutable, skb, mtu, key); | |
254 | __skb_push(skb, tunnel_hdr_len); | |
2736b84e JG |
255 | |
256 | out: | |
257 | skb_set_mac_header(skb, orig_mac_header); | |
258 | skb_set_network_header(skb, orig_nw_header); | |
259 | skb->protocol = htons(ETH_P_IP); | |
260 | } | |
261 | ||
d1eb60cc JG |
262 | static bool check_checksum(struct sk_buff *skb) |
263 | { | |
264 | struct iphdr *iph = ip_hdr(skb); | |
265 | struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1); | |
266 | __sum16 csum = 0; | |
267 | ||
268 | if (greh->flags & GRE_CSUM) { | |
269 | switch (skb->ip_summed) { | |
270 | case CHECKSUM_COMPLETE: | |
271 | csum = csum_fold(skb->csum); | |
272 | ||
273 | if (!csum) | |
274 | break; | |
275 | /* Fall through. */ | |
276 | ||
277 | case CHECKSUM_NONE: | |
278 | skb->csum = 0; | |
279 | csum = __skb_checksum_complete(skb); | |
280 | skb->ip_summed = CHECKSUM_COMPLETE; | |
281 | break; | |
282 | } | |
283 | } | |
284 | ||
285 | return (csum == 0); | |
286 | } | |
287 | ||
288 | /* Called with rcu_read_lock and BH disabled. */ | |
fceb2a5b | 289 | static int gre_rcv(struct sk_buff *skb) |
2736b84e JG |
290 | { |
291 | struct vport *vport; | |
d1eb60cc | 292 | const struct tnl_mutable_config *mutable; |
2736b84e JG |
293 | int hdr_len; |
294 | struct iphdr *iph; | |
295 | __be16 flags; | |
296 | __be32 key; | |
297 | ||
d1eb60cc | 298 | if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN))) |
2736b84e JG |
299 | goto error; |
300 | ||
d1eb60cc | 301 | if (unlikely(!check_checksum(skb))) |
2736b84e JG |
302 | goto error; |
303 | ||
d1eb60cc JG |
304 | hdr_len = parse_header(ip_hdr(skb), &flags, &key); |
305 | if (unlikely(hdr_len < 0)) | |
2736b84e JG |
306 | goto error; |
307 | ||
d1eb60cc | 308 | if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN))) |
2736b84e | 309 | goto error; |
2736b84e | 310 | |
d1eb60cc JG |
311 | iph = ip_hdr(skb); |
312 | vport = tnl_find_port(iph->daddr, iph->saddr, key, | |
313 | TNL_T_PROTO_GRE | TNL_T_KEY_EITHER, &mutable); | |
314 | if (unlikely(!vport)) { | |
315 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | |
2736b84e JG |
316 | goto error; |
317 | } | |
318 | ||
d1eb60cc | 319 | if (mutable->port_config.flags & TNL_F_IN_KEY_MATCH) |
2736b84e JG |
320 | OVS_CB(skb)->tun_id = key; |
321 | else | |
322 | OVS_CB(skb)->tun_id = 0; | |
323 | ||
d1eb60cc JG |
324 | __skb_pull(skb, hdr_len); |
325 | skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN); | |
2736b84e | 326 | |
d1eb60cc | 327 | tnl_rcv(vport, skb); |
2736b84e JG |
328 | return 0; |
329 | ||
330 | error: | |
331 | kfree_skb(skb); | |
332 | return 0; | |
333 | } | |
334 | ||
d1eb60cc JG |
335 | struct tnl_ops gre_tnl_ops = { |
336 | .tunnel_type = TNL_T_PROTO_GRE, | |
337 | .ipproto = IPPROTO_GRE, | |
338 | .hdr_len = gre_hdr_len, | |
339 | .build_header = gre_build_header, | |
842cf6f4 | 340 | .update_header = gre_update_header, |
d1eb60cc | 341 | }; |
2736b84e | 342 | |
d1eb60cc | 343 | static struct vport *gre_create(const char *name, const void __user *config) |
2736b84e | 344 | { |
d1eb60cc | 345 | return tnl_create(name, config, &gre_vport_ops, &gre_tnl_ops); |
2736b84e JG |
346 | } |
347 | ||
348 | static struct net_protocol gre_protocol_handlers = { | |
349 | .handler = gre_rcv, | |
350 | .err_handler = gre_err, | |
351 | }; | |
352 | ||
fceb2a5b | 353 | static int gre_init(void) |
2736b84e JG |
354 | { |
355 | int err; | |
356 | ||
357 | err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE); | |
842cf6f4 | 358 | if (err) |
dfffaef1 | 359 | pr_warn("cannot register gre protocol handler\n"); |
2736b84e | 360 | |
2736b84e JG |
361 | return err; |
362 | } | |
363 | ||
d1eb60cc | 364 | static void gre_exit(void) |
2736b84e | 365 | { |
d1eb60cc | 366 | inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE); |
2736b84e JG |
367 | } |
368 | ||
369 | struct vport_ops gre_vport_ops = { | |
370 | .type = "gre", | |
371 | .flags = VPORT_F_GEN_STATS | VPORT_F_TUN_ID, | |
372 | .init = gre_init, | |
373 | .exit = gre_exit, | |
374 | .create = gre_create, | |
d1eb60cc JG |
375 | .modify = tnl_modify, |
376 | .destroy = tnl_destroy, | |
377 | .set_mtu = tnl_set_mtu, | |
378 | .set_addr = tnl_set_addr, | |
379 | .get_name = tnl_get_name, | |
380 | .get_addr = tnl_get_addr, | |
b19e8815 JG |
381 | .get_dev_flags = vport_gen_get_dev_flags, |
382 | .is_running = vport_gen_is_running, | |
383 | .get_operstate = vport_gen_get_operstate, | |
d1eb60cc JG |
384 | .get_mtu = tnl_get_mtu, |
385 | .send = tnl_send, | |
2736b84e | 386 | }; |