]>
Commit | Line | Data |
---|---|---|
2736b84e JG |
1 | /* |
2 | * Copyright (c) 2010 Nicira Networks. | |
3 | * Distributed under the terms of the GNU GPL version 2. | |
4 | * | |
5 | * Significant portions of this file may be copied from parts of the Linux | |
6 | * kernel, by Linus Torvalds and others. | |
7 | */ | |
8 | ||
dfffaef1 JP |
9 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
10 | ||
d1eb60cc JG |
11 | #include <linux/if.h> |
12 | #include <linux/skbuff.h> | |
2736b84e JG |
13 | #include <linux/ip.h> |
14 | #include <linux/if_tunnel.h> | |
15 | #include <linux/if_vlan.h> | |
16 | #include <linux/in.h> | |
2736b84e | 17 | |
2736b84e | 18 | #include <net/icmp.h> |
2736b84e | 19 | #include <net/ip.h> |
2736b84e | 20 | #include <net/protocol.h> |
2736b84e | 21 | |
d1eb60cc | 22 | #include "tunnel.h" |
2736b84e | 23 | #include "vport.h" |
b19e8815 | 24 | #include "vport-generic.h" |
2736b84e | 25 | |
d1eb60cc JG |
/*
 * A GRE header is built from fixed-size sections: one mandatory base section
 * followed by zero or more optional sections.  Each section occupies
 * GRE_HEADER_SECTION bytes.
 */
#define GRE_HEADER_SECTION 4
31 | ||
27b6cec0 JG |
32 | struct gre_base_hdr { |
33 | __be16 flags; | |
34 | __be16 protocol; | |
35 | }; | |
36 | ||
d1eb60cc | 37 | static int gre_hdr_len(const struct tnl_port_config *port_config) |
2736b84e | 38 | { |
d1eb60cc | 39 | int len; |
2736b84e | 40 | |
d1eb60cc | 41 | len = GRE_HEADER_SECTION; |
2736b84e | 42 | |
d1eb60cc JG |
43 | if (port_config->flags & TNL_F_CSUM) |
44 | len += GRE_HEADER_SECTION; | |
2736b84e | 45 | |
d1eb60cc JG |
46 | if (port_config->out_key || |
47 | port_config->flags & TNL_F_OUT_KEY_ACTION) | |
48 | len += GRE_HEADER_SECTION; | |
2736b84e | 49 | |
d1eb60cc | 50 | return len; |
2736b84e JG |
51 | } |
52 | ||
5214f5c4 JG |
53 | static struct sk_buff *gre_build_header(struct sk_buff *skb, |
54 | const struct vport *vport, | |
55 | const struct tnl_mutable_config *mutable, | |
56 | struct dst_entry *dst) | |
2736b84e | 57 | { |
d1eb60cc JG |
58 | struct gre_base_hdr *greh = (struct gre_base_hdr *)skb_transport_header(skb); |
59 | __be32 *options = (__be32 *)(skb_network_header(skb) + mutable->tunnel_hlen | |
2736b84e JG |
60 | - GRE_HEADER_SECTION); |
61 | ||
27b6cec0 JG |
62 | greh->protocol = htons(ETH_P_TEB); |
63 | greh->flags = 0; | |
2736b84e JG |
64 | |
65 | /* Work backwards over the options so the checksum is last. */ | |
66 | if (mutable->port_config.out_key || | |
d1eb60cc | 67 | mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) { |
27b6cec0 | 68 | greh->flags |= GRE_KEY; |
2736b84e | 69 | |
d1eb60cc | 70 | if (mutable->port_config.flags & TNL_F_OUT_KEY_ACTION) |
2736b84e JG |
71 | *options = OVS_CB(skb)->tun_id; |
72 | else | |
73 | *options = mutable->port_config.out_key; | |
74 | ||
75 | options--; | |
76 | } | |
77 | ||
d1eb60cc | 78 | if (mutable->port_config.flags & TNL_F_CSUM) { |
27b6cec0 | 79 | greh->flags |= GRE_CSUM; |
2736b84e JG |
80 | |
81 | *options = 0; | |
82 | *(__sum16 *)options = csum_fold(skb_checksum(skb, | |
83 | sizeof(struct iphdr), | |
84 | skb->len - sizeof(struct iphdr), | |
85 | 0)); | |
86 | } | |
5214f5c4 JG |
87 | |
88 | /* | |
89 | * Allow our local IP stack to fragment the outer packet even if the | |
90 | * DF bit is set as a last resort. | |
91 | */ | |
92 | skb->local_df = 1; | |
93 | ||
94 | return skb; | |
2736b84e JG |
95 | } |
96 | ||
d1eb60cc | 97 | static int parse_header(struct iphdr *iph, __be16 *flags, __be32 *key) |
2736b84e | 98 | { |
eea2aafb | 99 | /* IP and ICMP protocol handlers check that the IHL is valid. */ |
27b6cec0 JG |
100 | struct gre_base_hdr *greh = (struct gre_base_hdr *)((u8 *)iph + (iph->ihl << 2)); |
101 | __be32 *options = (__be32 *)(greh + 1); | |
2736b84e JG |
102 | int hdr_len; |
103 | ||
27b6cec0 | 104 | *flags = greh->flags; |
2736b84e | 105 | |
d1eb60cc | 106 | if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) |
2736b84e JG |
107 | return -EINVAL; |
108 | ||
d1eb60cc | 109 | if (unlikely(greh->protocol != htons(ETH_P_TEB))) |
2736b84e JG |
110 | return -EINVAL; |
111 | ||
112 | hdr_len = GRE_HEADER_SECTION; | |
113 | ||
27b6cec0 | 114 | if (greh->flags & GRE_CSUM) { |
2736b84e JG |
115 | hdr_len += GRE_HEADER_SECTION; |
116 | options++; | |
117 | } | |
118 | ||
27b6cec0 | 119 | if (greh->flags & GRE_KEY) { |
2736b84e JG |
120 | hdr_len += GRE_HEADER_SECTION; |
121 | ||
122 | *key = *options; | |
123 | options++; | |
124 | } else | |
125 | *key = 0; | |
126 | ||
d1eb60cc | 127 | if (unlikely(greh->flags & GRE_SEQ)) |
2736b84e JG |
128 | hdr_len += GRE_HEADER_SECTION; |
129 | ||
130 | return hdr_len; | |
131 | } | |
132 | ||
d1eb60cc | 133 | /* Called with rcu_read_lock and BH disabled. */ |
fceb2a5b | 134 | static void gre_err(struct sk_buff *skb, u32 info) |
2736b84e JG |
135 | { |
136 | struct vport *vport; | |
d1eb60cc | 137 | const struct tnl_mutable_config *mutable; |
2736b84e JG |
138 | const int type = icmp_hdr(skb)->type; |
139 | const int code = icmp_hdr(skb)->code; | |
140 | int mtu = ntohs(icmp_hdr(skb)->un.frag.mtu); | |
141 | ||
142 | struct iphdr *iph; | |
143 | __be16 flags; | |
144 | __be32 key; | |
145 | int tunnel_hdr_len, tot_hdr_len; | |
146 | unsigned int orig_mac_header; | |
147 | unsigned int orig_nw_header; | |
148 | ||
149 | if (type != ICMP_DEST_UNREACH || code != ICMP_FRAG_NEEDED) | |
150 | return; | |
151 | ||
d1eb60cc JG |
152 | /* |
153 | * The mimimum size packet that we would actually be able to process: | |
2736b84e | 154 | * encapsulating IP header, minimum GRE header, Ethernet header, |
d1eb60cc JG |
155 | * inner IPv4 header. |
156 | */ | |
2736b84e JG |
157 | if (!pskb_may_pull(skb, sizeof(struct iphdr) + GRE_HEADER_SECTION + |
158 | ETH_HLEN + sizeof(struct iphdr))) | |
159 | return; | |
160 | ||
161 | iph = (struct iphdr *)skb->data; | |
162 | ||
d1eb60cc | 163 | tunnel_hdr_len = parse_header(iph, &flags, &key); |
2736b84e JG |
164 | if (tunnel_hdr_len < 0) |
165 | return; | |
166 | ||
d1eb60cc JG |
167 | vport = tnl_find_port(iph->saddr, iph->daddr, key, |
168 | TNL_T_PROTO_GRE | TNL_T_KEY_EITHER, &mutable); | |
2736b84e JG |
169 | if (!vport) |
170 | return; | |
171 | ||
d1eb60cc JG |
172 | /* |
173 | * Packets received by this function were previously sent by us, so | |
eea2aafb JG |
174 | * any comparisons should be to the output values, not the input. |
175 | * However, it's not really worth it to have a hash table based on | |
176 | * output keys (especially since ICMP error handling of tunneled packets | |
177 | * isn't that reliable anyways). Therefore, we do a lookup based on the | |
178 | * out key as if it were the in key and then check to see if the input | |
d1eb60cc JG |
179 | * and output keys are the same. |
180 | */ | |
eea2aafb JG |
181 | if (mutable->port_config.in_key != mutable->port_config.out_key) |
182 | return; | |
183 | ||
d1eb60cc JG |
184 | if (!!(mutable->port_config.flags & TNL_F_IN_KEY_MATCH) != |
185 | !!(mutable->port_config.flags & TNL_F_OUT_KEY_ACTION)) | |
eea2aafb JG |
186 | return; |
187 | ||
d1eb60cc | 188 | if ((mutable->port_config.flags & TNL_F_CSUM) && !(flags & GRE_CSUM)) |
2736b84e JG |
189 | return; |
190 | ||
eea2aafb | 191 | tunnel_hdr_len += iph->ihl << 2; |
2736b84e JG |
192 | |
193 | orig_mac_header = skb_mac_header(skb) - skb->data; | |
194 | orig_nw_header = skb_network_header(skb) - skb->data; | |
eea2aafb | 195 | skb_set_mac_header(skb, tunnel_hdr_len); |
2736b84e | 196 | |
eea2aafb | 197 | tot_hdr_len = tunnel_hdr_len + ETH_HLEN; |
2736b84e JG |
198 | |
199 | skb->protocol = eth_hdr(skb)->h_proto; | |
200 | if (skb->protocol == htons(ETH_P_8021Q)) { | |
201 | tot_hdr_len += VLAN_HLEN; | |
202 | skb->protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; | |
203 | } | |
204 | ||
eea2aafb JG |
205 | skb_set_network_header(skb, tot_hdr_len); |
206 | mtu -= tot_hdr_len; | |
207 | ||
2736b84e JG |
208 | if (skb->protocol == htons(ETH_P_IP)) |
209 | tot_hdr_len += sizeof(struct iphdr); | |
6f470982 | 210 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
eea2aafb | 211 | else if (skb->protocol == htons(ETH_P_IPV6)) |
2736b84e | 212 | tot_hdr_len += sizeof(struct ipv6hdr); |
6f470982 | 213 | #endif |
2736b84e JG |
214 | else |
215 | goto out; | |
216 | ||
217 | if (!pskb_may_pull(skb, tot_hdr_len)) | |
218 | goto out; | |
219 | ||
2736b84e JG |
220 | if (skb->protocol == htons(ETH_P_IP)) { |
221 | if (mtu < IP_MIN_MTU) { | |
222 | if (ntohs(ip_hdr(skb)->tot_len) >= IP_MIN_MTU) | |
223 | mtu = IP_MIN_MTU; | |
224 | else | |
225 | goto out; | |
226 | } | |
227 | ||
6f470982 JG |
228 | } |
229 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | |
230 | else if (skb->protocol == htons(ETH_P_IPV6)) { | |
2736b84e JG |
231 | if (mtu < IPV6_MIN_MTU) { |
232 | unsigned int packet_length = sizeof(struct ipv6hdr) + | |
233 | ntohs(ipv6_hdr(skb)->payload_len); | |
234 | ||
235 | if (packet_length >= IPV6_MIN_MTU | |
236 | || ntohs(ipv6_hdr(skb)->payload_len) == 0) | |
237 | mtu = IPV6_MIN_MTU; | |
238 | else | |
239 | goto out; | |
240 | } | |
241 | } | |
6f470982 | 242 | #endif |
2736b84e | 243 | |
d1eb60cc JG |
244 | __skb_pull(skb, tunnel_hdr_len); |
245 | tnl_frag_needed(vport, mutable, skb, mtu, key); | |
246 | __skb_push(skb, tunnel_hdr_len); | |
2736b84e JG |
247 | |
248 | out: | |
249 | skb_set_mac_header(skb, orig_mac_header); | |
250 | skb_set_network_header(skb, orig_nw_header); | |
251 | skb->protocol = htons(ETH_P_IP); | |
252 | } | |
253 | ||
d1eb60cc JG |
254 | static bool check_checksum(struct sk_buff *skb) |
255 | { | |
256 | struct iphdr *iph = ip_hdr(skb); | |
257 | struct gre_base_hdr *greh = (struct gre_base_hdr *)(iph + 1); | |
258 | __sum16 csum = 0; | |
259 | ||
260 | if (greh->flags & GRE_CSUM) { | |
261 | switch (skb->ip_summed) { | |
262 | case CHECKSUM_COMPLETE: | |
263 | csum = csum_fold(skb->csum); | |
264 | ||
265 | if (!csum) | |
266 | break; | |
267 | /* Fall through. */ | |
268 | ||
269 | case CHECKSUM_NONE: | |
270 | skb->csum = 0; | |
271 | csum = __skb_checksum_complete(skb); | |
272 | skb->ip_summed = CHECKSUM_COMPLETE; | |
273 | break; | |
274 | } | |
275 | } | |
276 | ||
277 | return (csum == 0); | |
278 | } | |
279 | ||
280 | /* Called with rcu_read_lock and BH disabled. */ | |
fceb2a5b | 281 | static int gre_rcv(struct sk_buff *skb) |
2736b84e JG |
282 | { |
283 | struct vport *vport; | |
d1eb60cc | 284 | const struct tnl_mutable_config *mutable; |
2736b84e JG |
285 | int hdr_len; |
286 | struct iphdr *iph; | |
287 | __be16 flags; | |
288 | __be32 key; | |
289 | ||
d1eb60cc | 290 | if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr) + ETH_HLEN))) |
2736b84e JG |
291 | goto error; |
292 | ||
d1eb60cc | 293 | if (unlikely(!check_checksum(skb))) |
2736b84e JG |
294 | goto error; |
295 | ||
d1eb60cc JG |
296 | hdr_len = parse_header(ip_hdr(skb), &flags, &key); |
297 | if (unlikely(hdr_len < 0)) | |
2736b84e JG |
298 | goto error; |
299 | ||
d1eb60cc | 300 | if (unlikely(!pskb_may_pull(skb, hdr_len + ETH_HLEN))) |
2736b84e | 301 | goto error; |
2736b84e | 302 | |
d1eb60cc JG |
303 | iph = ip_hdr(skb); |
304 | vport = tnl_find_port(iph->daddr, iph->saddr, key, | |
305 | TNL_T_PROTO_GRE | TNL_T_KEY_EITHER, &mutable); | |
306 | if (unlikely(!vport)) { | |
307 | icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); | |
2736b84e JG |
308 | goto error; |
309 | } | |
310 | ||
d1eb60cc | 311 | if (mutable->port_config.flags & TNL_F_IN_KEY_MATCH) |
2736b84e JG |
312 | OVS_CB(skb)->tun_id = key; |
313 | else | |
314 | OVS_CB(skb)->tun_id = 0; | |
315 | ||
d1eb60cc JG |
316 | __skb_pull(skb, hdr_len); |
317 | skb_postpull_rcsum(skb, skb_transport_header(skb), hdr_len + ETH_HLEN); | |
2736b84e | 318 | |
d1eb60cc | 319 | tnl_rcv(vport, skb); |
2736b84e JG |
320 | return 0; |
321 | ||
322 | error: | |
323 | kfree_skb(skb); | |
324 | return 0; | |
325 | } | |
326 | ||
d1eb60cc JG |
327 | struct tnl_ops gre_tnl_ops = { |
328 | .tunnel_type = TNL_T_PROTO_GRE, | |
329 | .ipproto = IPPROTO_GRE, | |
330 | .hdr_len = gre_hdr_len, | |
331 | .build_header = gre_build_header, | |
332 | }; | |
2736b84e | 333 | |
d1eb60cc | 334 | static struct vport *gre_create(const char *name, const void __user *config) |
2736b84e | 335 | { |
d1eb60cc | 336 | return tnl_create(name, config, &gre_vport_ops, &gre_tnl_ops); |
2736b84e JG |
337 | } |
338 | ||
339 | static struct net_protocol gre_protocol_handlers = { | |
340 | .handler = gre_rcv, | |
341 | .err_handler = gre_err, | |
342 | }; | |
343 | ||
fceb2a5b | 344 | static int gre_init(void) |
2736b84e JG |
345 | { |
346 | int err; | |
347 | ||
348 | err = inet_add_protocol(&gre_protocol_handlers, IPPROTO_GRE); | |
d1eb60cc | 349 | if (err) { |
dfffaef1 | 350 | pr_warn("cannot register gre protocol handler\n"); |
d1eb60cc | 351 | goto out; |
2736b84e JG |
352 | } |
353 | ||
d1eb60cc | 354 | err = tnl_init(); |
2736b84e | 355 | |
d1eb60cc | 356 | out: |
2736b84e JG |
357 | return err; |
358 | } | |
359 | ||
d1eb60cc | 360 | static void gre_exit(void) |
2736b84e | 361 | { |
d1eb60cc JG |
362 | tnl_exit(); |
363 | inet_del_protocol(&gre_protocol_handlers, IPPROTO_GRE); | |
2736b84e JG |
364 | } |
365 | ||
366 | struct vport_ops gre_vport_ops = { | |
367 | .type = "gre", | |
368 | .flags = VPORT_F_GEN_STATS | VPORT_F_TUN_ID, | |
369 | .init = gre_init, | |
370 | .exit = gre_exit, | |
371 | .create = gre_create, | |
d1eb60cc JG |
372 | .modify = tnl_modify, |
373 | .destroy = tnl_destroy, | |
374 | .set_mtu = tnl_set_mtu, | |
375 | .set_addr = tnl_set_addr, | |
376 | .get_name = tnl_get_name, | |
377 | .get_addr = tnl_get_addr, | |
b19e8815 JG |
378 | .get_dev_flags = vport_gen_get_dev_flags, |
379 | .is_running = vport_gen_is_running, | |
380 | .get_operstate = vport_gen_get_operstate, | |
d1eb60cc JG |
381 | .get_mtu = tnl_get_mtu, |
382 | .send = tnl_send, | |
2736b84e | 383 | }; |